scikit-hep / fastjet

Jet-finding in the Scikit-HEP ecosystem.

Home Page: https://fastjet.readthedocs.io

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

Tests failing due to new awkward release?

rkansal47 opened this issue · comments

Some weird test failures now [1, 2].

@lgray @jpivarski do the tests perhaps need to be modified because of awkward going from 2.3.1 -> 2.3.3?

They appear to have passed 3 weeks ago with awkward 2.3.1: #244

[1] tests/test_002-exclusive_jets.py::test_listoffset_indexed_input

def test_listoffset_indexed_input():
        inputs = ak.Array(
            [
                [
                    {"px": 1.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.0},
                ],
                [
                    {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.0},
                ],
                [
                    {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.1},
                ],
            ],
            with_name="Momentum4D",
        )
        out = ak.Array(
            ak.contents.RecordArray(
                [
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).px)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).py)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).pz)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).E)),
                ],
                ["px", "py", "pz", "E"],
            )
        )
        out = ak.Array(
            ak.contents.IndexedArray(
                ak.index.Index64([7, 2, 3, 1, 0, 5, 4, 6, 8]), out.layout
            )
        )
        out = ak.Array(ak.contents.ListOffsetArray(inputs.layout.offsets, out.layout))
        jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.6)
        cluster = fastjet._pyjet.AwkwardClusterSequence(out, jetdef)
        inclusive_jets = [
            [
                {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                {"px": 64.65, "py": 127.41999999999999, "pz": 1086.48, "E": 48.68},
            ],
            [
                {"px": 1.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                {"px": 64.65, "py": 127.41999999999999, "pz": 1086.48, "E": 48.68},
            ],
            [
                {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                {"px": 64.65, "py": 127.41999999999999, "pz": 1086.48, "E": 48.68},
            ],
        ]
    
>       assert inclusive_jets == cluster.inclusive_jets().to_list()
E       AssertionError: assert [[{'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}], [{'px': 1.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}], [{'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}]] == [[{'px': 6e-323, 'py': 51.18, 'pz': 6e-323, 'E': 51.18}], [{'px': 3e-323, 'py': 51.18, 'pz': 3e-323, 'E': 51.18}], [{'px': 9e-323, 'py': 51.18, 'pz': 9e-323, 'E': 51.18}]]

[2] tests/test_007-general.py::test_indexed_subtree_input

   def test_indexed_subtree_input():
        inputs = ak.Array(
            [
                [
                    {"px": 1.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.0},
                ],
                [
                    {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.0},
                ],
                [
                    {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5, "ex": 0.78},
                    {"px": 32.2, "py": 64.21, "pz": 543.34, "E": 24.12, "ex": 0.35},
                    {"px": 32.45, "py": 63.21, "pz": 543.14, "E": 24.56, "ex": 0.1},
                ],
            ],
            with_name="Momentum4D",
        )
        out = ak.Array(
            ak.contents.RecordArray(
                [
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).px)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).py)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).pz)),
                    ak.contents.NumpyArray(np.asarray(ak.Array(inputs.layout.content).E)),
                ],
                ["px", "py", "pz", "E"],
            )
        )
        out = ak.Array(
            ak.contents.IndexedArray(
                ak.index.Index64([7, 2, 3, 1, 0, 5, 4, 6, 8]), out.layout
            )
        )
        out = ak.Array(ak.contents.ListOffsetArray(inputs.layout.offsets, out.layout))
        out = ak.Array(
            ak.contents.ListOffsetArray(ak.index.Index64([0, 1, 2, 3]), out.layout)
        )
        out = ak.Array(
            ak.contents.ListOffsetArray(ak.index.Index64([0, 1, 2, 3]), out.layout)
        )
        out = ak.Array(
            ak.contents.ListOffsetArray(ak.index.Index64([0, 1, 2, 3]), out.layout)
        )
        jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.6)
        cluster = fastjet.ClusterSequence(out, jetdef)
        inclusive_jets = [
            [
                [
                    [
                        [
                            {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                            {
                                "px": 64.65,
                                "py": 127.41999999999999,
                                "pz": 1086.48,
                                "E": 48.68,
                            },
                        ]
                    ]
                ]
            ],
            [
                [
                    [
                        [
                            {"px": 1.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                            {
                                "px": 64.65,
                                "py": 127.41999999999999,
                                "pz": 1086.48,
                                "E": 48.68,
                            },
                        ]
                    ]
                ]
            ],
            [
                [
                    [
                        [
                            {"px": 11.2, "py": 3.2, "pz": 5.4, "E": 2.5},
                            {
                                "px": 64.65,
                                "py": 127.41999999999999,
                                "pz": 1086.48,
                                "E": 48.68,
                            },
                        ]
                    ]
                ]
            ],
        ]
>       assert inclusive_jets == cluster.inclusive_jets().to_list()
E       AssertionError: assert [[[[[{'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}]]]], [[[[{'px': 1.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}]]]], [[[[{'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5}, {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}]]]]] == [[[[[{'px': 6e-323, 'py': 51.18, 'pz': 6e-323, 'E': 51.18}]]]], [[[[{'px': 3e-323, 'py': 51.18, 'pz': 3e-323, 'E': 51.18}]]]], [[[[{'px': 9e-323, 'py': 51.18, 'pz': 9e-323, 'E': 51.18}]]]]]

I don't see anything in the last few releases that would touch something so low-level: it's looking at memory and picking up random junk. In the last test, quoted above, the expected result

[[[[[
    {'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5},
    {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}]
]]], [[[[
    {'px': 1.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5},
    {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}
]]]], [[[[
    {'px': 11.2, 'py': 3.2, 'pz': 5.4, 'E': 2.5},
    {'px': 64.65, 'py': 127.41999999999999, 'pz': 1086.48, 'E': 48.68}
]]]]]

came out as

[
    [[[[{'px': 6e-323, 'py': 51.18, 'pz': 6e-323, 'E': 51.18}]]]],
    [[[[{'px': 3e-323, 'py': 51.18, 'pz': 3e-323, 'E': 51.18}]]]],
    [[[[{'px': 9e-323, 'py': 51.18, 'pz': 9e-323, 'E': 51.18}]]]],
]

It has the same depth of structure (the nested Python objects), but there are wrong offsets here (list lengths are different) and wrong values (6e-323 is not a plausible value, and 51.18 seems to be duplicated).

It looks like this is probably caused by a bug in the array-interface implementation, which is fixed by scikit-hep/awkward#2697.

We'll get another release out, and see what happens!

Great, looks like these tests are now passing! However, there's one failure [1] now, I guess because dask-awkward added pyarrow as a dependency for its testing. I'll add dask-awkward[test] as a dependency for our tests to resolve this, unless @lgray (or others) have better suggestions.

[1]

___________________________ test_inclusive_from_file ___________________________
    def test_inclusive_from_file():
        from pathlib import Path
    
        import uproot
>       from dask_awkward.lib.testutils import assert_eq
tests/test_008-dask.py:103: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
    from __future__ import annotations
    
    import random
    from typing import Any
    
    import awkward as ak
    import numpy as np
>   import pyarrow as pa
E   ModuleNotFoundError: No module named 'pyarrow'
../../../hostedtoolcache/Python/3.8.17/x64/lib/python3.8/site-packages/dask_awkward/lib/testutils.py:8: ModuleNotFoundError