xhochy / fletcher

Pandas ExtensionDtype/ExtensionArray backed by Apache Arrow

Home Page: https://fletcher.readthedocs.io/

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

BaseSetitemTests.test_setitem_integer_array fails with ValueError

xhochy opened this issue · comments

The failure occurs without any fletcher frames in the stack trace, so I'm a bit confused:

tests/test_pandas_extension.py::TestBaseSetitemTests::test_setitem_integer_array[True-fletcher_type7-chunked-list] FAILED
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> traceback >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

self = 0    ['B' 'C']
1        ['A']
2       [None]
3    ['A' 'A']
4           []
dtype: fletcher_chunked[list<item: string>], key = [0, 1, 2], value = ['B', 'C']

    def __setitem__(self, key, value):
        key = com.apply_if_callable(key, self)
        cacher_needs_updating = self._check_is_chained_assignment_possible()

        try:
>           self._set_with_engine(key, value)

../pandas/pandas/core/series.py:982:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = 0    ['B' 'C']
1        ['A']
2       [None]
3    ['A' 'A']
4           []
dtype: fletcher_chunked[list<item: string>], key = [0, 1, 2], value = ['B', 'C']

    def _set_with_engine(self, key, value):
        # fails with AttributeError for IntervalIndex
>       loc = self.index._engine.get_loc(key)

../pandas/pandas/core/series.py:1015:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???

pandas/_libs/index.pyx:61:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???
E   TypeError: '[0, 1, 2]' is an invalid key

pandas/_libs/index.pyx:66: TypeError

During handling of the above exception, another exception occurred:

self = <test_pandas_extension.TestBaseSetitemTests object at 0x12679e450>, data = <FletcherChunkedArray>
[['B', 'C'],      ['A'],     [None], ['A', 'A'],         [], ['B', 'C'],
      ['A'],     [None..., ['B', 'C'],
      ['A'],     [None], ['A', 'A'],         []]
Length: 100, dtype: fletcher_chunked[list<item: string>], idx = [0, 1, 2], box_in_series = True

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

>       arr[idx] = arr[0]

../pandas/pandas/tests/extension/base/setitem.py:153:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
../pandas/pandas/core/series.py:1008: in __setitem__
    self._set_with(key, value)
../pandas/pandas/core/series.py:1051: in _set_with
    self._set_labels(key, value)
../pandas/pandas/core/series.py:1065: in _set_labels
    self._set_values(indexer, value)
../pandas/pandas/core/series.py:1070: in _set_values
    self._data = self._data.setitem(indexer=key, value=value)
../pandas/pandas/core/internals/managers.py:544: in setitem
    return self.apply("setitem", **kwargs)
../pandas/pandas/core/internals/managers.py:424: in apply
    applied = getattr(b, f)(**kwargs)
../pandas/pandas/core/internals/blocks.py:1816: in setitem
    check_setitem_lengths(indexer, value, self.values)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

indexer = array([0, 1, 2]), value = ['B', 'C'], values = <FletcherChunkedArray>
[['B', 'C'], ['A'], [None], ['A', 'A'], []]
Length: 5, dtype: fletcher_chunked[list<item: string>]

    def check_setitem_lengths(indexer, value, values) -> None:
        """
        Validate that value and indexer are the same length.

        An special-case is allowed for when the indexer is a boolean array
        and the number of true values equals the length of ``value``. In
        this case, no exception is raised.

        Parameters
        ----------
        indexer : sequence
            Key for the setitem.
        value : array-like
            Value for the setitem.
        values : array-like
            Values being set into.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            When the indexer is an ndarray or list and the lengths don't match.
        """
        # boolean with truth values == len of the value is ok too
        if isinstance(indexer, (np.ndarray, list)):
            if is_list_like(value) and len(indexer) != len(value):
                if not (
                    isinstance(indexer, np.ndarray)
                    and indexer.dtype == np.bool_
                    and len(indexer[indexer]) == len(value)
                ):
                    raise ValueError(
>                       "cannot set using a list-like indexer "
                        "with a different length than the value"
                    )
E                   ValueError: cannot set using a list-like indexer with a different length than the value

../pandas/pandas/core/indexers.py:115: ValueError

Note that this test seems to exist only in pandas master currently.

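This could be related to the dtype: I am seeing the same problem with test_setitem_slice and fletcher_chunked[list<item: string>]. For a list dtype the scalar being assigned (here ['B', 'C']) is itself list-like, so check_setitem_lengths appears to compare its length (2) against the length of the indexer (3) and raise.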

This project has been archived as development has ceased around 2021.
With the support of Apache Arrow-backed extension arrays in pandas, the major goal of this project has been fulfilled.