Compatibility with Pandas 1.1 on event datetime ms and ns array
tanguycdls opened this issue · comments
Hi, thanks for your work here :)!
With the latest release of Pandas 1.1:
import numpy as np
import pandas as pd
import fletcher as ftr
import pyarrow as pa
from fletcher import pandas_from_arrow
print('pandas version', pd.__version__)
print('Fletcher version', ftr.__version__)
print('Pyarrow version', pa.__version__)
pseudo_array = (np.datetime64("2020-06-06") + np.random.randint(0, 10, size=1000)).copy()
pararray = pa.array(pseudo_array.astype('datetime64[ns]'))
pandas_from_arrow(pararray, continuous=True)
returns
pandas version 1.1.0
Fletcher version 0.6.2
Pyarrow version 2.0.0
<ipython-input-1-60eb2b0704e9> in <module>
10 pseudo_array = np.datetime64("2020-06-06") + np.random.randint(0, 10, size=1000)
11 pararray = pa.array(pseudo_array.astype('datetime64[ns]'))
---> 12 pandas_from_arrow(pararray, continuous=True)
/opt/conda/envs/model/lib/python3.7/site-packages/fletcher/base.py in pandas_from_arrow(arrow_object, continuous)
1740 return pd.DataFrame(data)
1741 elif isinstance(arrow_object, (pa.ChunkedArray, pa.Array)):
-> 1742 return pd.Series(array_type(arrow_object))
1743 else:
1744 raise NotImplementedError(
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
327 data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
328
--> 329 data = SingleBlockManager.from_array(data, index)
330
331 generic.NDFrame.__init__(self, data)
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/internals/managers.py in from_array(cls, array, index)
1534 Constructor for if we have an array that is not yet a Block.
1535 """
-> 1536 block = make_block(array, placement=slice(0, len(index)), ndim=1)
1537 return cls(block, index)
1538
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/internals/blocks.py in make_block(values, placement, klass, ndim, dtype)
2715 values = DatetimeArray._simple_new(values, dtype=dtype)
2716
-> 2717 return klass(values, ndim=ndim, placement=placement)
2718
2719
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/internals/blocks.py in __init__(self, values, placement, ndim)
1539 classes mixed in with this Mixin.
1540 """
-> 1541 values = self._maybe_coerce_values(values)
1542
1543 # Placement must be converted to BlockPlacement so that we can check
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/internals/blocks.py in _maybe_coerce_values(self, values)
2165 """
2166 if not isinstance(values, self._holder):
-> 2167 values = self._holder(values)
2168
2169 if values.tz is None:
/opt/conda/envs/model/lib/python3.7/site-packages/pandas/core/arrays/datetimes.py in __init__(self, values, dtype, freq, copy)
239 if not isinstance(values, np.ndarray):
240 raise ValueError(
--> 241 f"Unexpected type '{type(values).__name__}'. 'values' must be "
242 "a DatetimeArray ndarray, or Series or Index containing one of those."
243 )
ValueError: Unexpected type 'FletcherContinuousArray'. 'values' must be a DatetimeArray ndarray, or Series or Index containing one of those.
It looks like pandas tries to cast the array to its new DatetimeArray type and fails because the container is not a NumPy array. The bug only happens for datetime arrays with ns or ms resolution; with day resolution, e.g. pararray = pa.array(pseudo_array.astype('datetime64[D]')), the error does not occur.
I don't know how we could force pandas to keep the array as is and not try to use that feature.
This project has been archived as development has ceased around 2021.
With the support of Apache Arrow-backed extension arrays in pandas, the major goal of this project has been fulfilled.