KeyError on ingested bundle in run_algorithm
lamres opened this issue · comments
Hi everyone!
I ingested a bundle from CSV files like this:
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities
import pandas as pd
start_session = pd.Timestamp('2021-1-1', tz='utc')
end_session = pd.Timestamp('2021-4-22', tz='utc')
register(
'binance_test',
csvdir_equities(
['minute'],
'/Users/sermal/.zipline/custom_data/csv',
),
calendar_name='24/7',
minutes_per_day=1440,
start_session=start_session,
end_session=end_session,
)
After that, I'm trying to run the algorithm with this code:
def initialize(context):
context.asset = symbol('BTCUSDT')
order_target_percent(context.asset, 1.0)
def handle_data(context, data):
current_time = get_datetime().time()
if current_time.hour == 0 and current_time.minute == 0:
print('Current date is ' + str(get_datetime().date()))
run_algorithm(
capital_base = 1000000,
data_frequency = 'minute',
initialize = initialize,
handle_data = handle_data,
bundle = 'binance_test',
start=pd.to_datetime('2021-1-3', utc=True),
end=pd.to_datetime('2021-1-4', utc=True))
But I'm getting this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1609459200000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-3-8711d6cc09d8> in <module>()
19 bundle = 'binance_test',
20 start=pd.to_datetime('2021-1-3', utc=True),
---> 21 end=pd.to_datetime('2021-1-4', utc=True))
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in run_algorithm(start, end, initialize, capital_base, handle_data, before_trading_start, analyze, data_frequency, bundle, bundle_timestamp, trading_calendar, metrics_set, benchmark_returns, default_extension, extensions, strict_extensions, environ, blotter)
407 environ=environ,
408 blotter=blotter,
--> 409 benchmark_spec=benchmark_spec,
410 )
411
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter, benchmark_spec)
163 equity_minute_reader=bundle_data.equity_minute_bar_reader,
164 equity_daily_reader=bundle_data.equity_daily_bar_reader,
--> 165 adjustment_reader=bundle_data.adjustment_reader,
166 )
167
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/zipline/data/data_portal.py in __init__(self, asset_finder, trading_calendar, first_trading_day, equity_daily_reader, equity_minute_reader, future_daily_reader, future_minute_reader, adjustment_reader, last_available_session, last_available_minute, minute_history_prefetch_length, daily_history_prefetch_length)
289 self._first_trading_day
290 )
--> 291 if self._first_trading_day is not None else (None, None)
292 )
293
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/trading_calendars/trading_calendar.py in open_and_close_for_session(self, session_label)
763 # http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#datetime-with-tz # noqa
764 return (
--> 765 sched.at[session_label, 'market_open'].tz_localize(UTC),
766 sched.at[session_label, 'market_close'].tz_localize(UTC),
767 )
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/pandas/core/indexing.py in __getitem__(self, key)
1867
1868 key = self._convert_key(key)
-> 1869 return self.obj._get_value(*key, takeable=self._takeable)
1870
1871 def __setitem__(self, key, value):
/opt/anaconda3/envs/env_zipline/lib/python3.5/site-packages/pandas/core/frame.py in _get_value(self, index, col, takeable)
1983
1984 try:
-> 1985 return engine.get_value(series._values, index)
1986 except (TypeError, ValueError):
1987
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_value()
pandas/_libs/index.pyx in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2021-01-01 00:00:00+0000', tz='UTC')
I use zipline version 1.4.1. What could be the reason for this?
There are many similar issue reports, and each could have a different cause.
Do any of your custom CSV files lack data for 2021-01-01?
In my case, my CSV files each had a different starting date.
I removed the data before a certain point in time (e.g. 2021-1-2) so that all the CSV files start from the same date.
My data file has candlesticks starting from summer 2017. The timestamps in the data are formatted like 2021-01-01 00:00:00 (no timezone), but the error shows a timezone-aware value like 2021-01-01 00:00:00+0000, so maybe the trouble is here.
Hi @lamres,
I am having the same issue. Did you find any solution for that?
@fazrad Hi.
Unfortunately, I didn't.
@lamres
If you don't pass the trading_calendar argument when calling run_algorithm(), Zipline uses the default calendar, which is 'XNYS'.
You can solve it this way:
from trading_calendars import get_calendar
run_algorithm(..., trading_calendar=get_calendar('Binance'))
Hi there, I have almost the same problem. The ingested data is from 2020, but I still get the error:
KeyError: Timestamp('2003-08-23 00:00:00+0000', tz='UTC')
This is exactly 20 years earlier than the day the data was ingested. Do you know how to solve this?
```
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File pandas\_libs\index.pyx:444, in pandas._libs.index.DatetimeEngine.get_loc()
File pandas\_libs\hashtable_class_helper.pxi:1625, in pandas._libs.hashtable.Int64HashTable.get_item()
File pandas\_libs\hashtable_class_helper.pxi:1632, in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 1061596800000000000
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
File c:\Users\henry\miniconda3\envs\ml4t\lib\site-packages\pandas\core\indexes\base.py:3081, in Index.get_loc(self, key, method, tolerance)
3080 try:
-> 3081 return self._engine.get_loc(casted_key)
3082 except KeyError as err:
File pandas\_libs\index.pyx:413, in pandas._libs.index.DatetimeEngine.get_loc()
File pandas\_libs\index.pyx:446, in pandas._libs.index.DatetimeEngine.get_loc()
KeyError: Timestamp('2003-08-23 00:00:00+0000', tz='UTC')
The above exception was the direct cause of the following exception:
...
686 return Index.get_loc(self, key, method, tolerance)
687 except KeyError as err:
--> 688 raise KeyError(orig_key) from err
KeyError: Timestamp('2003-08-23 00:00:00+0000', tz='UTC')
```