Some buckets fail to download after some time
hellais opened this issue · comments
It's not fully reproducible, but I was able to reproduce it after running the pip version of oonidata, like so:
% oonidata sync --probe-cc RU --start-day 2023-10-01 --end-day 2023-10-31 --test-name web_connectivity --output-dir Russia/2023/
Downloading measurements for 2023-10-01 - 2023-10-31 into Russia/2023
probe_cc: RU
test_name: web_connectivity
downloaded 20/720: 3%|█▌ | 1.35G/51.9G [01:26<53:54, 15.6MB/s]
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/Users/art/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/dataclient.py", line 651, in download_file_entry_list
for msmt_dict in iter_measurements(
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/dataclient.py", line 594, in iter_measurements
for msmt in fe.stream_measurements():
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/dataclient.py", line 278, in stream_measurements
yield from stream_postcan(body)
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/dataclient.py", line 162, in stream_postcan
post = orjson.loads(in_file.read())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
orjson.JSONDecodeError: unexpected character: line 1 column 17 (char 16)
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/Users/art/.pyenv/versions/oonidata/bin/oonidata", line 8, in <module>
sys.exit(cli())
^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/click/core.py", line 1157, in __call__
return self.main(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/click/core.py", line 1078, in main
rv = self.invoke(ctx)
^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/click/core.py", line 1688, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/click/core.py", line 1434, in invoke
return ctx.invoke(self.callback, **ctx.params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/click/core.py", line 783, in invoke
return __callback(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/cli/command.py", line 102, in sync
sync_measurements(
File "/Users/art/.pyenv/versions/3.11.7/envs/oonidata/lib/python3.11/site-packages/oonidata/dataclient.py", line 721, in sync_measurements
for fe_size in pool.imap_unordered(
File "/Users/art/.pyenv/versions/3.11.7/lib/python3.11/multiprocessing/pool.py", line 873, in next
raise value
orjson.JSONDecodeError: unexpected character: line 1 column 17 (char 16)