Consider adding OA collection thoth-archiving-network to OL Import List
mekarpeles opened this issue · comments
Mek commented
openlibrary/openlibrary/core/ia.py
Lines 284 to 315 in b3d692a
```python
params = {
    'q': ' AND '.join(
        [
            'mediatype:texts',
            '(%s)'
            % ' OR '.join(
                f'repub_state:{state}' for state in VALID_READY_REPUB_STATES
            ),
            'scanningcenter:*',
            'scanner:*',
            'scandate:*',
            '!collection:opensource',
            '!collection:additional_collections',
            '!collection:litigationworks',
            '!noindex:true',
            '!is_dark:true',
            'format:pdf',
            f'indexdate:{day}*',
            # Fetch back to items added before the day of interest, since items
            # sometimes take a few days to process into the collection.
            f'addeddate:[{day - 60 * DAY} TO {day + 1 * DAY}]',
            # TODO: This seems to be getting more records than expected
            *(['format:marc'] if marcs else []),
        ]
    ),
    'fl': 'identifier,format',
    'service': 'metadata__unlimited',
    'rows': '100000',  # This is the max, I believe
    'output': 'json',
}
return f'{IA_BASE_URL}/advancedsearch.php?' + urlencode(params)
```
Pending policy approval