facing SSL connection timeout OR Proxy CONNECT timeout with AsyncDDGS
adhadse opened this issue · comments
Hey there.
I'm facing an issue running AsyncDDGS with a proxy (an HTTP endpoint). Even after multiple retries, many searches fail and only a few return results.
The errors I received most prominently were:
RequestsError: Failed to perform, curl: (28) Proxy CONNECT aborted due to timeout.
SSL connection timeout
Could it be that too many connections are being made by the async code, so that many connections through the proxy are failing?
Code:
import loguru
import asyncio
import itertools
from duckduckgo_search import AsyncDDGS
# Module-wide loguru logger used by the retry/exception-handling decorators below.
logger = loguru.logger
# Async retry decorator
def wait_incrementing(start, increment, max_wait):
    """Build an async wait strategy: ``start + increment * retry``, capped at ``max_wait``.

    Returns a coroutine function that maps a retry number to the number of
    seconds to wait before the next attempt.
    """
    async def wait_strategy(retry_number):
        delay = start + increment * retry_number
        return max_wait if delay > max_wait else delay

    return wait_strategy
def stop_after_attempt(max_attempts):
    """Build an async stop strategy that signals "give up" once
    ``retry_number`` reaches ``max_attempts``."""
    async def stop_strategy(retry_number):
        return not retry_number < max_attempts

    return stop_strategy
def async_retry(wait=None, stop=None):
    """Retry decorator for async functions.

    Args:
        wait: async callable ``retry_number -> seconds`` to sleep before the
            next attempt.  Defaults to no delay between attempts.
        stop: async callable ``retry_number -> bool``; when it returns True
            the last exception is re-raised.  Defaults to retrying forever.
    """
    if wait is None:
        # Bug fix: the original defaulted to `asyncio.sleep`, which returns
        # None, so `asyncio.sleep(await wait(...))` below crashed with
        # `asyncio.sleep(None)`.  Default to a zero delay instead.
        async def wait(retry_number):
            return 0
    if stop is None:
        async def stop_strategy(retry_number):
            return False
    else:
        stop_strategy = stop

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            # Bug fix: no explicit `self` parameter — the decorator is applied
            # to plain module-level coroutines called with keyword arguments
            # only (e.g. `aget_results(query=w)`), which previously raised
            # TypeError for the missing positional `self`.
            retry_number = 0
            while True:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    retry_number += 1
                    logger.info(
                        f"Retrying {retry_number}",
                        # .get(): positional calls have no "query" kwarg.
                        keyword=kwargs.get("query"),
                    )
                    if await stop_strategy(retry_number):
                        raise e
                    await asyncio.sleep(await wait(retry_number))

        return wrapper

    return decorator
def exception_handling(func):
    """Wrap an async function so any exception is logged and ``[]`` returned.

    Acts as a last-resort guard so one failing search does not abort the
    whole `asyncio.gather` fan-out.
    """
    @wraps(func)
    async def wrapper(*args, **kwargs):
        # Bug fix: no explicit `self` parameter — the decorator is applied to
        # plain module-level coroutines called with keyword arguments only,
        # which previously raised TypeError for the missing positional `self`.
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            logger.exception(f"Exception handled {e}")
            return []

    return wrapper
# operation functions
@exception_handling
@async_retry(
    wait=wait_incrementing(start=5, increment=10, max_wait=30),
    stop=stop_after_attempt(5),
)
async def aget_results(query):
    """Fetch up to 20 news results for `query` through the proxy, retrying on failure.

    Returns a list of result dicts, or [] after all retries are exhausted
    (via the `exception_handling` decorator).
    """
    # Bug fix: the original passed the undefined name `word`; the parameter
    # is `query`.  Also `timelimit` must be "d" (last day), not "1d".
    # NOTE(review): PROXY is assumed to be defined elsewhere in the module.
    results = await AsyncDDGS(proxies=PROXY).news(
        keywords=query, region="wt-wt", safesearch="off", timelimit="d", max_results=20
    )
    return results
async def main():
    """Fan out one news search per keyword, flatten the batches, and return them."""
    words = ["sun", "earth", "moon"] * 1_000  # increase the number
    coros = (aget_results(query=w) for w in words)
    batches = await asyncio.gather(*coros)
    flattened = [item for batch in batches for item in batch]
    print(len(flattened))
    return flattened
if __name__ == "__main__":
    # Script entry point: run the crawl and report the total result count.
    search_results = asyncio.run(main())
    print(len(search_results))
What could be the problem? If the number of connections is the issue, how can I control it? I remember that semaphores exist, but how would I use one here?
Your code doesn't work as posted — for example, `aget_results` references the undefined name `word` instead of its `query` parameter.
Use asyncio.Semaphore to limit the number of concurrent requests.
Increase timeout.
Correct 'timelimit'.
import asyncio
import logging
from duckduckgo_search import AsyncDDGS
# Verbose logging so each request and retry is visible while debugging.
logging.basicConfig(level=logging.DEBUG)
# Global semaphore: at most 10 requests in flight at any one time.
SEM = asyncio.Semaphore(10)
# SOCKS5 proxy endpoint (port 9150 is presumably a Tor Browser proxy —
# replace with your own proxy endpoint).
proxies = "socks5://localhost:9150"
async def aget_results(keywords):
    """Fetch news results for `keywords`, retrying until a request succeeds.

    The module-level semaphore SEM caps how many coroutines perform a
    request concurrently; each attempt logs and retries on any exception.
    """
    async with SEM:
        while True:
            try:
                results = await AsyncDDGS(proxies=proxies, timeout=20).news(
                    keywords,
                    region="wt-wt",
                    safesearch="off",
                    timelimit="d",
                    max_results=20,
                )
                return results
            except Exception as ex:
                logging.warning(f"{type(ex).__name__}: {ex}")
                # Bug fix: back off briefly so a persistent failure does not
                # spin in a tight loop and hammer the proxy endpoint.
                await asyncio.sleep(1)
async def main():
    """Run one news search per keyword and print each batch with its size."""
    keywords = ["sun", "earth", "moon"]
    pending = [aget_results(keywords=k) for k in keywords]
    results = await asyncio.gather(*pending)
    for batch in results:
        print(batch)
        print(len(batch))
    return results
if __name__ == "__main__":
    # Entry point for running the example as a script.
    asyncio.run(main())