deedy5 / duckduckgo_search

Search for words, documents, images, videos, news, maps, and text translation using the DuckDuckGo.com search engine. Also supports downloading files and images to a local hard drive.


facing SSL connection timeout OR Proxy CONNECT timeout with AsyncDDGS

adhadse opened this issue

Hey there.

I'm facing an issue running AsyncDDGS with a proxy (an HTTP endpoint). Even after multiple retries, most searches fail and only a few return results.

The errors I received most often were:

  • RequestsError: Failed to perform, curl: (28) Proxy CONNECT aborted due to timeout.
  • SSL connection timeout

Could it be that too many connections are being opened by the async code, so that many of them fail when going through the proxy?

Code:

import asyncio
import itertools
from functools import wraps

import loguru
from duckduckgo_search import AsyncDDGS

logger = loguru.logger

PROXY = "http://user:pass@proxyhost:port"  # placeholder; the real HTTP proxy endpoint is not shown


# Async retry decorator
def wait_incrementing(start, increment, max_wait):
    async def wait_strategy(retry_number):
        return min(start + increment * retry_number, max_wait)
    return wait_strategy


def stop_after_attempt(max_attempts):
    async def stop_strategy(retry_number):
        return retry_number >= max_attempts
    return stop_strategy


def async_retry(wait=None, stop=None):
    if wait is None:
        wait = asyncio.sleep

    if stop is None:
        async def stop_strategy(retry_number):
            return False
    else:
        stop_strategy = stop

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            retry_number = 0
            while True:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    retry_number += 1
                    logger.info(
                        f"Retrying {retry_number}",
                        keyword=kwargs["query"],
                    )
                    if await stop_strategy(retry_number):
                        raise e
                    await asyncio.sleep(await wait(retry_number))
        return wrapper
    return decorator


def exception_handling(func):
    @wraps(func)
    async def wrapper(*args, **kwargs):
        try:
            return await func(*args, **kwargs)
        except Exception as e:
            logger.exception(f"Exception handled {e}")
            return []
    return wrapper

# operation functions

@exception_handling
@async_retry(
    wait=wait_incrementing(start=5, increment=10, max_wait=30),
    stop=stop_after_attempt(5)
)
async def aget_results(query):
    results = await AsyncDDGS(proxies=PROXY).news(
        keywords=query, region="wt-wt", safesearch="off", timelimit="1d", max_results=20
    )
    return results


async def main():
    words = ["sun", "earth", "moon"] * 1_000  # increase the number
    tasks = [aget_results(query=w) for w in words]
    results = await asyncio.gather(*tasks)
    results = list(itertools.chain.from_iterable(results))
    print(len(results))
    return results

if __name__ == "__main__":
    results = asyncio.run(main())
    print(len(results))

What could be the problem? If the number of connections is the issue, how can I control it? I remember that semaphores exist, but how would I use one here?

Your code doesn't work.

  • Use asyncio.Semaphore to limit the number of concurrent requests.
  • Increase the timeout.
  • Correct 'timelimit' (use "d", not "1d").

import asyncio
import logging

from duckduckgo_search import AsyncDDGS

logging.basicConfig(level=logging.DEBUG)
SEM = asyncio.Semaphore(10)
proxies = "socks5://localhost:9150"


async def aget_results(keywords):
    async with SEM:  # at most 10 requests in flight at once
        while True:  # retry until the request succeeds
            try:
                results = await AsyncDDGS(proxies=proxies, timeout=20).news(
                    keywords,
                    region="wt-wt",
                    safesearch="off",
                    timelimit="d",
                    max_results=20,
                )
                return results
            except Exception as ex:
                logging.warning(f"{type(ex).__name__}: {ex}")


async def main():
    words = ["sun", "earth", "moon"]
    tasks = [aget_results(keywords=w) for w in words]
    results = await asyncio.gather(*tasks)
    for r in results:
        print(r)
        print(len(r))
    return results


if __name__ == "__main__":
    asyncio.run(main())
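
One caveat with the example above: on a persistent failure the while True loop retries immediately and forever. A variant that keeps the semaphore and the larger timeout but caps the number of attempts and waits between them could look like the sketch below (the attempt cap, the delay values, and the proxy address are placeholder choices, not anything prescribed by the library):

import asyncio
import logging

from duckduckgo_search import AsyncDDGS

logging.basicConfig(level=logging.INFO)
SEM = asyncio.Semaphore(10)          # at most 10 concurrent requests
proxies = "socks5://localhost:9150"  # placeholder proxy endpoint


async def aget_results(keywords, max_attempts=5):
    async with SEM:
        for attempt in range(1, max_attempts + 1):
            try:
                return await AsyncDDGS(proxies=proxies, timeout=20).news(
                    keywords,
                    region="wt-wt",
                    safesearch="off",
                    timelimit="d",
                    max_results=20,
                )
            except Exception as ex:
                logging.warning(f"attempt {attempt}: {type(ex).__name__}: {ex}")
                if attempt == max_attempts:
                    return []  # give up so asyncio.gather still completes
                # incremental delay, capped at 30 seconds
                await asyncio.sleep(min(5 + 10 * attempt, 30))


async def main():
    words = ["sun", "earth", "moon"]
    results = await asyncio.gather(*(aget_results(w) for w in words))
    for r in results:
        print(len(r))


if __name__ == "__main__":
    asyncio.run(main())

Compared with the answer above, only the retry policy changes; the concurrency limit and the per-request timeout stay the same.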