[WhoScored] Error 404 - Unable to download chromedriver
jackswales opened this issue · comments
I'm following the code from the notebook supplied on the soccerdata website to scrape WhoScored data. I set this up before and it worked fine, but after leaving it for a few days it has suddenly stopped working:
ws = sd.WhoScored(leagues="ENG-Premier League", seasons=2021)
print(ws.__doc__)
returns:
HTTPError Traceback (most recent call last)
Cell In[9], line 1
----> 1 ws = sd.WhoScored(leagues="ENG-Premier League", seasons=2021)
2 print(ws.__doc__)
File ~\anaconda3\Lib\site-packages\soccerdata\whoscored.py:187, in WhoScored.__init__(self, leagues, seasons, proxy, no_cache, no_store, data_dir, path_to_browser, headless)
173 def __init__(
174 self,
175 leagues: Optional[Union[str, List[str]]] = None,
(...)
184 headless: bool = False,
185 ):
186 """Initialize the WhoScored reader."""
--> 187 super().__init__(
188 leagues=leagues,
189 proxy=proxy,
190 no_cache=no_cache,
191 no_store=no_store,
192 data_dir=data_dir,
193 path_to_browser=path_to_browser,
194 headless=headless,
195 )
196 self.seasons = seasons # type: ignore
197 self.rate_limit = 5
File ~\anaconda3\Lib\site-packages\soccerdata\_common.py:403, in BaseSeleniumReader.__init__(self, leagues, proxy, no_cache, no_store, data_dir, path_to_browser, headless)
400 self.headless = headless
402 try:
--> 403 self._driver = self._init_webdriver()
404 except WebDriverException as e:
405 logger.error(
406 """
407 The ChromeDriver was unable to initiate/spawn a new
(...)
411 e,
412 )
File ~\anaconda3\Lib\site-packages\soccerdata\_common.py:440, in BaseSeleniumReader._init_webdriver(self)
438 chrome_options.add_argument("--proxy-server=" + proxy_str)
439 chrome_options.add_argument("--host-resolver-rules=" + resolver_rules)
--> 440 return uc.Chrome(options=chrome_options)
File ~\anaconda3\Lib\site-packages\undetected_chromedriver\__init__.py:258, in Chrome.__init__(self, options, user_data_dir, driver_executable_path, browser_executable_path, port, enable_cdp_events, desired_capabilities, advanced_elements, keep_alive, log_level, headless, version_main, patcher_force_close, suppress_welcome, use_subprocess, debug, no_sandbox, user_multi_procs, **kw)
251 self.patcher = Patcher(
252 executable_path=driver_executable_path,
253 force=patcher_force_close,
254 version_main=version_main,
255 user_multi_procs=user_multi_procs,
256 )
257 # self.patcher.auto(user_multiprocess = user_multi_num_procs)
--> 258 self.patcher.auto()
260 # self.patcher = patcher
261 if not options:
File ~\anaconda3\Lib\site-packages\undetected_chromedriver\patcher.py:178, in Patcher.auto(self, executable_path, force, version_main, _)
176 self.version_main = release.version[0]
177 self.version_full = release
--> 178 self.unzip_package(self.fetch_package())
179 return self.patch()
File ~\anaconda3\Lib\site-packages\undetected_chromedriver\patcher.py:287, in Patcher.fetch_package(self)
284 download_url %= (self.version_full.vstring, self.platform_name, zip_name)
286 logger.debug("downloading from %s" % download_url)
--> 287 return urlretrieve(download_url)[0]
File ~\anaconda3\Lib\urllib\request.py:241, in urlretrieve(url, filename, reporthook, data)
224 """
225 Retrieve a URL into a temporary location on disk.
226
(...)
237 data file as well as the resulting HTTPMessage object.
238 """
239 url_type, path = _splittype(url)
--> 241 with contextlib.closing(urlopen(url, data)) as fp:
242 headers = fp.info()
244 # Just return the local path and the "headers" for file://
245 # URLs. No sense in performing a copy unless requested.
File ~\anaconda3\Lib\urllib\request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
214 else:
215 opener = _opener
--> 216 return opener.open(url, data, timeout)
File ~\anaconda3\Lib\urllib\request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
523 for processor in self.process_response.get(protocol, []):
524 meth = getattr(processor, meth_name)
--> 525 response = meth(req, response)
527 return response
File ~\anaconda3\Lib\urllib\request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
631 # According to RFC 2616, "2xx" code indicates that the client's
632 # request was successfully received, understood, and accepted.
633 if not (200 <= code < 300):
--> 634 response = self.parent.error(
635 'http', request, response, code, msg, hdrs)
637 return response
File ~\anaconda3\Lib\urllib\request.py:563, in OpenerDirector.error(self, proto, *args)
561 if http_err:
562 args = (dict, 'default', 'http_error_default') + orig_args
--> 563 return self._call_chain(*args)
File ~\anaconda3\Lib\urllib\request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
494 for handler in handlers:
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
498 return result
File ~\anaconda3\Lib\urllib\request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 404: Not Found
I'm not sure what's changed, or how to fix the issue. Any help greatly appreciated.
This is a problem with `undetected_chromedriver`, not with soccerdata. Do you have the latest version of `undetected_chromedriver`?
Yeah, I reinstalled it to make sure, but I'm still getting the same error message.
So far I've deleted and reinstalled Python, Anaconda, VSC, Chrome, soccerdata, and undetected_chromedriver, and have completed all possible updates, yet nothing seems to work. Any ideas what else I could do to fix this?
I've tested it myself now and everything still seems to work fine. The error that you report seems very similar to an issue that was present in the previous version of undetected-chromedriver (see #485) but is fixed in the current version. You can check which version is installed by running:
>>> import undetected_chromedriver as uc
>>> uc.__version__
'3.5.5'
If you have the latest version and you're still encountering the problem, I recommend seeking help in the issue tracker of the undetected-chromedriver repo.