Python 3.4+
Unit-tested from Python 3.4 to 3.8
If python3 is the default Python binary:
pip install pynav2
If python2 is the default Python binary:
pip3 install pynav2
GNU LGPLv3 (GNU Lesser General Public License Version 3)
Required for all examples
from pynav2 import Browser
b = Browser()
Get http://example.com (https is used if available on the server)
>>> b.get('example.com')
<Response [200]>
>>> b.text # alias for b.response.text
'<!DOCTYPE html>\n<html lang="mul" class="no-js">\n<head>\n<meta charset="utf-8">\n<title>example.com</title>...'
Get http://example.com/user-agent/json, which returns the JSON-encoded content of the response, if any
>>> b.get('example.com/user-agent/json')
<Response [200]>
>>> b.json # alias for b.response.json()
{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0'}
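Since b.json is an alias for b.response.json(), a non-JSON body raises a ValueError (Requests' behavior, assuming pynav2 lets it propagate); a minimal guard:
>>> try:
...     data = b.json
... except ValueError:  # body was not valid JSON
...     data = None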
>>> data = {'q': 'python'}
>>> b.post('example.com/search', data=data) # form-encoded data sent in the request body
<Response [200]>
>>> b.text
'<!DOCTYPE html>\n<html lang="mul" class="no-js">\n<head>\n<meta charset="utf-8">\n<title>example.com</title>...'
>>> data = {'login': 'user', 'password': 'pass'}
>>> b.post('example.com/login', json=data) # dict is JSON-encoded and sent in the body of the request
<Response [200]>
>>> b.json
{'login': 'success'}
>>> b.head('example.com')
<Response [200]>
>>> b.response.headers
{'Server': 'nginx', 'Content-Type': 'text/html; charset=utf-8', 'Content-Length': '48842', 'Age': '3154', 'Connection': 'keep-alive'}
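Because Requests exposes headers as a case-insensitive dict, a HEAD request is a cheap way to inspect a resource before fetching it; a sketch (the 10 MB cutoff is arbitrary):
>>> b.head('example.com')
<Response [200]>
>>> size = int(b.response.headers.get('Content-Length', 0))
>>> if size < 10 * 1024 * 1024:  # only fetch bodies smaller than 10 MB
...     b.get('example.com')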
>>> data = {'version': '2.1', 'licence': 'LGPL'}
>>> b.put('example.com/api/about/', data=data)
<Response [200]>
>>> b.json
{'update': 'success'}
>>> data = {'version': '2.1'}
>>> b.patch('example.com/api/about/', data=data)
<Response [200]>
>>> b.json
{'patch': 'success'}
>>> b.delete('example.com/api/user/102')
<Response [200]>
>>> b.json
{'delete': 'success'}
>>> b.options('example.com/api/user')
<Response [200]>
>>> b.json
{'options': '...'}
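The methods a resource supports are conventionally listed in the Allow response header, so they can also be read directly from the headers (same hypothetical endpoint as above):
>>> b.options('example.com/api/user')
<Response [200]>
>>> b.response.headers.get('Allow')  # e.g. 'GET, POST, HEAD, OPTIONS'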
>>> b.get('example.com')
<Response [200]>
>>> b.links
['http://example.com/news', 'http://example.com/forum', 'http://example.com/contact']
>>> for link in b.links:
...     print(link)
...
http://example.com/news
http://example.com/forum
http://example.com/contact
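Since b.links is a plain list of absolute URLs, ordinary Python filtering works on it:
>>> [link for link in b.links if '/forum' in link]
['http://example.com/forum']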
Any BeautifulSoup find_all() parameter can be passed; see the Beautiful Soup documentation
>>> import re
>>> b.get('example.com')
<Response [200]>
>>> b.get_links(text='Python Events') # string is treated as a regular expression
>>> b.get_links(class_="jump-link") # class attribute: exact match, no regular expression
>>> b.get_links(href="windows") # treated as a regular expression
>>> b.get_links(title=re.compile('success')) # explicit regular expression
>>> b.get('example.com')
<Response [200]>
>>> b.images
['http://example.com/img/logo.png', 'http://example.com/img/picture.jpg', 'http://there.com/news.gif']
Any BeautifulSoup find_all() parameter can be passed; see the Beautiful Soup documentation
>>> b.get('example.com')
<Response [200]>
>>> b.get_images(src='logo') # string is treated as a regular expression
>>> b.get_images(class_='python-logo') # class attribute: exact match, no regular expression
>>> b.get_images(alt='yth') # treated as a regular expression
>>> b.verbose = True
>>> b.download('http://example.com/ubuntu-amd64', '/tmp') # follows redirects and reads the Content-Disposition header to determine the filename
downloading ubuntu-18.04.1-desktop-amd64.iso (1.8 GB) to: /tmp/ubuntu-18.04.1-desktop-amd64.iso
download completed in 12 minutes 5 seconds (1.8 GB)
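A sketch of downloading several files in a row (placeholder URLs; b.download is called with the same signature as above):
>>> urls = ['http://example.com/file1.iso', 'http://example.com/file2.iso']
>>> for url in urls:
...     b.download(url, '/tmp')  # filename is resolved per file, as above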
>>> b.handle_referer = True
>>> b.get('somewhere.com')
>>> b.get('example.com') # request headers will have http://somewhere.com as Referer
>>> b.get('there.com') # request headers will have http://example.com as Referer
>>> b.referer = 'http://www.here.com'
>>> b.get('example.com') # request headers will have http://www.here.com as Referer
The useragent module includes a list of user agents:
firefox_windows, chrome_windows, edge_windows, ie_windows, firefox_linux, chrome_linux, safari_mac
The default user agent is firefox_windows
>>> from pynav2 import useragent
>>> b.user_agent = useragent.firefox_linux
>>> b.get('example.com') # request headers will have 'Mozilla/5.0 (X11; Linux x86_64; rv:56.0) Gecko/20100101 Firefox/56.0' as User-Agent
>>> b.user_agent = 'my_app/v1.0'
>>> b.get('example.com') # request headers will have my_app/v1.0 as User-Agent
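Because b.user_agent accepts any string, the bundled user agents can also be rotated; a sketch using the constants listed above:
>>> import random
>>> from pynav2 import useragent
>>> agents = [useragent.firefox_windows, useragent.chrome_linux, useragent.safari_mac]
>>> b.user_agent = random.choice(agents)  # pick a different identity
>>> b.get('example.com')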
>>> b.set_sleep_time(0.5, 1.5) # wait a random delay between 0.5 and 1.5 seconds before each request
>>> b.get('example.com') # waits the random delay, then sends the request
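Combined with a loop, this gives a simple polite-crawling pattern (placeholder URLs):
>>> b.set_sleep_time(0.5, 1.5)
>>> for url in ['example.com/page1', 'example.com/page2']:
...     b.get(url)  # each request is preceded by a fresh random delay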
Set a 10-second timeout for all requests
>>> b.timeout = 10
>>> b.get('example1.com')
>>> b.get('example2.com')
>>> b.get('example3.com')
>>> b.session.close() # close the underlying session when done
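To guarantee the session is released even if a request fails, the same call fits in a try/finally block:
>>> try:
...     b.get('example1.com')
... finally:
...     b.session.close()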
Pass proxies for a single request. For SOCKS proxies, see the Requests documentation
>>> b.get('https://httpbin.org/ip').json()['origin']
111.111.111.111
>>> proxies = {'https':'10.0.0.0:1234'}
>>> b.timeout = 10 # a longer timeout helps if the proxy is slow
>>> b.get('https://httpbin.org/ip', proxies=proxies).json()['origin']
10.0.0.0
Set proxies on the browser to apply them to all subsequent requests. For SOCKS proxies, see the Requests documentation
>>> b.get('https://httpbin.org/ip').json()['origin']
111.111.111.111
>>> b.proxies = {'https':'10.0.0.0:1234'}
>>> b.timeout = 10 # a longer timeout helps if the proxy is slow
>>> b.get('https://httpbin.org/ip').json()['origin']
10.0.0.0
Proxies can also be set per domain. For SOCKS proxies, see the Requests documentation
>>> b.get('https://httpbin.org/ip').json()['origin']
111.111.111.111
>>> b.proxies = {'https': '10.0.0.0:1234', 'https://specific-domain.com': '10.11.12.13:1234'}
>>> b.timeout = 10 # a longer timeout helps if the proxy is slow
>>> b.get('https://httpbin.org/ip').json()['origin']
10.0.0.0
>>> b.get('https://specific-domain.com/ip').json()['origin']
10.11.12.13
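If a proxy is unreachable or too slow, the request fails; assuming pynav2 lets the underlying Requests exceptions propagate (an assumption, not documented above), they can be caught like this:
>>> import requests
>>> try:
...     b.get('https://httpbin.org/ip')
... except (requests.exceptions.ProxyError, requests.exceptions.Timeout):
...     print('proxy unreachable or too slow')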
After a get or post request, Browser.bs (a BeautifulSoup object) is automatically initialized from b.response.text
See the Beautiful Soup documentation
>>> b.get('example.com')
>>> b.bs.find_all('a')
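Any Beautiful Soup call works on b.bs; for example, with the page fetched above:
>>> b.bs.title.get_text()
'example.com'
>>> b.bs.select('a[href]')  # CSS selector, returns a list of Tag objects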
>>> b.get('example.com')
>>> b.session # the underlying Requests session
>>> b.request # the last request sent
>>> b.response # the last response received
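Since b.session exposes the underlying Requests session (as suggested by b.session.close() above), session-level defaults such as headers or cookies can be set on it directly (a sketch, assuming pynav2 does not overwrite them per request):
>>> b.session.headers.update({'Accept-Language': 'en-US'})  # sent with every request
>>> b.session.cookies.set('theme', 'dark')  # persisted across requests
>>> b.get('example.com')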
>>> b.get('example1.com')
>>> b.get('example2.com')
>>> b.get('example3.com')
>>> print(b.history)
['example1.com', 'example2.com', 'example3.com']
Disable urllib3 warnings (e.g. InsecureRequestWarning)
>>> import urllib3
>>> urllib3.disable_warnings()
>>> b.get('example.com') # no warnings