# Linux
python3 -m pip install parser-html
# Windows
python -m pip install parser-html
# Build from source
python -m pip install git+https://github.com/AaravMalani/htmlparse
Usage
import htmlparse

with open('index.html', 'r') as f:
    element = htmlparse.parse_html(f.read())
if not element:
    raise ValueError("Parsing failed!")
print(element.children)  # Sub-elements
print(element.innerHTML)  # Data enclosed by tag
print(element.outerHTML)  # Data enclosed by tag as well as the tag itself
element.innerHTML = 'e>'  # Rebuilds this element and sets the innerHTML of all the parent elements
print(element.children)  # ['e>'] (The HTMLText element is represented as a string literal)
print(element.children[0].text)  # e> (Use HTMLText.outerHTML for an HTML escaped string (e&gt;) however don't set it)
element.outerHTML = '<div class="black blue"><a href="https://github.com/" id="abc"></div>'  # Read above statement
# assigning to element.children is in the works
print(tag.attrs)  # {"href":"https://github.com/", "id":"abc"}
print(tag.tag_name)  # a
element.children = []
element.attrs = {}  # WARNING! You have to set it, you can't do element.attrs.update or element.attrs |=
print(tag.outerHTML)  # <div></div>
ToDo
Support for CSS styles
Support for JS scripts
Support for assignment to HTMLElement.children list