« Back to Index

[Parsing HTML with the Python standard library]

View original Gist on GitHub

Tags: #python3 #html #parse #ast

Python HTML Parser.py

from html.parser import HTMLParser


class MyHTMLParser(HTMLParser):
    """Demo parser that echoes start tags and text data to stdout.

    Only ``handle_starttag`` and ``handle_data`` are overridden; every
    other parser event is left to the ``HTMLParser`` base class.
    """

    def _announce(self, label, value):
        """Print ``label`` followed by ``value``, separated by one space."""
        print(label, value)

    def handle_starttag(self, tag, attrs):
        """Report the name of an opening tag; ``attrs`` is not used."""
        self._announce("Encountered a start tag:", tag)

    def handle_data(self, data):
        """Report a run of text passed in by the parser."""
        self._announce("Encountered some data  :", data)


# Demo run: build a parser and push one small, complete HTML document
# through it so the handler callbacks print what they encounter.
parser = MyHTMLParser()
parser.feed(
    "".join(
        [
            # <head> section with the page title.
            "<html><head><title>Test</title></head>",
            # Heading plus the opening of a classed <div>.
            "<body><h1>Parse me!</h1><div class='d1__episodes'>",
            # Two links, then the closing tags.
            "<a href='/foo'>foo</a><a href='/bar'>bar</a></div></body></html>",
        ]
    )
)