How do you parse HTML with a variety of languages and parsing libraries?
When answering:
Individual comments will be linked to in answers to questions
language: Python
library: HTMLParser
#!/usr/bin/python
from HTMLParser import HTMLParser
class FindLinks(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
def handle_starttag(self, tag, attrs):
at = dict(attrs)
if tag == 'a' and 'href' in at:
print at['href']
find = FindLinks()
html = ""
for link in ("foo", "bar", "baz"):
html += '%s' % (link, link)
html += ""
find.feed(html)