I am trying to get a value out of an HTML page using the Python HTMLParser library. The value I want to get hold of is within this HTML element:
...
A small correction to line 3:
HTMLParser.HTMLParser.__init__(self)
it should be
HTMLParser.__init__(self)
The following worked for me, though:
import urllib2
from HTMLParser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Collect the text found inside selected ``required_tag`` elements.

    Recording starts at a ``required_tag`` start tag that carries the
    attribute ``somename="somevale"`` and stops at the end tag that closes
    it. Every text chunk seen while recording is appended to ``data``.

    Attributes:
        recording: Nesting depth of ``required_tag`` elements currently being
            recorded; 0 means text is ignored.
        data: Text chunks captured so far, in document order.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) so this also works
        # where HTMLParser is an old-style class, as in Python 2.
        HTMLParser.__init__(self)
        self.recording = 0
        self.data = []

    def handle_starttag(self, tag, attrs):
        """Start recording, or go one level deeper, on a ``required_tag``.

        Args:
            tag: Name of the tag that was opened.
            attrs: List of ``(name, value)`` attribute pairs of the tag.
        """
        if tag != 'required_tag':
            return

        matched = False
        for name, value in attrs:
            if name == 'somename' and value == 'somevale':
                print("%s %s" % (name, value))
                print("Encountered the beginning of a %s tag" % tag)
                matched = True

        # Count depth instead of resetting to 1: a required_tag nested inside
        # a recorded one must not end the recording when it closes.
        if matched or self.recording:
            self.recording += 1

    def handle_endtag(self, tag):
        """Leave one level of recording when a ``required_tag`` closes.

        Args:
            tag: Name of the tag that was closed.
        """
        if tag == 'required_tag':
            # Only unwind levels that were actually opened. Decrementing
            # unconditionally drives the counter negative (truthy), which
            # would make handle_data capture unrelated text.
            if self.recording > 0:
                self.recording -= 1
            print("Encountered the end of a %s tag" % tag)

    def handle_data(self, data):
        """Store a text chunk if it lies inside a recorded element.

        Args:
            data: Text content reported by the parser.
        """
        if self.recording:
            self.data.append(data)
# Download the page, run it through the parser and print the captured text.
p = MyHTMLParser()
f = urllib2.urlopen('http://www.someurl.com')
try:
    html = f.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    f.close()
p.feed(html)
# close() flushes any text the parser is still buffering, so call it before
# reading p.data; otherwise trailing content can be missing from the output.
p.close()
print(p.data)
`