import urllib2
# Placeholder address; replace it with the URL of the page to download.
website = "WEBSITE"
openwebsite = urllib2.urlopen(website)
try:
    # Read from the response object opened above (the original referenced
    # an undefined name, `getwebsite`, which raised NameError).
    html = openwebsite.read()
finally:
    # Release the connection even if the read fails.
    openwebsite.close()
# The parenthesised form prints the same text under Python 2 and also
# parses under Python 3.
print(html)
So far so good.
But I wa
Here's a lazy (generator-based) version of @stephen's answer:
import codecs
import collections
import html.parser
import itertools
import urllib.request
class LinkParser(html.parser.HTMLParser):
    """HTML parser that collects the ``href`` of every ``<a>`` start tag.

    Links found while feeding are queued in document order.  Reading
    ``links`` returns an iterator that removes links from the queue as it
    advances, so each link is handed out exactly once.  ``links`` is
    read-only; the queue is emptied by ``reset()``.
    """

    def reset(self):
        """Reset the parser state and discard any queued links."""
        super().reset()
        # A deque gives O(1) appends and pops.  Re-wrapping the previous
        # iterator in itertools.chain for every link nests one level deeper
        # per link, which makes draining quadratic on link-heavy input.
        self._pending = collections.deque()

    @property
    def links(self):
        """Return an iterator that pops queued links, oldest first."""
        pending = self._pending

        def drain():
            while pending:
                yield pending.popleft()

        return drain()

    def handle_starttag(self, tag, attrs):
        """Queue the ``href`` values of an ``<a>`` tag; ignore other tags."""
        if tag != 'a':
            return
        for name, value in attrs:
            # A valueless ``href`` attribute is reported as None; there is
            # no URL to queue in that case.
            if name == 'href' and value is not None:
                self._pending.append(value)
def gen_links(stream, parser):
    """Lazily yield links from *stream* as *parser* discovers them.

    Args:
        stream: Iterable of ``bytes`` chunks (lines) that also exposes
            ``headers.get_content_charset()``.
        parser: Object with a ``feed(text)`` method and a ``links``
            iterable holding the links found since it was last drained.

    Yields:
        Each link taken from ``parser.links`` after a chunk is fed.
    """
    encoding = stream.headers.get_content_charset() or 'UTF-8'
    try:
        make_decoder = codecs.getincrementaldecoder(encoding)
    except LookupError:
        # The declared charset is not a codec Python knows; fall back to
        # the same default used when no charset is declared.
        make_decoder = codecs.getincrementaldecoder('UTF-8')
    # An incremental decoder carries a partial multi-byte sequence over to
    # the next chunk, and errors='replace' keeps a malformed byte from
    # aborting the whole stream with UnicodeDecodeError.
    decoder = make_decoder(errors='replace')
    for line in stream:
        parser.feed(decoder.decode(line))
        yield from parser.links
    # Flush whatever the decoder is still holding at end of stream.
    tail = decoder.decode(b'', final=True)
    if tail:
        parser.feed(tail)
        yield from parser.links
Use it like so:
>>> parser = LinkParser()
>>> stream = urllib.request.urlopen('http://stackoverflow.com/questions/3075550')
>>> links = gen_links(stream, parser)
>>> next(links)
'//stackoverflow.com'