# Fetch a page with mechanize and echo its raw HTML, one line at a time.
from mechanize import Browser

br = Browser()
# NOTE(review): the URL looks like a placeholder -- substitute the real page.
br.open('http://somewebpage')
# readlines() returns the response body split into lines (markup included).
html = br.response().readlines()
for line in html:
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(line)
When p
Here's my solution for Python 3.
import html
import re
def html_to_txt(html_text: str) -> str:
    """Return *html_text* with markup tags removed and entities decoded.

    Tags are matched with the simple pattern ``<[^>]+>``; this is a
    lightweight heuristic, not a full HTML parser, so text inside elements
    such as ``<script>`` or ``<style>`` is kept.

    Args:
        html_text: The HTML source to convert.

    Returns:
        The text with every ``<...>`` span removed and character references
        (``&amp;``, ``&lt;``, ``&#39;`` ...) replaced by the characters they
        stand for.
    """
    # Strip tags BEFORE decoding entities: an escaped "&lt;" is literal text,
    # and decoding it first would let the tag pattern swallow everything up
    # to the next ">" (e.g. "1 &lt; 2 and 3 &gt; 1" would lose "< 2 and 3 >").
    without_tags = re.sub(r"<[^>]+>", "", html_text)
    return html.unescape(without_tags)
I'm not sure it is perfect, but it solved my use case and seems simple.