I want the following functionality.
input: this is test <b>bold text</b> normal text
expected output: this is test normal text
Looks like you want HTMLParser. (html.parser in Python 3.)
try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2
from sys import stdout
class Filter(HTMLParser):
    """Echo an HTML document while dropping selected elements.

    Every piece of markup and text reported by the parser is written back
    out unchanged, except for elements whose tag name is in
    ``ignored_tags``: those are removed together with everything nested
    inside them (text, child elements, comments, references, ...).

    Args:
        ignored_tags: A tag name, or an iterable of tag names, to strip.
            Names are matched case-insensitively.
        out: Object with a ``write(text)`` method that receives the
            filtered markup. Defaults to ``sys.stdout``.
    """

    # Elements that never have a closing tag. They cannot open an ignored
    # region, otherwise the nesting counter would never return to zero.
    VOID_ELEMENTS = frozenset([
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ])

    def __init__(self, ignored_tags, out=None):
        # Call the base initializer explicitly: on Python 2 HTMLParser is
        # an old-style class, so super() cannot be used with it.
        try:
            # Keep "&amp;" / "&#65;" as separate parser events so they can
            # be re-emitted verbatim (Python 3.5+ decodes them by default).
            HTMLParser.__init__(self, convert_charrefs=False)
        except TypeError:
            # Older parsers do not accept the argument and never decode
            # references on their own.
            HTMLParser.__init__(self)
        if isinstance(ignored_tags, str):
            # A bare string means one tag name, not a collection of letters.
            ignored_tags = [ignored_tags]
        # The parser reports tag names in lower case.
        self.ignored_tags = frozenset(tag.lower() for tag in ignored_tags)
        # Number of ignored elements currently open; 0 means "emit output".
        self.ignorelevel = 0
        self.out = stdout if out is None else out

    def _write(self, text):
        """Write ``text`` to the output unless inside an ignored element."""
        if self.ignorelevel == 0:
            self.out.write(text)

    def handle_starttag(self, tag, attrs):
        """Open an ignored region, or echo the start tag as it was written."""
        if tag in self.ignored_tags:
            # Only ignored tags are counted, so unclosed or void children
            # inside the region cannot unbalance the counter.
            if tag not in self.VOID_ELEMENTS:
                self.ignorelevel += 1
            return
        # get_starttag_text() already includes the angle brackets.
        self._write(self.get_starttag_text())

    def handle_startendtag(self, tag, attrs):
        """Echo a self-closing tag unless it is ignored."""
        if tag not in self.ignored_tags:
            self._write(self.get_starttag_text())

    def handle_endtag(self, tag):
        """Close an ignored region, or echo the end tag."""
        if tag in self.ignored_tags:
            # The matching start tag was dropped, so the end tag is dropped
            # too; a stray one must not push the counter below zero.
            if self.ignorelevel > 0 and tag not in self.VOID_ELEMENTS:
                self.ignorelevel -= 1
            return
        self._write('</' + tag + '>')

    def handle_data(self, data):
        """Echo text content."""
        self._write(data)

    def handle_charref(self, name):
        """Echo a numeric character reference such as ``&#65;``."""
        self._write('&#' + name + ';')

    def handle_entityref(self, name):
        """Echo a named entity reference such as ``&amp;``."""
        self._write('&' + name + ';')

    def handle_comment(self, data):
        """Echo an HTML comment."""
        self._write('<!--' + data + '-->')

    def handle_decl(self, data):
        """Echo a declaration such as ``<!DOCTYPE html>``."""
        self._write('<!' + data + '>')

    def handle_pi(self, data):
        """Echo a processing instruction such as ``<?php ... ?>``."""
        self._write('<?' + data + '>')