strip tags python

后端 未结 9 1641
深忆病人
深忆病人 2020-12-17 23:07

I want the following functionality.

input: this is test <b> bold text </b> normal text
expected output: this is test normal text
9条回答
  •  误落风尘
    2020-12-17 23:20

    Looks like you want HTMLParser. (html.parser in Python 3.)

    try:
        from html.parser import HTMLParser  # Python 3
    except ImportError:
        from HTMLParser import HTMLParser  # Python 2
    from sys import stdout
    class Filter(HTMLParser):
        """Echo HTML to a stream while dropping selected elements entirely.

        Every element whose tag name is in ``ignored_tags`` is removed together
        with everything nested inside it (text, child tags, comments, character
        references).  All other markup is written back out unchanged.

        Feed input with ``feed()`` and finish with ``close()`` as with any
        ``HTMLParser``.  The parser reports tag names in lower case, so
        ``ignored_tags`` should contain lower-case names.
        """

        # Void elements never have an end tag, so an ignored void element must
        # not open a suppressed region (it would never be closed again).
        _VOID_TAGS = frozenset((
            'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
            'link', 'meta', 'param', 'source', 'track', 'wbr',
        ))

        def __init__(self, ignored_tags, out=None):
            """Create a filter.

            ignored_tags -- container of tag names to strip, contents included.
            out -- object with a ``write(text)`` method that receives the
                   filtered markup; defaults to ``sys.stdout``.
            """
            # Explicit base-class call: works with the Python 2 HTMLParser,
            # which is an old-style class and cannot be used with super().
            try:
                # Python 3.4+: keep references undecoded so handle_charref()
                # and handle_entityref() are called and can echo them verbatim.
                HTMLParser.__init__(self, convert_charrefs=False)
            except TypeError:
                # Older parsers have no such option and never decode.
                HTMLParser.__init__(self)
            # Number of currently open ignored elements; output is suppressed
            # while this is greater than zero.
            self.ignorelevel = 0
            self.ignored_tags = ignored_tags
            self.out = stdout if out is None else out

        def _emit(self, text):
            """Write *text* unless the parser is inside an ignored element."""
            if self.ignorelevel == 0:
                self.out.write(text)

        def handle_starttag(self, tag, attrs):
            """Open a suppressed region for ignored tags; echo the others."""
            if tag in self.ignored_tags:
                # Only ignored tags are counted, so unbalanced or void markup
                # inside the region cannot leave the level stuck above zero.
                if tag not in self._VOID_TAGS:
                    self.ignorelevel += 1
            else:
                # get_starttag_text() already includes the angle brackets.
                self._emit(self.get_starttag_text())

        def handle_startendtag(self, tag, attrs):
            """Echo a self-closing tag unless it is ignored or suppressed."""
            if tag not in self.ignored_tags:
                self._emit(self.get_starttag_text())

        def handle_endtag(self, tag):
            """Close a suppressed region for ignored tags; echo the others."""
            if tag in self.ignored_tags:
                # A stray end tag at level 0 is dropped without going negative.
                if self.ignorelevel > 0 and tag not in self._VOID_TAGS:
                    self.ignorelevel -= 1
            else:
                self._emit('</' + tag + '>')

        def handle_data(self, data):
            """Echo text content."""
            self._emit(data)

        def handle_charref(self, name):
            """Echo a numeric character reference such as ``&#62;``."""
            self._emit('&#' + name + ';')

        def handle_entityref(self, name):
            """Echo a named character reference such as ``&amp;``."""
            self._emit('&' + name + ';')

        def handle_comment(self, data):
            """Echo a comment with its delimiters restored."""
            self._emit('<!--' + data + '-->')

        def handle_decl(self, data):
            """Echo a declaration such as ``<!DOCTYPE html>``."""
            self._emit('<!' + data + '>')

        def handle_pi(self, data):
            """Echo a processing instruction."""
            self._emit('<?' + data + '>')
    

提交回复
热议问题