# Extract web page titles
from html.parser import HTMLParser
class MyHTMLParser(HTMLParser):
    """Print the text found inside marked ``<h2>`` headings.

    An ``<h2>`` start tag counts as marked when any of its attributes has
    the value ``'v7tit'``; the attribute name itself is not checked. While
    the parser is inside such a heading, every text chunk passed to
    ``handle_data`` is printed with the prefix ``"Extracted data :"``.
    """

    # True while the parser is inside a marked <h2>. The class-level value
    # is the default; instances override it by assigning to self.ctag.
    ctag = False

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """Switch capturing on when a marked ``<h2>`` opens.

        Args:
            tag: Name of the tag that was opened.
            attrs: ``(name, value)`` pairs for the tag's attributes.
        """
        if tag != 'h2':
            return
        # Only the attribute value (index 1) is compared, so any attribute
        # carrying 'v7tit' marks the heading.
        if any(attr[1] == 'v7tit' for attr in attrs):
            self.ctag = True

    def handle_data(self, data: str) -> None:
        """Print a text chunk if it lies inside a marked heading.

        Args:
            data: Text content found between tags.
        """
        if self.ctag:
            print("Extracted data :", data)

    def handle_endtag(self, tag: str) -> None:
        """Switch capturing off when an ``<h2>`` closes.

        Only ``</h2>`` ends the capture. Resetting on every end tag would
        drop text that follows a nested inline element (for example
        ``<em>...</em>``) inside the heading.

        Args:
            tag: Name of the tag that was closed.
        """
        if tag == 'h2':
            self.ctag = False
# Demo: run the parser over a small inline document. The only output comes
# from whatever MyHTMLParser prints while handling the markup.
parser = MyHTMLParser()
parser.feed(
    '<html><head><title>Test</title></head>'
    '<body><h2 allign = "v7tit">热点新闻</h2>'
)
# Signal end of input so any text the parser is still buffering is processed.
parser.close()