from urllib import request
import re
class Spider:
    """Download the page at ``Spider.url`` and extract fragments from it.

    The page is fetched with ``urllib.request``, decoded as UTF-8, and
    scanned with ``Spider.root_pattern``; ``go`` runs the whole pipeline.
    """

    url = 'https://www.panda.tv/'
    # Use a capture group to grab the content between two delimiting markers.
    # NOTE(review): the delimiters that originally surrounded the group seem
    # to be missing from this source (only line breaks remain around it), so
    # the pattern below captures the text between two newlines -- confirm and
    # restore the intended opening/closing tags.
    root_pattern = r'\n([\s\S]*?)\n'

    def __fetch_content(self):
        """Return the body of ``Spider.url`` decoded as a UTF-8 string.

        Raises:
            urllib.error.URLError: if the URL cannot be opened.
            UnicodeDecodeError: if the response body is not valid UTF-8.
        """
        # The context manager closes the response even if read() fails.
        with request.urlopen(Spider.url) as r:
            htmls = r.read()
        # The keyword is ``encoding``; the earlier misspelling made ``str``
        # raise TypeError on this line.
        return str(htmls, encoding='utf-8')

    def __analysis(self, htmls):
        """Return every ``root_pattern`` capture found in ``htmls``.

        Args:
            htmls: The decoded page text to search.

        Returns:
            A list of the captured strings, empty when nothing matches.
        """
        return re.findall(Spider.root_pattern, htmls)

    def go(self):
        """Fetch the page, analyse it, and return the list of captures."""
        htmls = self.__fetch_content()
        return self.__analysis(htmls)
# Script driver: build a Spider and run its fetch-and-analyse pipeline.
# These statements execute at module level, i.e. also when the file is imported.
spider = Spider()
spider.go()