# Brief Python web-crawler example: fetching, filtering, and sorting content. Tool: VS Code https://code.visualstudio.com/Download
from urllib import request
import re
class Spider():
# Page that the spider downloads.
url = 'http://www.baidu.com'
# Regex patterns are raw strings so that \s, \S and \d reach the regex engine
# unchanged instead of being treated as (invalid) string escape sequences.
# Captures the remainder of each <li class="hotsearch-item ...> element up to </li>.
root_pattern = r'<li class="hotsearch-item([\s\S]*?)</li>'
# Captures a single digit that sits directly before a closing </span>.
name_pattern = r'>([\d])</span>'
# Captures the text inside the <span class="title-content-title"> element.
content_pattern = r'<span class="title-content-title">([\s\S]*?)</span>'
def __fetch__content(self):
    """Download the page at ``Spider.url`` and return its body as text.

    Returns:
        str: The raw response body decoded as UTF-8.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
        Any error raised by ``urllib.request.urlopen`` (for example on a
        network failure) propagates unchanged.
    """
    # The context manager closes the HTTP response even if read() raises,
    # so the underlying connection is not leaked.
    with request.urlopen(Spider.url) as response:
        raw = response.read()
    return raw.decode('utf-8')
def __analysis(self,htmls):
root_html = re.findall(Spider.root_pattern,htmls)
anchors = []
for item in root_html:
num = re.findall(Spider.name_pattern,item)
content = re.findall(Spider.content_pattern,item)
list = {"num":num,"content":conte