#糗事百科段子爬虫
#用户代理
#多行匹配:需要用到模式修正符S 让.匹配包括换行符
import re
import sys
import urllib.request
# Browser-like User-Agent header sent with every request made through
# urllib.request once the opener below is installed.
head = (
    "User-Agent",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3641.400 QQBrowser/10.4.3284.400",
)
opener = urllib.request.build_opener()
opener.addheaders = [head]
# Installing the opener globally makes urllib.request.urlopen() use it.
urllib.request.install_opener(opener)

PAGE_URL_TEMPLATE = "https://www.qiushibaike.com/text/page/{}/"

# re.S lets "." match newlines, so one pattern can span the several lines
# between the opening <div class="content"> and its closing </div>.
# Compiled once here instead of once per page.
JOKE_PATTERN = re.compile(
    r'<div class="content">.*?<span>(.*?)</span>.*?</div>', re.S
)

SEPARATOR = "-------------------"


def build_page_url(page_number):
    """Return the listing URL for the given 1-based page number."""
    return PAGE_URL_TEMPLATE.format(page_number)


def extract_jokes(html):
    """Return the text of every <span> found inside a content <div>.

    The captured text is returned exactly as it appears in *html*
    (surrounding whitespace and any inner markup are kept). An input with
    no matches yields an empty list.
    """
    return JOKE_PATTERN.findall(html)


def fetch_page(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Undecodable bytes are dropped ("ignore"). The response is closed as
    soon as the body has been read. *timeout* is in seconds.

    Raises:
        OSError: on network failure; urllib's URLError/HTTPError and
            socket timeouts are all OSError subclasses.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8", "ignore")


def crawl(first_page=1, last_page=13):
    """Print every joke on pages *first_page*..*last_page* (inclusive).

    Each joke is followed by a separator line. A page that cannot be
    fetched is reported on stderr and skipped, so one bad request does not
    abort the rest of the crawl.
    """
    for page_number in range(first_page, last_page + 1):
        url = build_page_url(page_number)
        try:
            html = fetch_page(url)
        except OSError as err:
            print("Skipping {}: {}".format(url, err), file=sys.stderr)
            continue
        for joke in extract_jokes(html):
            print(joke)
            print(SEPARATOR)


# Crawl only when executed as a script; importing the module stays
# network-free so the helpers above can be reused and tested.
if __name__ == "__main__":
    crawl()
# Residue from the web page this script was copied from (not part of the program):
# 糗事百科段子爬虫
# 最新推荐文章于 2024-07-12 16:16:27 发布