import urllib.request
import ssl
import re
def _extract_jokes(html):
    """Parse listing-page markup into an ``{author: joke_text}`` dict.

    Each post is the text between ``<div class="author clearfix">`` and the
    following vote counter.  Inside a post, the author is the first
    ``<h2>`` element and the joke is the first ``<span>`` that directly
    follows ``<div class="content">`` and a newline.  Captured text is
    returned exactly as it appears in the markup (no stripping, no
    unescaping).

    Posts that lack either piece are skipped instead of raising
    ``IndexError``.  When the same author appears more than once, the later
    post overwrites the earlier one, because authors are the dict keys.
    """
    # Compile once per call instead of once per post.
    post_re = re.compile(
        r'<div class="author clearfix">(.*?)'
        r'<span class="stats-vote"><i class="number">',
        re.S,
    )
    author_re = re.compile(r"<h2>(.*?)</h2>", re.S)
    content_re = re.compile(r'<div class="content">\n<span>(.*?)</span>', re.S)

    jokes = {}
    for post in post_re.findall(html):
        author = author_re.search(post)
        content = content_re.search(post)
        if author is None or content is None:
            # Markup did not match the expected layout; nothing to record.
            continue
        jokes[author.group(1)] = content.group(1)
    return jokes


def jokeCrewler(url):
    """Fetch *url* and return the jokes found on it, keyed by author.

    Args:
        url: Address of the listing page to download.

    Returns:
        dict mapping each author's name to the text of their joke, as
        captured from the page markup.  Empty when no post matches.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
    }
    req = urllib.request.Request(url, headers=headers)

    # NOTE(security): this disables TLS certificate verification and relies
    # on a private ``ssl`` helper.  Kept to preserve existing behaviour;
    # prefer ``ssl.create_default_context()`` if the site's certificate is
    # valid.
    context = ssl._create_unverified_context()

    # The ``with`` block guarantees the connection is closed even when
    # reading or decoding fails.
    with urllib.request.urlopen(req, context=context) as response:
        html = response.read().decode("utf-8")

    return _extract_jokes(html)
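# Debugging aid (disabled): dump the downloaded page text to a file so the
# markup can be inspected when the patterns stop matching.
# with open(r"……", 'w') as f:
#     f.write(HTML)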
# Scrape one listing page and print every author together with their joke.
url = "https://www.qiushibaike.com/8hr/page/3/"
info = jokeCrewler(url)
for author, joke in info.items():
    print(author, joke)