# Scrape text jokes from Qiushibaike with a regular expression and save them to a text file.
import requests
import re
from fake_useragent import UserAgent
# Fetch one page of the Qiushibaike text-joke listing, pull each joke out of
# the HTML with a regular expression, and append the jokes to duanzi.txt.

# Page to fetch (page 1 of the text listing).
url = 'https://www.qiushibaike.com/text/page/1/'

# Send a randomly chosen User-Agent string supplied by fake_useragent.
headers = {
    'User-Agent': UserAgent().random,
}

# requests applies no timeout by default, so pass one explicitly to avoid
# blocking forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Raise on 4xx/5xx instead of running the regex over an error page.
response.raise_for_status()
info = response.text

# Extract with a regex: capture the text of the <span> that directly follows
# each <div class="content">. The capture is non-greedy and uses re.S so that
# it stops at the first closing </span> and still matches when the joke text
# spans several lines; surrounding whitespace is left out of the capture.
infos = re.findall(
    r'<div class="content">\s*<span>\s*(.+?)\s*</span>',
    info,
    re.S,
)

# Save: append every joke to the output file, separated by blank lines.
# The loop variable is deliberately not named `info`, so the page HTML held
# in `info` is not overwritten.
with open('duanzi.txt', 'a', encoding='utf-8') as f:
    for joke in infos:
        f.write(joke + "\n\n\n")