# Example: scraping text jokes from Qiushibaike (糗事百科)
import requests
from fake_useragent import UserAgent
import re
# Listing page that is downloaded and scanned for jokes.
url = 'https://www.qiushibaike.com/text/'

# Matches the text inside the first <span> of each <div class="content">.
# The capture is non-greedy so that it stops at the first closing </span>
# (a greedy ".+" would swallow everything up to the LAST </span> on the same
# line) and so that whitespace before </span> is not included in the result.
JOKE_PATTERN = re.compile(r'<div class="content">\s*<span>\s*(.+?)\s*</span>')


def fetch_page(page_url, timeout=10):
    """Download *page_url* and return the response body as text.

    Args:
        page_url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded response body (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Send the Chrome User-Agent string supplied by fake_useragent.
    headers = {'User-Agent': UserAgent().chrome}
    response = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page and writing an empty file.
    response.raise_for_status()
    return response.text


def extract_jokes(html):
    """Return the list of joke snippets found in *html*.

    Each snippet is the text captured by ``JOKE_PATTERN``; inline markup
    inside the span (for example ``<br/>``) is returned unchanged. An input
    without any match yields an empty list.
    """
    return JOKE_PATTERN.findall(html)


def save_jokes(jokes, path='duanzi.txt'):
    """Write every joke to *path* (UTF-8), each followed by three newlines.

    The file is opened in ``'w'`` mode, so existing content is overwritten.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(joke + '\n\n\n' for joke in jokes)


def main():
    """Fetch the listing page, extract the jokes and save them to disk."""
    save_jokes(extract_jokes(fetch_page(url)))


if __name__ == '__main__':
    main()