# 标题:爬取古诗文网古诗
# encoding=utf-8
import requests
import re
# 请求数据
def parse_page(url):
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari'
'/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400'
}
response = requests.get(url,headers)
text = response.text
# 注: . 是匹配不到 \n 的
# 注: DOTALL 匹配所有的字符
titles = re.findall(r'<div\sclass="cont">.*?<b>(.*?)<b>',text,re.DOTA