from urllib.request import urlopen
from urllib.request import Request
import re
# Request headers: a desktop Chrome User-Agent string is sent with the request
# (presumably so the site serves the page as it would to a browser -- confirm).
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36',
}

# Page to download.
url = "http://www.doupoxs.com/doupocangqiong/1.html"


def fetch_html(page_url, request_headers):
    """Download *page_url* and return the response body decoded as UTF-8.

    Args:
        page_url: Address of the page to request.
        request_headers: Mapping of HTTP header names to values sent with
            the request.

    Returns:
        The page body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = Request(page_url, headers=request_headers)
    # The context manager closes the HTTP response even if read/decode fails.
    with urlopen(request) as response:
        return response.read().decode('utf-8')


def extract_paragraphs(html):
    """Return the text between every ``<p>`` / ``</p>`` pair in *html*.

    Uses ``re.findall`` with a single capture group, so the result is a list
    of strings (the captured text), in document order.  ``.`` does not match
    a newline here, so a paragraph whose content spans several lines is not
    captured, and a ``<p>`` tag carrying attributes is not matched.

    Args:
        html: The page source to search.

    Returns:
        A list of the captured paragraph contents; empty if nothing matches.
    """
    return re.findall(r'<p>(.*?)</p>', html)


def save_paragraphs(paragraphs, path):
    """Append the ``str()`` form of *paragraphs* to the file at *path*.

    The file is opened in ``'a+'`` mode, so repeated runs append rather than
    overwrite.  The encoding is fixed to UTF-8 so that writing non-ASCII text
    does not depend on the platform's default locale encoding.

    Args:
        paragraphs: The list of extracted strings to record.
        path: Destination file path.
    """
    with open(path, 'a+', encoding='utf-8') as f:
        f.write(str(paragraphs))


def main():
    """Fetch the page, extract its paragraphs, save them, and print them."""
    # Match the wanted information with a regular expression.
    res = extract_paragraphs(fetch_html(url, headers))
    save_paragraphs(res, 'E:/python/myPython/doupochangqiong.txt')
    print(res)


if __name__ == '__main__':
    main()
# 爬取小说斗破苍穹 (Scraping the novel "Battle Through the Heavens")
# 最新推荐文章于 2021-11-04 22:04:46 发布 (latest recommended article published 2021-11-04 22:04:46)