import requests
import re
from bs4 import BeautifulSoup
import json
# Range of duitang blog-post IDs to probe (start inclusive, stop exclusive).
FIRST_BLOG_ID = 805228680
# NOTE(review): the stop value has one more digit than the start, so the range
# covers roughly 7.2 billion IDs. This looks like a typo, but the value is kept
# exactly as the original had it -- confirm the intended upper bound.
STOP_BLOG_ID = 8052299980

BLOG_URL = "https://www.duitang.com/blog/?id=%d"
OUTPUT_DIR = "C:/Users/17331/Desktop/wenjianjia/"
# Seconds to wait on any single HTTP request before giving up, so one stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def _find_image_url(session, blog_id):
    """Return the href of the first ``<a class="img-out">`` on a blog page.

    Args:
        session: ``requests.Session`` used to fetch the page.
        blog_id: Numeric blog-post ID substituted into ``BLOG_URL``.

    Returns:
        The link target as a string, or ``None`` when the page has no such
        anchor or the anchor has no (non-empty) ``href``.

    Raises:
        requests.RequestException: If the page request fails or times out.
    """
    page = session.get(BLOG_URL % blog_id, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(page.content, "html.parser")
    anchor = soup.find("a", "img-out")
    if anchor is None:
        return None
    # Read the attribute from the parsed tag rather than regex-matching the
    # serialized tag: the serialized form HTML-escapes the value (``&`` becomes
    # ``&amp;``) and a pattern like ``href="`` can also match ``data-href="``.
    return anchor.get("href") or None


def _save_image(session, image_url, index):
    """Download ``image_url`` and write it to ``OUTPUT_DIR/<index>.jpg``.

    Args:
        session: ``requests.Session`` used to fetch the image.
        image_url: URL of the image to download.
        index: Zero-based position of the blog ID in the crawl range; used as
            the output file name.

    Returns:
        ``True`` if the file was written, ``False`` if the server answered
        with an error status (nothing is written in that case, so error pages
        are not stored as ``.jpg`` files).

    Raises:
        requests.RequestException: If the image request fails or times out.
    """
    image = session.get(image_url, timeout=REQUEST_TIMEOUT)
    if not image.ok:
        return False
    with open("%s%d.jpg" % (OUTPUT_DIR, index), "wb") as out_file:
        out_file.write(image.content)
    return True


def main():
    """Probe every blog ID in the configured range and save its image, if any.

    A progress line is printed after each ID, whether or not an image was
    found. A network failure on one ID is reported and skipped instead of
    aborting the whole run.
    """
    # One session for the whole crawl so connections to the host are reused.
    with requests.Session() as session:
        for blog_id in range(FIRST_BLOG_ID, STOP_BLOG_ID):
            index = blog_id - FIRST_BLOG_ID
            try:
                image_url = _find_image_url(session, blog_id)
                if image_url is not None:
                    _save_image(session, image_url, index)
            except requests.RequestException as err:
                print("skipping id {}: {}".format(blog_id, err))
            print("正在下载{}".format(index))


if __name__ == "__main__":
    main()
# python BeautifulSoup爬虫
# 最新推荐文章于 2024-07-05 22:58:15 发布