import requests#
import re#正则表达
from tqdm import tqdm#进度条
# Read the first line of the locally saved page and echo it to stdout.
with open("Rose.html", encoding="utf-8") as fp:
    data = fp.readline()

print(data)
# Build the headers sent with every request made by this script.
# NOTE(review): the first two entries look like response-side headers;
# confirm whether the target site actually needs them.
headers = {
    'access-control-allow-origin': '*',
    'content-type': 'image/webp',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59',
}
# Fetch the page
def Gethtml(url, timeout=10):
    """Download ``url`` and pass the decoded page to ``Parsehtml``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``.

    On HTTP 200 the response body is decoded as UTF-8 and handed to
    ``Parsehtml``. For any other status the status code is printed and
    nothing is parsed. Exceptions raised by ``requests.get`` (including a
    timeout once ``timeout`` elapses) propagate to the caller.
    """
    # Without an explicit timeout the request can block indefinitely when the
    # server stops responding.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        Parsehtml(response.content.decode('utf-8'))
    else:
        print(response.status_code)
# Parse the page
def Parsehtml(content):
    """Find every ``"thumbURL"`` value in ``content`` and save each image.

    Args:
        content: Page source as text; searched for ``"thumbURL":"..."``
            entries.

    Every URL found is printed first. Each one is then downloaded with the
    module-level ``headers`` and written to a numbered ``.jpg`` file, where
    the number is the URL's position in the match list. A download that does
    not return HTTP 200 prints its status code and is skipped, so an error
    page is never saved as an image. Returns ``None``.
    """
    thumb_urls = re.findall(r'"thumbURL":"(.*?)"', content)
    for thumb_url in thumb_urls:
        print(thumb_url)
    for index, thumb_url in enumerate(thumb_urls):
        # A timeout keeps one stalled image from hanging the whole run.
        response = requests.get(thumb_url, headers=headers, timeout=10)
        if response.status_code != 200:
            # Same reporting as Gethtml: show the status and move on.
            print(response.status_code)
            continue
        # Save the image. Raw string: the backslashes are path separators,
        # not escape sequences.
        with open(r"E:\网络爬虫\爬虫图片下载\玫瑰花\玫瑰花{}.jpg".format(index), 'wb') as f:
            f.write(response.content)
# Script entry point
if __name__ == "__main__":
    # Target address to crawl
    url = 'https://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E7%8E%AB%E7%91%B0%E8%8A%B1'
    Gethtml(url)
    # Author's note: if the page source obtained via response.text is not
    # the same, add a Cookie entry to headers.
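# Article title: "Scraping Baidu Images with a Web Crawler"
# Footer from the source blog page: "Latest recommended article published 2024-05-13 14:00:47"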