百度图片的搜索结果是滚动加载的，并不是静态页面：需要不断向下滑动，页面才会继续加载新的图片。
下面的案例以"奥运会"作为搜索关键字。
import requests
import os
from bs4 import BeautifulSoup
# Endpoint that the Baidu image search page queries while the user scrolls.
base_url = "https://image.baidu.com/search/acjson?"

# Search term; it is also reused as the name of the output folder.
keyword = "奥运会"

# Query-string parameters sent with every listing request. The values are
# kept exactly as captured; the two keyword fields come last so the key
# order matches the order in which they were originally added.
params = {
    'tn': 'resultjson_com',
    'logid': '7293709351117530629',
    'ipn': 'rj',
    'ct': 201326592,
    'is': '',
    'fp': 'result',
    'cl': 2,
    'lm': -1,
    'ie': 'utf-8',
    'oe': 'utf-8',
    'adpicid': '',
    'st': -1,
    'z': '',
    'ic': '',
    'hd': '',
    'latest': '',
    'copyright': '',
    's': '',
    'se': '',
    'tab': '',
    'width': '',
    'height': '',
    'face': 0,
    'istype': 2,
    'qc': '',
    'nc': '1',
    'fr': '',
    'expermode': '',
    'force': '',
    'cg': '',
    'pn': 30,  # initial value only; the download loop sets 30, 60, ...
    'rn': 30,
    'gsm': '1e',
    # NOTE(review): looks like a millisecond timestamp used as a
    # cache-buster key with an empty value -- confirm.
    '1623811985902': '',
    'queryWord': keyword,
    'word': keyword,
}
# Browser-like request headers sent with both the listing request and the
# individual image downloads.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    # 'br' is deliberately not advertised: requests/urllib3 can only decode
    # Brotli when the optional brotli package is installed, and an undecoded
    # body would make response.json() fail.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'sec-ch-ua': '"Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.82 Safari/537.36'
    ),
}
# Create the output folder, named after the search keyword, under the
# current directory. exist_ok=True replaces the separate existence check,
# so re-running the script is a no-op here and there is no window between
# checking and creating.
path = "./" + keyword
os.makedirs(path, exist_ok=True)
# Download several result pages; each listing request corresponds to one
# scroll-load of the search page.
i = 0  # running index used to number the saved files
pages = 3  # number of result pages (scroll loads) to fetch
for page in range(pages):
    # Advance 'pn' in steps of 30: 30, 60, 90, ...
    params['pn'] = (page + 1) * 30
    try:
        # A timeout keeps one stalled connection from hanging the script.
        response = requests.get(
            url=base_url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        image_dict = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failure, HTTP error or undecodable JSON: skip this page
        # but keep going with the next one.
        print("出错了:", e)
        continue
    image_list = image_dict.get("data") or []

    for image in image_list:  # download every image listed on this page
        # The list ends with an empty dict and an entry may lack the URL;
        # skip such entries instead of abandoning the rest of the page.
        url = image.get('middleURL') if isinstance(image, dict) else None
        if not url:
            continue
        try:
            img_response = requests.get(url=url, headers=headers, timeout=10)
            # Do not save an HTTP error body as if it were an image.
            img_response.raise_for_status()
        except requests.RequestException as e:
            print("出错了:", e)
            continue
        # Write the image bytes to "<path>/image<i>.jpg".
        file_name = os.path.join(path, "image" + str(i) + ".jpg")
        with open(file_name, 'wb') as f:
            f.write(img_response.content)
        i += 1

print(f"已经爬取了{i}张图片")
这段代码可以直接将图片保存到本地(会自动创建一个以关键字命名的文件夹), 如下图所示