import requests
import os
def getPages(kw, num):
    """Fetch `num` pages of Baidu image-search results for keyword `kw`.

    Queries Baidu's `acjson` endpoint once per page and returns a list of
    the per-page ``data`` arrays from the JSON responses (each array holds
    image-metadata dicts).

    :param kw:  search keyword (goes into the ``word`` query parameter)
    :param num: number of result pages to request
    :return:    list of ``data`` lists, one per requested page
    """
    params = []
    # Bug fix: the original loop hard-coded range(0, 3) and ignored `num`,
    # and sent no page offset, so every request returned the same page.
    for page in range(num):
        params.append({
            'tn': 'resultjson_com',
            'ipn': 'r',
            'ct': '201326592',
            'cl': '2',
            'lm': '-1',
            'st': '-1',
            'fm': 'result',
            'fr': '',
            'sf': '1',
            'fmq': '1601464617281_R',
            'pv': '',
            'ic': '0',
            'nc': '1',
            'z': '',
            'hd': '',
            'latest': '',
            'copyright': '',
            'se': '1',
            'showtab': '0',
            'fb': '0',
            'width': '',
            'height': '',
            'face': '0',
            'istype': '2',
            'ie': 'utf-8',
            'ctd': '1601464617282^00_1025X891',
            'sid': '',
            'word': kw,
            # Page offset/size: the acjson endpoint pages in steps of `rn`.
            'pn': str(30 * page),
            'rn': '30',
        })
    url = 'https://image.baidu.com/search/acjson'
    headers = {
        'User-Agent': 'Mozilla / 5.0(Windows NT 10.0; WOW64) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 72.0.3626.81 Safari / 537.36 SE 2.X MetaSr 1.0'
    }
    urls = []
    for query in params:
        # timeout added so a stalled request cannot hang the scraper forever
        res = requests.get(url, headers=headers, params=query,
                           allow_redirects=True, timeout=10)
        urls.append(res.json()['data'])
    return urls
def downloadimg(datalist, dir):
    """Save every image referenced in `datalist` into directory `dir`.

    Iterates the per-page ``data`` arrays returned by ``getPages`` and
    downloads each entry's ``hoverURL`` thumbnail as a sequentially
    numbered ``<n>.jpg`` file.

    :param datalist: list of per-page lists of image-metadata dicts
    :param dir:      target directory path (created if missing); the name
                     shadows the builtin but is kept for interface
                     compatibility with existing callers
    """
    # makedirs + exist_ok handles nested paths and the create-race cleanly
    # (os.mkdir would raise if the directory appeared between check and call)
    os.makedirs(dir, exist_ok=True)
    x = 0
    for data in datalist:
        for item in data:
            if not item:
                continue
            img_url = item.get("hoverURL")
            # Bug fix: some entries have no hoverURL; the original passed
            # None to requests.get and crashed.
            if not img_url:
                continue
            imgres = requests.get(img_url, timeout=10)
            # `with` guarantees the file handle is closed (original leaked it)
            with open(os.path.join(dir, str(x) + '.jpg'), 'wb') as f:
                f.write(imgres.content)
            x += 1
if __name__ == '__main__':
    # Guarded entry point: importing this module no longer triggers
    # interactive prompts and network traffic as a side effect.
    # Prompt the user for the search keyword and page count.
    keyword = input("请输入搜索图片关键字:")
    pages = input("请输入爬取页数:")
    # Crawl the requested number of result pages for the keyword...
    datalist = getPages(keyword, int(pages))
    # ...then save every image into ./baidu.
    downloadimg(datalist, './baidu')
# python 爬取百度图片  (blog-page residue from the original source; commented
# out so the file is valid Python)
# 最新推荐文章于 2024-07-07 09:46:06 发布