import requests
import re
def picdownload(pic):
    """Search Baidu Images for ``pic`` and save the linked images locally.

    The search result page is fetched once, every ``"objURL"`` value found in
    it is treated as an image link, and each link whose URL ends in a
    supported extension (jpg, jpeg, png, bmp; matched case-insensitively) is
    downloaded and written to ``<pic><n>.<ext>``, a path relative to the
    current working directory. ``n`` starts at 0 and advances only after a
    file has actually been written.

    Args:
        pic: Search keyword; also used as the prefix of the saved file names.

    Raises:
        requests.RequestException: If the search request itself fails or
            times out. Failures on individual images are skipped instead.
    """
    # Search endpoint being scraped.
    url = "http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8"
    # Request headers: present the crawler as a desktop browser.
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"
    }
    kw = {
        "word": pic
    }
    # Without a timeout a stalled server would block the crawl forever.
    timeout_seconds = 10
    # Listed longest-first only for readability; no suffix here is the tail
    # of another, so the match order does not affect the result.
    extensions = ('jpeg', 'jpg', 'png', 'bmp')

    # Fetch the search result page.
    response = requests.get(url, headers=headers, params=kw, timeout=timeout_seconds)
    # Decode the body as UTF-8; a stray invalid byte should not abort the run.
    content = response.content.decode('utf8', errors='replace')
    # Extract the image links embedded in the page.
    detail_urls = re.findall(r'"objURL":"(.*?)"', content, re.DOTALL)

    # Download the images one by one.
    saved = 0
    for detail_url in detail_urls:
        lowered = detail_url.lower()
        extension = next(
            (ext for ext in extensions if lowered.endswith(ext)), None
        )
        if extension is None:
            # Not a recognised image type: skip it without any request.
            continue
        try:
            image_response = requests.get(
                detail_url, headers=headers, timeout=timeout_seconds
            )
            if not image_response.ok:
                # An HTTP error body is not image data; do not save it.
                continue
            # One template for every extension replaces the four copies, one
            # of which ('{0]{1}.bmp') was malformed and never saved a file.
            with open('{0}{1}.{2}'.format(pic, saved, extension), 'wb') as f:
                f.write(image_response.content)
        except (requests.RequestException, OSError):
            # Best effort: an unreachable link or an unwritable file skips
            # this image only; other errors are allowed to surface.
            continue
        saved += 1
# Call the function defined above to run the crawl.
from picdownload import picdownload
# Run the interactive crawl only when this file is executed as a script, so
# that importing it does not block on stdin or start downloading.
if __name__ == "__main__":
    # Ask which subject to search for, then download the matching images.
    pic = input("你想爬取什么图片?\n")
    picdownload(pic)