# Crawl images from Baidu image search (爬取百度图片)
import os
import re
import requests
def downloadJpg(KeyWord, page, saveAdr=r'D:\爬取百度图片'):
    """Download Baidu image-search thumbnails for a keyword.

    Requests ``page`` result pages (30 results each) from Baidu's ``acjson``
    endpoint, extracts every ``thumbURL`` value from the response text and
    saves each image as ``<saveAdr>/<KeyWord>/<KeyWord>-<n>.jpg``, where
    ``n`` counts up from 1 across all pages.

    Args:
        KeyWord: Search term; also used as the sub-directory name and as the
            file-name prefix.
        page: Number of result pages to fetch.
        saveAdr: Root directory under which the keyword folder is created.

    Returns:
        ``False`` as soon as the target directory cannot be created or an
        image fails to download or save; ``None`` once every page has been
        processed.

    Raises:
        requests.RequestException: If fetching a search-result page fails
            (only the per-image downloads are guarded).
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    target_dir = os.path.join(saveAdr, KeyWord)
    try:
        # exist_ok=True tolerates a pre-existing folder without hiding
        # genuine failures such as a missing drive or denied permission.
        os.makedirs(target_dir, exist_ok=True)
    except OSError as e:
        print(f'错误 ---> {e}')
        return False

    headers_Chrome = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
    }
    # Without a timeout a stalled connection would block forever.
    timeout = 30
    thumb_pattern = re.compile('"thumbURL":"(.+?)"')
    # Percent-encode the keyword so characters such as '&' or '#' cannot
    # break the query string.
    quoted = quote(KeyWord, safe='')

    count = 0
    for p in range(1, page + 1):
        # NOTE(review): the offset starts at 30, so results 0-29 are never
        # requested; kept as in the original - confirm this is intended.
        pn = p * 30
        # Uses the KeyWord parameter (the original read a global `word`) and
        # restores '&copyright=', which had been mangled into '©right='.
        url = (
            'https://image.baidu.com/search/acjson'
            '?tn=resultjson_com&logid=8182938565380378412&ipn=rj'
            '&ct=201326592&is=&fp=result'
            f'&queryWord={quoted}&cl=2&lm=-1&ie=utf-8&oe=utf-8'
            '&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright='
            f'&word={quoted}&s=&se=&tab=&width=&height=&face=0'
            '&istype=2&qc=&nc=1&fr=&expermode=&nojc='
            f'&pn={pn}&rn=30&gsm=3c&1624250554413='
        )
        webData = requests.get(url=url, headers=headers_Chrome, timeout=timeout)
        imagUrls = thumb_pattern.findall(webData.text)
        for img_url in imagUrls:
            try:
                count += 1
                print(img_url)
                r = requests.get(img_url, headers=headers_Chrome, timeout=timeout)
                file_path = os.path.join(target_dir, f'{KeyWord}-{count}.jpg')
                with open(file_path, "wb") as code:
                    code.write(r.content)
            except Exception as e:
                # The first failed download aborts the whole run.
                print(f'错误 ---> {e}')
                return False
    print('全部下载完成 --> ')
if __name__ == '__main__':
    # Interactive entry point: prompt for a keyword and a page count, then
    # download the matching thumbnails into the default save directory.
    word = input('请输入你需要下载的内容(关键词):')
    # SECURITY: the original used eval() here, which executes whatever the
    # user types. int() parses the page count without running any code; a
    # non-integer answer now raises ValueError.
    pa = int(input('请输入你需要下载的页数(30/页):'))
    downloadJpg(word, pa)