import csv
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree
# url = 'https://image.baidu.com/search/albumsdata?pn=90&rn=30&tn=albumsdetail&word=渐变风格插画&album_tab=设计素材&album_id=409&ic=0&curPageNum=3'
# img_url = json['albumdata']['linkData'][0]['thumbnailUrl']
# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/99.0.4844.82 Safari/537.36'
    ),
}
# Album JSON endpoint; the paging parameters (pn, rn, curPageNum) are supplied
# per request by get_img_url().
url = (
    'https://image.baidu.com/search/albumsdata'
    '?tn=albumsdetail&word=城市建筑摄影专题'
    '&album_tab=建筑&album_id=7&ic=0'
)
def get_img_url():
    """Yield the album's image link records, one result page at a time.

    Requests pages 1 through 20 of the endpoint in the module-level ``url``
    (30 records requested per page) and yields the ``albumdata.linkData``
    value decoded from each JSON response.

    Yields:
        The ``linkData`` value of one page; the caller iterates it as a
        sequence of per-image records.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If a response body is not valid JSON.
        KeyError: If the decoded JSON lacks ``albumdata`` or ``linkData``.
    """
    # One session for the whole crawl: connections are pooled across pages and
    # released when the generator is exhausted or closed.
    with requests.Session() as sess:
        for i in range(1, 21):
            print('正在处理第{}页数据...'.format(str(i)))
            # The record offset tracks the page number (pn = 30 * curPageNum).
            params = {'pn': str(30 * i), 'rn': '30', 'curPageNum': str(i)}
            # A timeout keeps a stalled server from hanging the crawl forever.
            res = sess.get(url, headers=headers, params=params, timeout=30)
            # Fail here with a clear HTTP error rather than later with a
            # confusing decode/KeyError on an error page.
            res.raise_for_status()
            # Named ``payload`` so the module-level ``json`` import is not
            # shadowed inside this function.
            payload = res.json()
            yield payload['albumdata']['linkData']
def get_img_and_save(linkData):
    """Download the thumbnail of every record in *linkData* into ``./a_img``.

    Each record is read for two keys: ``thumbnailUrl`` (the image to fetch)
    and ``pid`` (used as the file name, ``./a_img/<pid>.png``). The target
    directory is created if it does not exist. A download that returns an
    HTTP error status is reported and skipped, so an error page is never
    written to disk as an image.

    Args:
        linkData: Iterable of mappings providing ``thumbnailUrl`` and ``pid``.

    Raises:
        requests.RequestException: If a request fails or times out.
        KeyError: If a record lacks ``thumbnailUrl`` or ``pid``.
        OSError: If the directory or an image file cannot be written.
    """
    # open() does not create missing directories; make sure the target exists.
    os.makedirs('./a_img', exist_ok=True)
    # Reuse one session for all images instead of leaking one per download.
    with requests.Session() as sess:
        for link in linkData:
            img_url = link['thumbnailUrl']
            res = sess.get(img_url, headers=headers, timeout=30)
            if not res.ok:
                # Skip this image but keep processing the rest of the page.
                print('图片下载失败(HTTP {}),已跳过...{}'.format(res.status_code, img_url))
                continue
            path = './a_img/{}.png'.format(str(link['pid']))
            print('正在保存图片...{}'.format(path))
            with open(path, 'wb') as f:
                f.write(res.content)
def _crawl_all_pages():
    """Fetch every result page, save its images, then report completion."""
    for page_links in get_img_url():
        get_img_and_save(page_links)
    print('图片爬取完成!')


# Runs unconditionally, exactly like the original inline loop.
_crawl_all_pages()