import csv
import json
import os
import re

import requests
from bs4 import BeautifulSoup
from lxml import etree
# Reference: example album-data endpoint and the JSON path to one thumbnail URL.
# url = 'https://image.baidu.com/search/albumsdata?pn=90&rn=30&tn=albumsdetail&word=渐变风格插画&album_tab=设计素材&album_id=409&ic=0&curPageNum=3'
# img_url = json['albumdata']['linkData'][0]['thumbnailUrl']
# Desktop-Chrome User-Agent so the API responds as it would to a normal browser.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'}
# Base endpoint for the target album; paging params (pn/rn/curPageNum) are added per request.
url = 'https://image.baidu.com/search/albumsdata?tn=albumsdetail&word=城市建筑摄影专题&album_tab=建筑&album_id=7&ic=0'
def get_img_url():
    """Yield one page of album link data at a time from the Baidu album API.

    Walks pages 1..20 of the album endpoint in ``url``, requesting 30
    entries per page, and yields the ``albumdata.linkData`` list (a list
    of dicts with at least ``thumbnailUrl`` and ``pid`` keys) for each page.

    Raises:
        requests.HTTPError: if a page request returns a non-2xx status.
    """
    # One session for all pages so the TCP connection is reused,
    # instead of creating a fresh Session per iteration.
    sess = requests.Session()
    for page in range(1, 21):
        print('正在处理第{}页数据...'.format(str(page)))
        params = {'pn': str(30 * page), 'rn': '30', 'curPageNum': str(page)}
        res = sess.get(url, headers=headers, params=params)
        # Fail loudly on HTTP errors rather than raising a confusing
        # JSON-decode error on an error page.
        res.raise_for_status()
        # Renamed from `json` — the original shadowed the imported json module.
        data = res.json()
        yield data['albumdata']['linkData']
def get_img_and_save(linkData):
    """Download every thumbnail in *linkData* and save it under ./a_img/.

    Args:
        linkData: iterable of dicts with keys ``thumbnailUrl`` (image URL)
            and ``pid`` (unique id used as the output file name).
    """
    # Ensure the output directory exists; the original crashed with
    # FileNotFoundError when ./a_img/ was missing.
    os.makedirs('./a_img', exist_ok=True)
    # One session reused for every download instead of one per image.
    sess = requests.Session()
    for link in linkData:
        img_url = link['thumbnailUrl']
        res = sess.get(img_url, headers=headers)
        file = './a_img/{}.png'.format(str(link['pid']))
        print('正在保存图片...{}'.format(file))
        with open(file, 'wb') as f:
            f.write(res.content)
# Guarded entry point: only crawl when executed as a script, not when
# this module is merely imported.
if __name__ == '__main__':
    for linkData in get_img_url():
        get_img_and_save(linkData)
    print('图片爬取完成!')
# python爬取百度图片某专辑数据
# 最新推荐文章于 2024-10-18 00:00:00 发布
# 该代码段使用Python的requests,lxml和BeautifulSoup库解析百度图片专辑数据,通过遍历多页获取图片链接,并使用requests下载图片,保存至本地。爬虫针对特定关键词‘城市建筑摄影专题’,并动态调整页码以抓取更多图片。
# 摘要由CSDN通过智能技术生成