翻页爬取博客:https://blog.csdn.net/qq_54528857/article/details/122548517
运行结果:
源码
import requests
# 使用正则表达式
import re
import os
# Fetch one Baidu image-search result page, extract every "objURL" it
# contains, and save each referenced image into ./美女 as 1.jpg, 2.jpg, ...

# Directory (relative to the current working directory) that receives the images.
save_dir = './美女'
# exist_ok=True lets the script be re-run without crashing on the existing folder.
os.makedirs(save_dir, exist_ok=True)

url = 'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&hs=0&xthttps=111110&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E7%BE%8E%E5%A5%B3&oq=%E7%BE%8E%E5%A5%B3&rsp=-1'
# NOTE(review): the session cookie below is hard-coded; it will eventually
# expire and should not live in source control -- consider reading it from
# the environment instead.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'Cookie': 'BDqhfp=%E5%91%A8%E6%9D%B0%E4%BC%A6%26%26NaN-1undefined%26%260%26%261; BIDUPSID=8B2FE044F7C7A1936A5C3FEDD5393193; PSTM=1634620199; BAIDUID=8B2FE044F7C7A19337ED870A99C0CCD5:FG=1; __yjs_duid=1_9870d0e3e5252cfb365e9c897e7ea7911635247578446; BAIDUID_BFESS=8B2FE044F7C7A19337ED870A99C0CCD5:FG=1; H_WISE_SIDS=107317_110085_127969_179345_180276_181588_182237_183328_184009_184267_184440_185268_185634_185650_186159_186316_186635_186669_187432_187820_187877_188182_188331_188453_188469_188552_189037_189325_189395_189731_189755_189861_189975_190114_190248_190473_190510_190651_190654_190681_190756_190779_190804_191242_191287_191369_191421_191433_191501_191640_191810_192259_192359_192382_192600_192874_192904_193006_193110_193194_193211_193283_193349_193395_193409_193493_193701_193756_193891_194038_194116_194317_194381_194511_194519_194582_194612_194674_194707_194862_194987_195149_195178_195342_8000081_8000109_8000114_8000133_8000138_8000144_8000150_8000156_8000168_8000177_8000181_8000185; ZFY=5ru9nyqtj58SlUuDnWxscIVzo0LdU4N6K1TXtM3uL4c:C; BAIDU_WISE_UID=wapp_1638442407492_371; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; userFrom=null; BDRCVFR[-pGxjrCMryR]=mk3SLVN4HKm; ab_sr=1.0.1_YTA3Y2M1OWZmMGQyMWVhYjMwMjFkNzc1NmEyZWU1YzlmOTExMWY0YTZkZTkxZGE0YWYzNzhjYzllMmM3NzhkYjk5YTIzNWU0ZGQ2NjQ0ZWQ5MjZkMzcwZTM3MDNiNjBiODBjNjczMmY2M2YyMGIyYjE0YzI3NTRjYzEzZGU5MmFmMjI0MWU4OGRmMWNiYThjZTIyM2U4NWIzMjAzMzE3ZA=='
}
# The image URLs point at arbitrary third-party hosts, so they must not
# receive the Baidu session cookie; send them the User-Agent only.
image_header = {'User-Agent': header['User-Agent']}
# Seconds to wait on any single request so one dead host cannot hang the run.
timeout = 10

response = requests.get(url, headers=header, timeout=timeout)
# Fail loudly if the search page itself could not be fetched.
response.raise_for_status()
# Decode the body to real text. str() on a bytes object yields its "b'...'"
# repr, in which backslashes and non-ASCII bytes are escaped, corrupting any
# URL matched below. UTF-8 is used because the query string passes ie=utf-8;
# undecodable bytes are replaced instead of raising.
html = response.content.decode('utf-8', errors='replace')

# Capture the value of every "objURL" field in the page.
regular = r'"objURL":"(.*?)"'
url_list = re.findall(regular, html, re.S)
# print(url_list)

# Sequence number of the next image file; advanced only after a successful
# save so the file names stay contiguous.
n = 1
for i in url_list:
    try:
        image = requests.get(url=i, headers=image_header, timeout=timeout)
        # Do not save an HTTP error page under a .jpg name.
        image.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable or invalid URL should not abort the whole run.
        print(f'skipped {i}: {exc}')
        continue
    path = os.path.join(save_dir, f'{n}.jpg')
    with open(path, 'wb') as f:
        f.write(image.content)
    n += 1
    print(i)