import requests
import os
from lxml import etree
# Helper that creates a directory.
def create_file(file_path):
    """Create the directory ``file_path`` if it does not exist yet.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Args:
        file_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``file_path`` exists but is not a directory.
        OSError: If the directory cannot be created.
    """
    # exist_ok=True replaces the separate os.path.exists() check, which was
    # racy: another process could create the directory between the check and
    # the makedirs() call and make it fail.
    os.makedirs(file_path, exist_ok=True)
# Other sample document URLs (uncomment one to use it instead of the active one):
#url = 'https://wk.baidu.com/view/526297e64531b90d6c85ec3a87c24028905f8521'
#url = 'https://wenku.baidu.com/view/c618583dfc00bed5b9f3f90f76c66137ee064fbb.html?fr=income11-doc-search&_wkts_=1708924835319&wkQuery=%E6%96%B0%E5%86%A0%E8%82%BA%E7%82%8E'
#url = 'https://wenku.baidu.com/view/1fea0e827275a417866fb84ae45c3b3566ecdddd?aggId=fbb4fc21a0c7aa00b52acfc789eb172dec6399eb&fr=catalogMain_text_ernie_recall_backup_new%3Awk_recommend_main4&_wkts_=1708925067387&wkQuery=%E5%9B%BE%E7%89%87'
# Put the Baidu Wenku address of the document whose images you want in `url`.
url = 'https://wenku.baidu.com/view/5a2b440474232f60ddccda38376baf1ffc4fe3c9.html?fr=income3-doc-search&_wkts_=1708926255145&wkQuery=%E8%AF%BE%E4%BB%B6'

# Seconds to wait on each HTTP request; without a timeout requests.get() can
# block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Fetch the document page and fail loudly on an HTTP error instead of
# parsing an error page.
resp = requests.get(url, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
text = resp.text
html = etree.HTML(text)
# NOTE(review): etree.HTML() appears to return None for an empty document;
# treat that case as "no images" rather than crashing on None.xpath().
if html is None:
    img_list = []
else:
    # Every <img> nested two <div> levels below the "mod flow-ppt-mod" container.
    img_list = html.xpath('//div[@class="mod flow-ppt-mod"]/div/div/img')

# Directory the downloaded images are saved to.
file_path = './wendang/'
create_file(file_path)

# Download the images; cnt is the 1-based position of the image in img_list
# and is used in the output file name.
for cnt, i in enumerate(img_list, start=1):
    # Prefer `src`; fall back to `data-src` when `src` is missing or empty.
    img_url = i.get('src') or i.get('data-src')
    if not img_url:
        print(f'page {cnt}: no image URL found, skipped')
        continue
    # Output file name.
    file_name = f'{file_path}page_{cnt}.jpg'
    print(file_name, img_url)
    # Download the image; skip this one (instead of aborting the whole run or
    # saving an error page as .jpg) when the request fails.
    try:
        img_resp = requests.get(img_url, timeout=REQUEST_TIMEOUT)
        img_resp.raise_for_status()
    except requests.RequestException as err:
        print(f'page {cnt}: download failed ({err}), skipped')
        continue
    with open(file_name, 'wb') as f:
        f.write(img_resp.content)
# Result