# 解析下载图片数据
# https://pic.netbian.com/4kmeinv/
import requests
from lxml import etree
import os
# Listing page to scrape (the stray leading space of the original literal is removed).
url = 'https://pic.netbian.com/4kmeinv/'
headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'}

# Prefix used to turn the site-relative ``src`` attributes into full URLs.
SITE_ROOT = 'https://pic.netbian.com/'
# Folder (relative to the working directory) that receives the images.
SAVE_DIR = 'picLibs'
# Encoding used to decode the listing page. The original script repaired each
# title with ``encode('iso-8859-1').decode('gbk')``, i.e. it treated the page
# bytes as GBK; decoding as GBK up front gives the same text without the
# round trip.
PAGE_ENCODING = 'gbk'
# Seconds to wait on the server before giving up, so a stalled connection
# cannot hang the script forever.
REQUEST_TIMEOUT = 10


def build_image_url(src):
    """Return the absolute URL for an image ``src`` attribute value.

    A value that is already an absolute http(s) URL is returned unchanged.
    Otherwise leading slashes are dropped before prefixing ``SITE_ROOT`` so
    the result never contains a double slash after the host name.
    """
    if src.startswith(('http://', 'https://')):
        return src
    return SITE_ROOT + src.lstrip('/')


def safe_image_name(title):
    """Return the local ``.jpg`` file name for an image title.

    The title comes from scraped page content, so path separators are
    replaced with underscores to keep the file inside ``SAVE_DIR``.
    """
    cleaned = title.replace('/', '_').replace('\\', '_')
    return cleaned + '.jpg'


def fetch_image_entries(page_url):
    """Download the listing page and return a list of ``(src, alt)`` pairs.

    Raises ``requests.HTTPError`` when the page request returns an error
    status. List items without an ``<img>`` carrying both ``src`` and
    ``alt`` are skipped.
    """
    res = requests.get(url=page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    # Set the encoding before reading ``res.text`` so titles decode correctly.
    res.encoding = PAGE_ENCODING
    # Build the element tree and select every thumbnail list item.
    tree = etree.HTML(res.text)
    entries = []
    for li in tree.xpath('//ul[@class="clearfix"]/li'):
        src_values = li.xpath('./a/img/@src')
        alt_values = li.xpath('./a/img/@alt')
        if not src_values or not alt_values:
            continue
        entries.append((src_values[0], alt_values[0]))
    return entries


def download_image(img_src, img_name):
    """Fetch ``img_src`` and write its bytes to ``SAVE_DIR/img_name``.

    Raises ``requests.RequestException`` on a network or HTTP error and
    ``OSError`` when the file cannot be written.
    """
    response = requests.get(url=img_src, headers=headers, timeout=REQUEST_TIMEOUT)
    # Do not persist an HTTP error page under a .jpg name.
    response.raise_for_status()
    img_path = os.path.join(SAVE_DIR, img_name)
    with open(img_path, 'wb') as fb:
        fb.write(response.content)


def main():
    """Scrape the listing page and save every thumbnail into ``SAVE_DIR``."""
    # The original tested '.picLibs' but created './picLibs', so a second run
    # crashed with FileExistsError; exist_ok makes the step idempotent.
    os.makedirs(SAVE_DIR, exist_ok=True)
    for src, alt in fetch_image_entries(url):
        img_name = safe_image_name(alt)
        try:
            download_image(build_image_url(src), img_name)
        except (requests.RequestException, OSError) as exc:
            # Report the failed image and carry on with the remaining ones.
            print(img_name, "下载失败", exc)
            continue
        print(img_name, "下载成功")


if __name__ == "__main__":
    main()
# Use XPath to scrape images and save them to the local disk.