# 解析下载图片数据
# 网址: https://pic.netbian.com/4kmeinv/
import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Crawl the paginated gallery at https://pic.netbian.com/4kmeinv/ and save
# each listed thumbnail as "<alt text>.jpg" inside ./meinvImage.

SAVE_DIR = './meinvImage'
BASE_URL = 'https://pic.netbian.com/'
# Seconds to wait on any single HTTP request; without a timeout one stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 10
# Characters that are path separators or are rejected by common file systems.
INVALID_FILENAME_CHARS = '\\/:*?"<>|'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'
}
url = 'https://pic.netbian.com/4kmeinv/index_%d.html'


def fix_title_encoding(text):
    """Undo mojibake in *text* caused by GBK bytes being decoded as ISO-8859-1.

    The text is re-encoded as ISO-8859-1 and decoded as GBK. If either step
    fails, the text was not garbled in that way (for example it was already
    decoded correctly), so it is returned unchanged instead of raising.
    """
    try:
        return text.encode('iso-8859-1').decode('gbk')
    except (UnicodeEncodeError, UnicodeDecodeError):
        return text


def safe_filename(title, fallback='untitled'):
    """Return *title* with characters that are unsafe in file names replaced.

    Path separators, reserved punctuation and control characters become
    underscores, and surrounding whitespace is stripped. *fallback* is
    returned when nothing usable remains.
    """
    cleaned = ''.join(
        '_' if ch in INVALID_FILENAME_CHARS or ch < ' ' else ch
        for ch in title
    ).strip()
    return cleaned or fallback


def build_image_url(src):
    """Resolve an <img> ``src`` value against the site root.

    Root-relative and relative paths are joined without producing a double
    slash; an already absolute URL is returned as is.
    """
    return urljoin(BASE_URL, src)


def download_page(page_num):
    """Fetch gallery index page *page_num* and save every thumbnail on it.

    Raises:
        requests.RequestException: if the index page itself cannot be
            fetched. A failure on an individual image is reported and
            skipped so the rest of the page is still processed.
    """
    page = requests.get(url=url % page_num, headers=headers,
                        timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    tree = etree.HTML(page.text)
    if tree is None:
        return

    for item in tree.xpath('//ul[@class="clearfix"]/li'):
        sources = item.xpath('./a/img/@src')
        if not sources:
            # List entry without an image: nothing to download.
            continue
        src = sources[0]
        titles = item.xpath('./a/img/@alt')
        title = fix_title_encoding(titles[0]) if titles else ''
        # Fall back to the image's own base name when the alt text is
        # missing or sanitises down to nothing.
        stem = os.path.splitext(os.path.basename(src))[0] or 'untitled'
        image_path = os.path.join(SAVE_DIR, safe_filename(title, stem) + '.jpg')

        try:
            image = requests.get(url=build_image_url(src), headers=headers,
                                 timeout=REQUEST_TIMEOUT)
            # Refuse to store an HTML error page under a .jpg name.
            image.raise_for_status()
        except requests.RequestException as exc:
            print('skipped %s: %s' % (src, exc))
            continue

        with open(image_path, 'wb') as fp:
            fp.write(image.content)
        print('下载成功!!!')


def main(first_page=2, last_page=173):
    """Download thumbnails from index pages *first_page*..*last_page* inclusive.

    NOTE(review): the defaults keep the original hard-coded range(2, 174).
    Page 1 is presumably served at a URL that does not match the
    ``index_%d.html`` pattern -- confirm before lowering ``first_page``.
    """
    os.makedirs(SAVE_DIR, exist_ok=True)
    for page_num in range(first_page, last_page + 1):
        try:
            download_page(page_num)
        except requests.RequestException as exc:
            # Keep crawling: one unreachable page should not end the run.
            print('page %d failed: %s' % (page_num, exc))


if __name__ == '__main__':
    main()
```
爬取壁纸图片
最新推荐文章于 2024-11-15 10:17:44 发布