# Web crawler: downloads the 4K animal wallpaper thumbnails listed on pic.netbian.com.
import os
import re
from urllib.parse import urljoin

import requests
from lxml import etree
# Crawl configuration. The values mirror what the script has always used.
SAVE_DIR = './4k动物爬取'
SITE_ROOT = 'http://pic.netbian.com'
LIST_URL_TEMPLATE = 'https://pic.netbian.com/4kdongwu//index_{}.html'
FIRST_PAGE = 2
LAST_PAGE = 171
REQUEST_TIMEOUT = 15  # seconds; without a timeout a stalled socket blocks forever

# NOTE(review): "Chromeh" looks like a typo for "Chrome"; left byte-identical
# because the server evidently accepts it -- confirm before changing.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromeh/81.0.4044.138 Safari/537.36'
}

# Characters that are illegal in Windows file names or act as path separators.
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def safe_filename(title):
    """Return *title* made safe to use as a single file-name component.

    Path separators, Windows-reserved punctuation and control characters are
    replaced with ``_``; leading/trailing spaces and dots are stripped. If
    nothing is left, ``'untitled'`` is returned so the caller always gets a
    usable name.
    """
    cleaned = _ILLEGAL_FILENAME_CHARS.sub('_', title).strip(' .')
    return cleaned or 'untitled'


def fetch(url):
    """Return the raw response body of *url* as bytes, or ``None`` on failure.

    Network errors, timeouts and non-2xx statuses are reported on stdout and
    turned into ``None`` so one bad request does not abort the whole crawl.
    """
    try:
        response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(url, '请求失败:', exc)
        return None
    # Raw bytes on purpose: decoding to text and re-encoding as ISO-8859-1 only
    # round-trips when requests happened to guess ISO-8859-1 for the page.
    return response.content


def download_page(page_number):
    """Download every image listed on listing page *page_number* into SAVE_DIR."""
    html = fetch(LIST_URL_TEMPLATE.format(page_number))
    if not html:
        return
    tree = etree.HTML(html)
    if tree is None:
        return
    for item in tree.xpath('//div[@class="slist"]/ul/li'):
        titles = item.xpath('./a/img/@alt')
        sources = item.xpath('./a/img/@src')
        if not titles or not sources:
            # List entry without an <img> (or without alt/src): nothing to save.
            continue
        title = titles[0]
        # urljoin copes with both site-relative and absolute src values.
        image = fetch(urljoin(SITE_ROOT, sources[0]))
        if image is None:
            continue
        image_path = os.path.join(SAVE_DIR, safe_filename(title) + '.jpg')
        with open(image_path, 'wb') as fp:
            fp.write(image)
        print(title, '下载成功!')


def main():
    """Create the output directory and crawl listing pages FIRST_PAGE..LAST_PAGE."""
    try:
        os.mkdir(SAVE_DIR)
    except FileExistsError:
        # Only "already exists" is expected; other OS errors should surface.
        print('文件已创建!')
    for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
        download_page(page_number)


if __name__ == '__main__':
    main()