# 需要安装第三方的库 requests,lxml
import requests
import time
import random
import os
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
# Directory the downloaded wallpapers are written to.
path = "D:/4K壁纸"

# Pool of desktop Chrome User-Agent strings to choose from.
user_agent = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
]

# Pick one User-Agent at random. This runs once at import time, so every
# request that uses `headers` during a single run shares the same value.
headers = {"User-Agent": random.choice(user_agent)}
# Example listing-page URL: https://pic.netbian.com/4kmeinv/index_2.html
# First and last listing page to scrape, read from the user at start-up.
start = int(input('起始页:'))
end = int(input('结束页:'))
def download():
    """Collect the detail-page URLs of every wallpaper on the requested pages.

    Fetches the listing pages numbered ``start`` through ``end`` (inclusive;
    both are module-level globals) and extracts the link of each entry in the
    thumbnail list.

    Returns:
        list[str]: Absolute detail-page URLs, in the order the listing pages
        present them.
    """
    detail_urls = []
    for page in range(start, end + 1):
        # Only the first listing page lacks the "index_N" suffix. Compare the
        # page being fetched rather than ``start``; otherwise a run that begins
        # at page 1 requests page 1 on every iteration.
        if page == 1:
            url = 'https://pic.netbian.com/4kmeinv/'
        else:
            url = 'https://pic.netbian.com/4kmeinv/index_{}.html'.format(page)
        res = requests.get(url, headers=headers)
        # The response body is decoded as GBK before parsing.
        code = etree.HTML(res.content.decode('gbk'))
        for li in code.xpath('//*[@id="main"]/div[3]/ul/li'):
            hrefs = li.xpath('./a/@href')
            # Skip list items without a link instead of raising IndexError.
            if hrefs:
                detail_urls.append('https://pic.netbian.com' + hrefs[0])
    return detail_urls
#解析新网页中的内容
def check_eyone(url):
    """Download the image shown on one wallpaper detail page into ``path``.

    Args:
        url: Absolute URL of a wallpaper detail page.

    Raises:
        IndexError: If the page contains no ``//*[@id="img"]/img`` element.
    """
    res = requests.get(url, headers=headers)
    code = etree.HTML(res.content.decode('gbk'))
    img = code.xpath('//*[@id="img"]/img/@src')[0]
    img_url = 'https://pic.netbian.com' + img
    print(img_url)

    # File name = last directory component + file name of the image URL.
    url_parts = img_url.split('/')
    pic_name = url_parts[-2] + url_parts[-1]
    print(pic_name)

    # The image bytes have to be fetched from the image's own URL; send the
    # same browser headers as the page requests for consistency.
    res2 = requests.get(img_url, headers=headers)

    # exist_ok=True makes this safe when several worker threads reach this
    # point at once, and real failures (e.g. permissions) are no longer hidden.
    os.makedirs(path, exist_ok=True)
    # os.path.join replaces the hand-written '\{...}' separator, which was an
    # invalid escape sequence and only meaningful on Windows.
    with open(os.path.join(path, pic_name), 'wb') as f:
        f.write(res2.content)
    print("正在下载")
    # Pause before this worker picks up its next page.
    time.sleep(1.5)
if __name__ == '__main__':
    # Fan the detail pages out over a pool of worker threads.
    with ThreadPoolExecutor(50) as t:
        jobs = [(page_url, t.submit(check_eyone, page_url))
                for page_url in download()]
        # Executor.map() only re-raises worker exceptions when its results are
        # iterated, so the previous unconsumed map() call silently dropped
        # every failure. Check each future and report failures per URL while
        # letting the remaining downloads continue.
        for page_url, job in jobs:
            try:
                job.result()
            except Exception as exc:
                print(f'下载失败: {page_url} ({exc})')
#'//*[@id="i_cecream"]/main/section[1]/div[1]/div[2]/div[2]/div/div/a'