一.高清壁纸批量下载
1.概述
本脚本使用 Python 多线程(ThreadPoolExecutor)批量下载 wallhaven.cc 排行榜中的高清壁纸,实现比较简陋。
本代码仅供学习与交流,请勿用于任何违法用途。
import requests
from lxml import etree
import re
import time
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
# 爬取高清壁纸排行榜
# 获取图片下载地址
def getPicUrl():
    """Collect full-size image URLs from the wallhaven toplist.

    Requests a deliberately out-of-range listing page to read the total
    number of pages from its header, then walks every listing page, turns
    each thumbnail URL into the matching full-size URL and appends it to
    the module-level ``picList``.

    Raises:
        IndexError: if the header text holding the page total, or a number
            inside it, cannot be found on the out-of-range page.
        requests.RequestException: if a listing page cannot be fetched,
            including when the request times out.
    """
    list_url = "https://wallhaven.cc/toplist?page={}"
    # Seconds; without a timeout a stalled connection would block forever.
    timeout = 10

    # The out-of-range page is parsed only for the page total in its header.
    probe = requests.get(list_url.format(9999), timeout=timeout)
    probe_html = etree.HTML(probe.text)
    header_text = probe_html.xpath(
        '//*[@id="thumbs"]/section[1]/header/h2/text()[2]')[0]
    # Raw string: "\d" in a normal string literal is an invalid escape.
    list_int = re.findall(r"-?[1-9]\d*", header_text)[0]
    print("一共有"+list_int+"页")

    # Lower total_pages here to limit how many pages are crawled.
    total_pages = int(list_int)

    # Pages are numbered from 1 and the last page must be included.
    for page in range(1, total_pages + 1):
        resp = requests.get(list_url.format(page), timeout=timeout)
        html = etree.HTML(resp.text)
        for j in range(1, 30):  # 1-based thumbnail position, at most 29 per page
            try:
                # Thumbnail:  th.wallhaven.cc/small/kx/kx98xd.jpg
                # Full size:  w.wallhaven.cc/full/kx/wallhaven-kx98xd.jpg
                # Both share the two-character folder and the file name, so
                # the full-size URL is assembled from those two pieces.
                # NOTE(review): this reuses the thumbnail's file extension for
                # the full image -- confirm that always holds.
                urlXpath = '//*[@id="thumbs"]/section/ul/li[{}]/figure/img/@data-src'.format(j)
                thumb_url = html.xpath(urlXpath)[0]
                parts = str(thumb_url).split("/")
                newImgUrl = "https://w.wallhaven.cc/full/" + parts[4] + "/wallhaven-" + parts[-1]
                picList.append(newImgUrl)
            except IndexError:
                # No thumbnail at this position: the page has been exhausted.
                print("本页结束")
                break
# 下载图片
def picDownload(a):
    """Download one image and save it under ``E:/img/``.

    Network errors, HTTP error statuses and file-system errors are reported
    with a printed message instead of being raised.

    Args:
        a: URL of the image; its last path segment is used as the file name.
    """
    imgname = a.split("/")[-1]
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}
    try:
        img = requests.get(a, headers=headers, timeout=5)
        # Reject 4xx/5xx responses so an error page is never saved as an image.
        img.raise_for_status()
        with open("E:/img/"+imgname, mode='wb') as f:
            f.write(img.content)
    except (requests.RequestException, OSError):
        print("下载失败")
        return
    # Short pause after a successful download, once the file is closed.
    time.sleep(0.1)
if __name__ == '__main__':
    start_time = time.time()
    # Module-level list that getPicUrl() fills with image URLs.
    picList = []
    getPicUrl()
    # Download with a pool of 10 worker threads. The context manager shuts
    # the pool down on exit, even if submitting or waiting raises.
    # For a sequential run, call picDownload(url) for each url in picList.
    with ThreadPoolExecutor(max_workers=10) as executor:
        # submit() takes the callable first, then the arguments passed to it.
        future_tasks = [executor.submit(picDownload, i) for i in picList]
        # Block until every download task has finished.
        wait(future_tasks, return_when=ALL_COMPLETED)
    end_time = time.time()
    print("完成时间: %s S" % (end_time - start_time))
这是爬取 1 页的效果;如需下载更多页,可自行修改 getPicUrl() 中页码循环的范围。