import requests
import threading
from queue import Queue
from lxml import etree
import time
import wget
import os
urlQueue=Queue()  # shared queue of hero-page URLs consumed by worker threads
threads_num=100  # number of worker threads started by main()
threads=[]  # handles of started Thread objects, joined at the end of main()
urList=[]  # hero-page URL list, populated by getUrl()
skin=[]  # unused at module level; shadowed by locals in tree_parser/downloads
def getUrl(UrlList, count=42):
    """Append skin-page URLs for heroes 1..count to UrlList and return it.

    Args:
        UrlList: list extended in place with the generated URLs.
        count: number of hero pages to generate (default 42, matching the
            original hard-coded ``range(1, 43)``).

    Returns:
        The same list object, extended with the hero-page URLs.
    """
    UrlList.extend(
        "http://lol.52pk.com/pifu/hero/hero_" + str(i) + ".shtml"
        for i in range(1, count + 1)
    )
    return UrlList
def download(url):
    """Fetch *url* and return the response body as text, or None on failure.

    Defect fixed: the original wrote ``r.raise_for_status`` without calling
    it (a no-op attribute access), so HTTP 4xx/5xx responses were silently
    treated as success.  Also narrows the bare ``except`` and adds a timeout
    so a dead server cannot hang a worker forever.
    """
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()  # actually invoke it so HTTP errors raise
        print(r.url)
        return r.text
    except requests.RequestException:
        print("抓取失败")
        return None
def tree_parser(html):
    """Extract skin image URLs from a hero page's HTML.

    Returns a (possibly empty) list of ``src`` attribute values from the
    page's skin thumbnails.  On parse failure an empty list is returned so
    callers can iterate the result safely — the original returned None,
    which crashed the caller's ``for`` loop with a TypeError.
    """
    try:
        tre = etree.HTML(html)
        return tre.xpath('//li[@class="boxShadow"]/a/img/@src')
    except Exception:  # narrow from bare except; still best-effort
        print("分析失败")
        return []
def downloads():
    """Worker loop: pull hero-page URLs from the shared queue and save skins.

    Exits when a sentinel is pulled from the queue.  Defect fixed: main()
    enqueues the STRING ``"None"`` while this loop tested ``url is None``,
    so workers never terminated; both sentinel forms are now accepted.
    """
    while True:
        url = urlQueue.get()  # blocks until a URL (or sentinel) is available
        # Accept both None and the "None" string that main() enqueues.
        if url is None or url == "None":
            break
        page = download(url)
        skins = tree_parser(page) if page else []
        # `or []` guards against a parser that returns None on failure.
        for src in skins or []:
            try:
                path = "D:\\skin"
                # NOTE(review): splitting on "1" to derive a filename looks
                # fragile — presumably it strips a URL prefix; verify.
                wget.download(src, os.path.join(path, src.split("1")[-1]))
            except Exception:
                print("存储失败", src)
        urlQueue.task_done()
        print("剩余:", urlQueue.qsize())
def main():
    """Fill the URL queue, run the worker thread pool, and wait for it."""
    for url in getUrl(urList):  # enqueue every hero-page URL
        urlQueue.put(url)
    # Start the workers.  Defects fixed: the original used
    # ``target=downloads()`` — CALLING the worker in the main thread and
    # passing its return value as the target, so no threading ever
    # happened — and misspelled ``join()`` as ``jion()``.
    for _ in range(threads_num):
        t = threading.Thread(target=downloads)
        t.start()
        threads.append(t)
    urlQueue.join()  # wait until every queued URL has been processed
    # One sentinel per worker so every thread's loop can exit.
    # Enqueue the None object (the value the workers' exit test checks),
    # not the string "None".
    for _ in range(threads_num):
        urlQueue.put(None)
    for t in threads:
        t.join()
    print("结束程序")
if __name__ == "__main__":
    # Guard the entry point so importing this module does not start the
    # scrape; time the whole run for the final report.
    starttime = time.time()
    main()
    times = time.time() - starttime
    print(times)
# Multithreaded scraper that downloads and saves every League of Legends skin.
# (Blog footer from the original article, last published 2020-10-18 22:27:53;
# converted to comments — as bare text these lines were a SyntaxError.)