# Python3 爬虫:多线程爬取王者荣耀所有英雄的皮肤图片
# 代码实现如下,运行时间大约半分钟
import requests
import os
import json
import threading
from lxml import etree
import time
# Worker threads started by duo(); duo() joins every thread stored here.
h=[]
# Timestamp taken when the module is loaded; the __main__ block subtracts it
# from the finish time to report the elapsed seconds.
s=time.time()
def pa(j, timeout=30):
    """Download all skin images of one hero into ./王者荣耀/<hero name>/.

    The hero's detail page is fetched to read the skin names from the
    ``data-imgname`` attribute, then the numbered "bigskin" images are
    requested in order and each one is saved under its skin name.

    Args:
        j: One hero entry; must provide 'ename' (the id used in the URLs)
            and 'cname' (the display name used as the folder name).
        timeout: Seconds to wait on each HTTP request, so that a stalled
            connection cannot block the calling thread forever.

    Raises:
        KeyError: If ``j`` has no 'ename' or 'cname' key.
        requests.RequestException: On connection failures or timeouts.
    """
    num = j['ename']
    name = j['cname']

    res2 = requests.get(
        "https://pvp.qq.com/web201605/herodetail/{}.shtml".format(num),
        timeout=timeout,
    )
    # Decode the raw bytes as GBK before handing the markup to lxml.
    res2_decode = res2.content.decode('gbk')
    _element = etree.HTML(res2_decode)
    if _element is None:
        return
    element_img = _element.xpath('//div[@class="pic-pf"]/ul/@data-imgname')
    if not element_img:
        # The page carries no skin list (e.g. an error page): nothing to do.
        return

    # Entries are separated by '|'; each entry is "<skin name>&<suffix>".
    name_img = element_img[0].split('|')
    a = './王者荣耀/' + str(name)

    # Walk the skin names instead of a fixed range(10): heroes with more than
    # ten skins are fully covered and the index can never run past the list.
    for index, entry in enumerate(name_img, start=1):
        res1 = requests.get(
            "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{0}/{0}-bigskin-{1}.jpg".format(num, index),
            timeout=timeout,
        )
        if res1.status_code != 200:
            # Images are numbered consecutively; the first miss ends the run.
            break

        # partition() keeps the whole entry when '&' is absent, whereas
        # slicing with find() == -1 would drop the last character.
        bb = entry.partition('&')[0]

        # exist_ok avoids the exists()/mkdir() race between worker threads
        # that all try to create the shared parent directory.
        os.makedirs(a, exist_ok=True)
        b = a + '/' + bb + '.jpg'
        with open(b, "wb") as f:
            f.write(res1.content)
        print(name, bb)
def duo():
    """Fetch the hero list and download every hero's skins concurrently.

    One thread running ``pa`` is started per hero entry. Each thread is
    recorded in the module-level list ``h``, and the function returns only
    after every thread in ``h`` has finished.
    """
    listing = requests.get('https://pvp.qq.com/web201605/js/herolist.json')
    heroes = json.loads(listing.text)

    # Fan out: start a worker per hero and remember it for the join below.
    for hero in heroes:
        worker = threading.Thread(target=pa, args=(hero,))
        worker.start()
        h.append(worker)

    # Fan in: block until all recorded workers are done.
    for worker in h:
        worker.join()
if __name__ == '__main__':
    # Run the whole crawl, then print the seconds elapsed since the
    # module-level start timestamp `s` was taken.
    duo()
    g = time.time()
    print("用时:", g - s, "秒")