from requests_html import HTMLSession
from lxml import etree
# Shared HTTP session; every request issued by Spider goes through this object.
session = HTMLSession()
import os,time
'''
1、确定主页
2、提取各个英雄的id
3、进入详情页
4、定位大图的地址 转换二进制之后保存
'''
class Spider():
    """Download the full-size skin images of every hero listed on pvp.qq.com.

    Flow: fetch the hero list page, derive each hero's id from its
    detail-page link, read the skin names from the detail page, then download
    each skin image into ``<os_path>/<hero name>/<skin name>.jpg``.

    All HTTP traffic goes through the module-level ``session`` object.
    """

    # Root output directory. It is created as soon as the class is defined,
    # which callers importing this module may rely on.
    os_path = os.getcwd()+'/王者皮肤/'
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # os.path.exists() + os.mkdir().
    os.makedirs(os_path, exist_ok=True)

    def __init__(self):
        """Set the hero list URL and the request headers sent to pvp.qq.com."""
        self.url = 'https://pvp.qq.com/web201605/herolist.shtml'
        # NOTE(review): the Cookie value looks like a browser session capture
        # that was hard-coded; consider loading it from configuration instead.
        self.headers = {
            'Cookie':'eas_sid=S1s7w0N8J4L8w1T16014w44005; eas_entry=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DvB6jj8vzMlNC2sEr7sN_fzagHnBY0DJ5IvRcLcG9BtC%26wd%3D%26eqid%3Df05cb47f001e80940000000365d55a49; LW_sid=b1E7d0X8l4c8D1k1f0i4d481R1; LW_uid=01P7O0k8g4z8E1b1l0D4u4O1N2; isHostDate=19774; PTTuserFirstTime=1708473600000; isOsSysDate=19774; PTTosSysFirstTime=1708473600000; isOsDate=19774; PTTosFirstTime=1708473600000; pgv_info=ssid=s5125162219; ts_refer=www.baidu.com/link; pgv_pvid=3206785980; ts_uid=4956394215; weekloop=0-0-0-8; ieg_ingame_userid=jD83hul578psMEp9V6DDjIJ8fDJGeIGE; ts_last=pvp.qq.com/web201605/herolist.shtml; pvpqqcomrouteLine=index_herolist_herodetail_herodetail_herolist_herolist_herolist; PTTDate=1708483090508',
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
        }

    @staticmethod
    def _hero_id_from_href(href):
        """Return the hero id embedded in a detail-page link.

        ``'herodetail/105.shtml'`` yields ``'105'``. The id is taken from the
        last path segment, so absolute URLs work as well as relative links.
        """
        filename = href.rsplit('/', 1)[-1]
        return filename.split('.', 1)[0]

    @staticmethod
    def _skin_names(raw):
        """Split a ``data-imgname`` value into skin names, keeping positions.

        Entries are separated by ``|`` and each name precedes the first ``&``
        of its entry. An empty value yields an empty list rather than a single
        blank name.
        """
        if not raw:
            return []
        return [entry.split('&')[0] for entry in raw.split('|')]

    @staticmethod
    def _safe_name(text):
        """Replace path separators so scraped text cannot change the target folder."""
        return text.replace('/', '_').replace('\\', '_')

    def get_x_url(self):
        """Fetch the hero list page and pass all hero names and links to ``parse_x``."""
        page = session.get(self.url, headers=self.headers).html
        anchors = '//div[@class="herolist-content"]/ul/li/a'
        hero_name = page.xpath(anchors + '/text()')
        hero_url = page.xpath(anchors + '/@href')
        self.parse_x(hero_name, hero_url)

    def parse_x(self, name, id):
        """Build each hero's detail-page URL and process it with ``parse_s``.

        Args:
            name: iterable of hero display names.
            id: iterable of detail-page hrefs, paired positionally with
                ``name`` (the parameter name is kept for compatibility even
                though it shadows the builtin).
        """
        for hero_name, href in zip(name, id):
            hero_id = self._hero_id_from_href(href)
            # Detail page of this hero.
            x_url = 'https://pvp.qq.com/web201605/herodetail/' + hero_id + '.shtml'
            self.parse_s(x_url, hero_id, hero_name)

    def parse_s(self, x_url, id, hero_name):
        """Request a hero detail page and download every skin listed on it.

        Args:
            x_url: URL of the hero detail page.
            id: hero id used to build the image URLs.
            hero_name: hero display name, used as the output folder name.
        """
        detail = session.get(x_url, headers=self.headers).html
        raw_names = ''.join(detail.xpath('//div[@class="pic-pf"]/ul/@data-imgname'))
        # Image files are numbered from 1 in the order the skins are listed.
        for num, name in enumerate(self._skin_names(raw_names), start=1):
            if not name:
                # A blank entry would otherwise be written as ".jpg".
                continue
            img_url = f'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{id}/{id}-bigskin-{num}.jpg'
            response = session.get(img_url)
            if response.status_code != 200:
                # Do not store an error page under a .jpg name.
                print(f'{hero_name}----皮肤{name}---下载失败(HTTP {response.status_code})')
                continue
            self.save_pifu(response.content, name, hero_name)

    def save_pifu(self, datas, names, hero_name):
        """Write one skin image to ``<os_path>/<hero_name>/<names>.jpg``.

        Args:
            datas: raw image bytes.
            names: skin name, used as the file name.
            hero_name: hero name, used as the folder name.
        """
        hero_dir = os.path.join(self.os_path, self._safe_name(hero_name))
        # Creates missing parents too and tolerates an already existing folder.
        os.makedirs(hero_dir, exist_ok=True)
        target = os.path.join(hero_dir, self._safe_name(names) + '.jpg')
        with open(target, 'wb') as f:
            f.write(datas)
        print(f'{hero_name}----皮肤{names}---下载完成')
if __name__ == '__main__':
    # Script entry point: build the spider and start from the hero list page.
    spider = Spider()
    spider.get_x_url()
# 王者荣耀官网英雄皮肤爬取实战
# 最新推荐文章于 2024-11-03 00:03:15 发布