"""Download hero skin images from pvp.qq.com into per-hero folders.

The script fetches the hero list page, follows every hero link found on it,
reads the hero name, hero id and skin names from each detail page, and saves
one JPEG per skin under ``王者荣耀皮肤/<hero name>/``.
"""
import os
import re

import requests
from lxml import etree

BASE_URL = 'https://pvp.qq.com/web201605/'
INDEX_URL = BASE_URL + 'herolist.shtml'
SKIN_URL_TEMPLATE = (
    'http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/'
    '{}/{}-bigskin-{}.jpg'
)
SAVE_DIR = '王者荣耀皮肤'
PAGE_ENCODING = 'gbk'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# NOTE(review): the HTML markup inside the three detail-page patterns was
# stripped from the copy of this file that was reviewed; only "(.*?)",
# "(\d+)" and a list bullet survived. A pattern is reconstructed only where
# the surviving text could not work at all, and each reconstruction is a
# best-effort guess that MUST be confirmed against the live page markup.
# Assumed: the hero name sits in an <h2 class="cover-name"> element.
HERO_NAME_PATTERN = r'<h2 class="cover-name">(.*?)</h2>'
# Kept exactly as it survived. It is probably missing the markup that
# anchored it; as written it captures the first run of digits on the page.
HERO_ID_PATTERN = r'(\d+)'
# Assumed: the skin names are carried in a data-imgname attribute. The code
# below needs a single captured string that it can split on '|'.
SKIN_NAMES_PATTERN = r'data-imgname="(.*?)"'


def get_index():
    """Fetch the hero list page.

    Returns:
        The page body decoded as GBK text, or None when the server does not
        answer with HTTP 200.
    """
    response = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        return None
    return response.content.decode(PAGE_ENCODING)


def parse_index(html):
    """Extract the absolute URL of every hero detail page.

    Args:
        html: Text of the hero list page. None or an empty string yields an
            empty list instead of an lxml error.

    Returns:
        A list of URLs built by prefixing each href found under the
        "herolist clearfix" element with the site base URL.
    """
    if not html:
        return []
    selector = etree.HTML(html)
    hrefs = selector.xpath('//*[@class="herolist clearfix"]/li/a/@href')
    return [BASE_URL + href for href in hrefs]


def parse_deatil(url):
    """Download every skin image of the hero described at *url*.

    Images are written to ``<SAVE_DIR>/<hero name>/<skin name>.jpg``. A page
    that cannot be fetched or does not contain the expected markup is skipped
    with a message, so one bad page does not abort the whole crawl.

    Args:
        url: Absolute URL of a hero detail page.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        print('skipped {}: HTTP {}'.format(url, response.status_code))
        return
    html = response.content.decode(PAGE_ENCODING)

    names = re.findall(HERO_NAME_PATTERN, html, re.S)
    hero_ids = re.findall(HERO_ID_PATTERN, html, re.S)
    skin_lists = re.findall(SKIN_NAMES_PATTERN, html, re.S)
    if not (names and hero_ids and skin_lists):
        print('skipped {}: expected markup not found'.format(url))
        return

    hero_name = names[0]
    hero_id = hero_ids[0]
    # Entries are separated by '|'; only the text before the first '&' of
    # each entry is used as the skin name.
    skin_names = [entry.split('&')[0] for entry in skin_lists[0].split('|')]

    hero_dir = os.path.join(SAVE_DIR, hero_name)
    # Creates SAVE_DIR as well when missing and tolerates existing folders.
    os.makedirs(hero_dir, exist_ok=True)

    print('正在爬取:{}{}'.format(hero_name, skin_names))
    # Skin images are numbered from 1 in the image URL.
    for number, skin_name in enumerate(skin_names, start=1):
        skin_url = SKIN_URL_TEMPLATE.format(hero_id, hero_id, number)
        res = requests.get(skin_url, timeout=REQUEST_TIMEOUT)
        if res.status_code != 200:
            # Do not store an error page under a .jpg name.
            print('skipped {}: HTTP {}'.format(skin_url, res.status_code))
            continue
        file_name = '{}.jpg'.format(skin_name)
        with open(os.path.join(hero_dir, file_name), 'wb') as f:
            f.write(res.content)


def main():
    """Crawl the hero list and download the skins of every hero on it."""
    html = get_index()
    if html is None:
        raise SystemExit('failed to fetch hero list: {}'.format(INDEX_URL))
    for link in parse_index(html):
        parse_deatil(link)


if __name__ == '__main__':
    main()