import os
import time
import requests
import json
from selenium.webdriver import Chrome
from lxml import etree
def requests_get(href, timeout=10):
    """Fetch ``href`` with a browser-like User-Agent header.

    Args:
        href: URL to request.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.
            Without one, a server that never answers would block the
            call indefinitely.

    Returns:
        The ``requests.Response`` when the status code is 200. For any
        other status the code is printed and ``None`` is returned, so
        callers must check the result before using it.

    Note:
        Connection errors and timeouts raised by ``requests`` are not
        caught here and propagate to the caller.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36'
    }
    resp = requests.get(url=href, headers=headers, timeout=timeout)
    if resp.status_code == 200:
        return resp
    print(resp.status_code)
    return None
def get_skin(num, name):
    """Download the skin images of one hero into ``./英雄皮肤/<name>/``.

    The hero's data is fetched from the ``hero/{num}.js`` endpoint and
    parsed as JSON. Every entry of its ``skins`` list that has a
    non-empty ``mainImg`` URL is saved as ``<skin name>.jpg``.

    Args:
        num: Hero id inserted into the hero data URL.
        name: Hero name, used as the name of the output sub-directory.

    Returns:
        None. If the hero data cannot be fetched the function returns
        early; skins whose image cannot be fetched are skipped.
    """
    # Replace characters the file system does not support in file names.
    unsafe = str.maketrans({c: '_' for c in '*|:?/<>"\\'})

    # The hero name becomes a directory name, so it gets the same
    # treatment as the skin names below.
    hero_dir = os.path.join('.', '英雄皮肤', name.translate(unsafe))
    # makedirs creates the parent folder on the first run and does not
    # fail when the directory already exists on a re-run.
    os.makedirs(hero_dir, exist_ok=True)

    hero_href = f'https://game.gtimg.cn/images/lol/act/img/js/hero/{num}.js'
    resp = requests_get(hero_href)
    if resp is None:
        # requests_get has already printed the failing status code.
        return
    data = resp.json()

    for skin in data['skins']:
        skin_img_url = skin['mainImg']
        if not skin_img_url:
            # Some skin entries carry no main image; nothing to save.
            continue

        # Go through requests_get so the image request carries the same
        # headers and a non-200 answer is never written to disk as a .jpg.
        img_resp = requests_get(skin_img_url)
        if img_resp is None:
            continue

        skin_name = skin['name'].translate(unsafe)
        print(skin_name)
        with open(os.path.join(hero_dir, f'{skin_name}.jpg'), 'wb') as f:
            f.write(img_resp.content)
# Load the hero overview page in a real browser and parse the resulting HTML.
b = Chrome()
try:
    b.get('https://lol.qq.com/data/info-heros.shtml')
    # Presumably gives the page time to render the hero list before the
    # HTML is read; a fixed wait, so it may need tuning on slow networks.
    time.sleep(1)
    root = etree.HTML(b.page_source)
finally:
    # quit() ends the whole driver session (close() only closes the
    # window), and the finally block runs it even if loading fails.
    b.quit()

heroes = root.xpath('//ul[@class="imgtextlist"]/li')
for hero_item in heroes:
    href = hero_item.xpath('a/@href')
    hero_name = hero_item.xpath('a/p/text()')
    # Skip list items without a link, a name, or an "=" in the link,
    # instead of failing with IndexError.
    if not href or not hero_name or '=' not in href[0]:
        continue
    # The hero id is the text after the first "=" in the link.
    serial_number = href[0].split('=')[1]
    print(serial_number)
    print(hero_name[0])
    get_skin(serial_number, hero_name[0])
```

![共创建159个文件夹](https://img-blog.csdnimg.cn/46ce4e2af9774c25a38c601437f1c850.png)

将皮肤名中的特殊字符进行了替换：

![皮肤名中的特殊字符已被替换](https://img-blog.csdnimg.cn/0013f34e520b4f28b42e2a0a3dc98ead.png)
爬虫——英雄皮肤
最新推荐文章于 2024-09-13 15:59:37 发布