受到 ~wangweijun 的《20行Python代码爬取王者荣耀全英雄皮肤》一文的启发,
我尝试用同样的方法爬取LOL皮肤,果然方法大同小异,毕竟都是腾讯系的游戏。
首先进入LOL官网
进入资料库
获取全英雄的详细列表,包括ID、绰号、英文名、中文名等等
# URL of the hero-list data file (per the steps above: official site -> data library).
url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
# Fetch the hero list over HTTP; the response is parsed further below.
herolist = requests.get(url)
以火女皮肤地址为例
https://game.gtimg.cn/images/lol/act/img/skin/big1000.jpg
https://game.gtimg.cn/images/lol/act/img/skin/big1001.jpg
https://game.gtimg.cn/images/lol/act/img/skin/big1002.jpg
找到规律:big后面先是英雄ID(这里是1),紧接着的000、001、002是三位的皮肤编码,只要拼接好这些图片地址就可以了。
接下来是完整的代码。
先在要存放的地方(代码中为 E:\Picture\lol)建一个名为“lol”的文件夹即可。
import os
import requests
from urllib import error
import socket
# Endpoint that returns the list of all heroes as JSON.
url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() fails early with a clear HTTP error instead of a
# confusing JSON decode error further down.
herolist = requests.get(url, timeout=10)
herolist.raise_for_status()
# Parse the body once and reuse the result for all three field extractions.
herolist_json = herolist.json()
# 'name' field of each hero (the original author's notes call it the nickname).
hero_name = [hero['name'] for hero in herolist_json['hero']]
# 'title' field of each hero (the original author's notes call it the name).
hero_title = [hero['title'] for hero in herolist_json['hero']]
# 'heroId' field of each hero; used to build the skin image URLs.
hero_number = [hero['heroId'] for hero in herolist_json['hero']]
# Browser-like User-Agent sent with every image request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}
# Download the skin images
def downloadPic(save_root="E:\\Picture\\lol\\", max_skins=20, timeout=10):
    """Download the skin images of every hero into one folder per hero.

    Reads the module-level ``hero_number``, ``hero_name``, ``hero_title``
    and ``headers`` values. For each hero a folder named
    ``<name>-<title>`` is created under ``save_root`` and every skin image
    that the server answers with HTTP 200 is saved there as ``<k>.jpg``.

    Args:
        save_root: Directory that receives the per-hero folders. It is
            created if it does not exist yet.
        max_skins: Number of skin codes (0 .. max_skins - 1) to try per hero.
        timeout: Seconds to wait for each image request before giving up.

    Files are written through their full path, so the working directory of
    the process is left untouched.
    """
    # Skin images are addressed as big<heroId><three-digit skin code>.jpg.
    link_template = 'https://game.gtimg.cn/images/lol/act/img/skin/big{}{:03d}.jpg'

    for hero_id, name, title in zip(hero_number, hero_name, hero_title):
        # Create the hero folder; exist_ok makes reruns safe.
        hero_dir = os.path.join(save_root, name + "-" + title)
        os.makedirs(hero_dir, exist_ok=True)

        for k in range(max_skins):
            onehero_link = link_template.format(hero_id, k)
            try:
                im = requests.get(onehero_link, headers=headers, timeout=timeout)
            except requests.exceptions.Timeout:
                # requests raises its own exception types, not urllib's URLError.
                print('超时,执行下一个请求')
                continue
            except requests.exceptions.RequestException as e:
                print('请求失败,执行下一个请求: {}'.format(e))
                continue

            # Skin codes that do not exist are answered with a non-200 status.
            if im.status_code == 200:
                with open(os.path.join(hero_dir, str(k) + '.jpg'), 'wb') as f:
                    f.write(im.content)  # write the image to disk
# Start the download as soon as the script runs.
downloadPic()
成果如下
下面这种方法可以爬到皮肤名字
import urllib.request
import jsonpath
import json
import os
import time
from urllib import error
import socket,requests
print("努力成为爬虫大神")
timestart = time.time()
# Browser-like User-Agent sent with every image request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'}

# Hero ids to probe: HERO_ID_START inclusive, HERO_ID_STOP exclusive.
# NOTE(review): only ids 555-599 are covered; widen the range to fetch more heroes.
HERO_ID_START, HERO_ID_STOP = 555, 600
# Seconds to wait on any single network operation.
REQUEST_TIMEOUT = 10
# Characters that cannot appear in a file name are replaced by "_", so a skin
# or hero name containing e.g. "/" no longer breaks the save path.
UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

# urlretrieve() has no timeout argument; it uses the global socket default.
socket.setdefaulttimeout(REQUEST_TIMEOUT)

# Folder for the per-hero JSON files; exist_ok makes reruns safe.
os.makedirs("json", exist_ok=True)

for s in range(HERO_ID_START, HERO_ID_STOP):
    hero_urls = 'https://game.gtimg.cn/images/lol/act/img/js/hero/'+str(s)+'.js'
    j_name = 'json/zms'+str(s)+'.json'

    # Download and parse the per-hero JSON file.
    try:
        urllib.request.urlretrieve(url=hero_urls, filename=j_name)
        with open(j_name, 'r', encoding='utf-8') as f:
            obj = json.load(f)
    except error.HTTPError:
        # The server rejected this id (e.g. no such hero): skip it quietly.
        continue
    except (error.URLError, OSError, ValueError) as e:
        # Network failure, timeout, file error or malformed JSON.
        print("英雄{}数据获取失败,跳过: {}".format(s, e))
        continue

    hero_name = jsonpath.jsonpath(obj, '$.hero..name')
    hero_title = jsonpath.jsonpath(obj, '$.hero..title')
    skins_name = jsonpath.jsonpath(obj, '$.skins..name')
    skins_mainImg = jsonpath.jsonpath(obj, '$.skins..mainImg')
    # jsonpath() returns False when a path matches nothing, so check before indexing.
    if not (hero_name and hero_title and skins_name and skins_mainImg):
        print("英雄{}数据缺少字段,跳过".format(s))
        continue

    print("开始爬{}".format(hero_name[0]))
    # One folder per hero, named "<title> <name>".
    docname = (hero_title[0] + " " + hero_name[0]).translate(UNSAFE_FILENAME_CHARS)
    os.makedirs(docname, exist_ok=True)

    for skin_name, img_url in zip(skins_name, skins_mainImg):
        # Entries without a main image have nothing to download.
        if not img_url:
            continue
        try:
            im = requests.get(img_url, headers=headers, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.Timeout:
            # requests raises its own exception types, not urllib's URLError.
            print('超时,执行下一个请求')
            continue
        except requests.exceptions.RequestException as e:
            print('请求失败,执行下一个请求: {}'.format(e))
            continue
        # Do not save an error page as a .jpg.
        if im.status_code != 200:
            continue

        pic_path = os.path.join(docname, skin_name.translate(UNSAFE_FILENAME_CHARS) + ".jpg")
        try:
            with open(pic_path, 'wb') as f:
                f.write(im.content)  # write the image to disk
        except OSError as e:
            # A failed save affects this skin only, not the rest of the hero.
            print('保存失败,执行下一个请求: {}'.format(e))

timeend = time.time()
print("一共用时:{}秒".format(timeend-timestart))