王者荣耀皮肤爬取-CSDN博客

本文链接：https://blog.csdn.net/q2281582306/article/details/80587820

今天听了老师的课，跟着做了一个爬取王者农药皮肤的爬虫

from bs4 import BeautifulSoup
import urllib.request
import requests
import json
import  os

# Honor of Kings skin scraper: fetch the hero list, which is served as JSON
# (a lightweight {key: value, ...} data format).
# The `with` block closes the HTTP response as soon as the body has been read.
with urllib.request.urlopen("http://pvp.qq.com/web201605/js/herolist.json") as v_herolist_url:
    # The payload may start with a UTF-8 byte-order mark (\xef\xbb\xbf).
    # 'utf-8-sig' removes it only when it is actually present, whereas
    # blindly dropping the first three bytes would corrupt a BOM-less reply.
    v_herolist = v_herolist_url.read().decode('utf-8-sig')
# Parse the JSON text into Python objects (a list with one dict per hero).
hero_json = json.loads(v_herolist)

# Local folder that receives the downloaded skin images.
# Backslashes are escaped explicitly: the previous '\m' was an invalid escape
# sequence that only worked by accident and triggers a warning on newer Pythons.
hero_dir = 'G:\\myhero1\\'
# Create the folder once up front instead of re-checking it for every hero.
os.makedirs(hero_dir, exist_ok=True)

cnt = 0  # number of images downloaded successfully
for hero in hero_json:
    hero_id = hero['ename']  # hero id, used to build the image URL
    hero_name = hero['cname']  # hero display name
    skin_name = hero['skin_name'].split('|')  # skin names, '|'-separated
    # Image URLs are numbered from 1; this assumes the n-th name in the list
    # belongs to the n-th image, so the file is named after the same skin
    # that the URL and the log line refer to.
    for skin_no, skin in enumerate(skin_name, start=1):
        hero_img = os.path.join(hero_dir, hero_name + "-" + skin + ".jpg")
        url_aim = (
            "http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
            + str(hero_id) + "/" + str(hero_id) + "-bigskin-" + str(skin_no) + ".jpg"
        )
        try:
            urllib.request.urlretrieve(url_aim, hero_img)
        except OSError as err:
            # URLError/HTTPError and local file-write failures all derive from
            # OSError; report the failed skin and keep going with the rest.
            print("下载失败:" + hero_name + "-" + skin + " (" + str(err) + ")")
            continue
        cnt += 1
        print("正在写入:" + hero_name + "-" + skin)


print("爬取完成! 共爬取:" + str(cnt) + "次")

效果图：