炉石传说 爬取全部卡牌

之前我30行爬了英雄联盟全部皮肤
这次爬炉石稍稍麻烦点,50行

网页分析

首先,我们分析炉石官方网站卡牌工具https://hs.blizzard.cn/cards/
通过查看网页源代码并在浏览器开发者工具的Network面板中抓包分析,发现卡牌数据是通过POST请求获取的,服务器返回的内容为JSON格式

请求数据为:
cost: #费用
cardClass: hunter  #卡的类型:职业
keywords: 
standard: 1
t: 1576286199445 #时间戳(毫秒)
cardSet: #卡牌系列
p: 1 #页码(每页8张卡)

导入全部所需包

import urllib.request
import urllib.parse
import jsonpath
import json
import os
import time

请求并解析全部JSON

# Card classes to crawl; each value is sent as the `cardClass` form field
# and is also used as the name of the output directory.
cardClasses=["druid","hunter","mage","paladin","priest","rogue","shaman","warlock","warrior","neutral"]

# Endpoint that receives the POST query for cards.
lushi_urls = 'https://hs.blizzard.cn/action/cards/query'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

for cardClass in cardClasses:
    # One output directory per card class, created on first use.
    if not os.path.exists(cardClass):
        os.mkdir(cardClass)
    print("开始爬{}了".format(cardClass))
    # Pages 1..59 are requested for every class.
    for p in range(1,60):
        print(p)
        try:
            # NOTE(review): these two assignments repeat the module-level
            # values above on every iteration.
            lushi_urls = 'https://hs.blizzard.cn/action/cards/query'
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
            }
            # Form fields of the POST request; `p` is the page number.
            data = {
                'cost': '',
                'cardClass': cardClass,
                'keywords': '',
                'standard': '1',
                't': int(time.time()),
                'cardSet': '',
                'p': p
            }
            # URL-encode the form and turn it into bytes; passing `data`
            # to Request makes urllib send a POST.
            data = urllib.parse.urlencode(data).encode("utf-8")
            request = urllib.request.Request(url=lushi_urls, headers=headers, data=data)
            response = urllib.request.urlopen(request)
            content = response.read().decode('utf-8')
            jsondict = json.loads(content)
            # Collect every `name` and `pic` value found below any `cards` key.
            card_names = jsonpath.jsonpath(jsondict, '$..cards..name')
            card_pics = jsonpath.jsonpath(jsondict, '$..cards..pic')
            # NOTE: this excerpt stops inside the try block; the download loop
            # and the matching except clause are shown in the full listing.

创建目录并保存文件

# Save every card image of the current page as <card class>/<card name>.png.
# NOTE: in the full script this loop sits inside the try block of the page loop.
for card_name, card_pic in zip(card_names, card_pics):
    card_path = cardClass + "/" + card_name + ".png"
    urllib.request.urlretrieve(url=card_pic, filename=card_path)

完整代码

import urllib.request
import urllib.parse
import jsonpath
import json
import os
import time

# Card classes to crawl; each value is sent as the `cardClass` form field
# and doubles as the name of the output directory.
cardClasses = ["druid", "hunter", "mage", "paladin", "priest", "rogue",
               "shaman", "warlock", "warrior", "neutral"]

# Endpoint that receives the POST query for cards.
lushi_urls = 'https://hs.blizzard.cn/action/cards/query'
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
}

# Characters that cannot appear in a file name (path separators everywhere,
# the rest on Windows); each one is replaced by "_".
_UNSAFE_CHARS = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})


def safe_filename(name):
    """Return *name* made safe for use as a single file-name component.

    Path separators and other reserved characters become "_", surrounding
    whitespace is stripped, and an empty result is replaced by "_".
    """
    return name.translate(_UNSAFE_CHARS).strip() or "_"


def fetch_page(cardClass, p):
    """Request page *p* of *cardClass* and return its cards.

    Returns a list of ``(name, picture_url)`` tuples; the list is empty when
    the response contains no cards.

    Raises:
        OSError: on network failures (``urllib.error.URLError`` is a subclass).
        ValueError: when the response body is not valid JSON.
    """
    data = {
        'cost': '',
        'cardClass': cardClass,
        'keywords': '',
        'standard': '1',
        # Millisecond timestamp, the format seen in the site's own requests.
        't': int(time.time() * 1000),
        'cardSet': '',
        'p': p,
    }
    # Passing a body to Request makes urllib send a POST.
    body = urllib.parse.urlencode(data).encode("utf-8")
    request = urllib.request.Request(url=lushi_urls, headers=headers, data=body)
    # The context manager closes the connection; the timeout keeps one hung
    # request from stalling the whole crawl.
    with urllib.request.urlopen(request, timeout=30) as response:
        jsondict = json.loads(response.read().decode('utf-8'))
    # Collect every `name` and `pic` value found below any `cards` key.
    card_names = jsonpath.jsonpath(jsondict, '$..cards..name')
    card_pics = jsonpath.jsonpath(jsondict, '$..cards..pic')
    # A falsy result means nothing matched, i.e. the page has no cards.
    if not card_names or not card_pics:
        return []
    return list(zip(card_names, card_pics))


def download_class(cardClass):
    """Download all card images of *cardClass* into a directory of that name."""
    os.makedirs(cardClass, exist_ok=True)
    print("开始爬{}了".format(cardClass))
    for p in range(1, 60):
        print(p)
        try:
            cards = fetch_page(cardClass, p)
        except Exception as err:
            # Best effort: report the failure and try the next page.
            # Unlike a bare `except`, this still lets Ctrl-C stop the script.
            print("page {} of {} failed: {!r}".format(p, cardClass, err))
            continue
        if not cards:
            # An empty page means we are past the last one; stop requesting.
            break
        for card_name, card_pic in cards:
            card_path = os.path.join(cardClass, safe_filename(card_name) + ".png")
            try:
                urllib.request.urlretrieve(url=card_pic, filename=card_path)
            except Exception as err:
                # One broken image must not abort the rest of the page.
                print("download of {} failed: {!r}".format(card_path, err))


def main():
    """Crawl every card class and report the total elapsed time."""
    print("努力成为爬虫大神")
    timestart = time.time()
    for cardClass in cardClasses:
        download_class(cardClass)
    timeend = time.time()
    print("一共用时:{}秒".format(timeend - timestart))


if __name__ == "__main__":
    main()

成果今晚的酒馆?

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值