python爬虫入门-1（下载王者英雄图片）

最新推荐文章于 2024-09-22 14:50:35 发布

Weiyaner

最新推荐文章于 2024-09-22 14:50:35 发布

阅读量389

点赞数 1

文章标签： python 爬虫入门批量下载王者英雄图像

本文链接：https://blog.csdn.net/weixin_42327752/article/details/93322068

版权

1.流程

王者荣耀官网，进入英雄界面https://pvp.qq.com/web201605/herolist.shtml
进入审查元素
找到每个英雄的图片URL和名称所在位置

    src="//game.gtimg.cn/images/yxzj/img201606/heroimg/506/506.jpg"
    alt="云中君"

主程序

# -*- coding: utf-8 -*-
"""
Created on Sat Jun 22 09:16:17 2019

@author: weiyaner
'''
# download_hero_img(data1,data2)
    函数功能：将一个英雄的图片从网络上下载到本地，并以英雄名保存
    参数描述：
    data1:英雄名字
    data2: 英雄图片的URL
    返回值：None
# cutstring(data,str_start,str_end)
    函数功能：对data字符串进行选定部分截取
    参数描述：
    str_start：开始标识str
    str_end:结束标识str
    返回值：目标内容list

"""

import urllib.request
#from urllib.parse import quote
#import string

def download_hero_img(data1,data2):

    i=0
    while i<=len(data1):
        try:
#            url = "http://game.gtimg.cn/images/yxzj/img201606/heroimg/%s/%s.jpg" % (i,i)
#            s = quote(data2,safe=string.printable) #解决中英文网址读取错误方案
            f1=urllib.request.urlopen(data2[i])
        except urllib.error.HTTPError:
            i+=1
            continue
        f2 = open("heros\\%s.jpg" % data1[i],"wb")
        img_data = f1.read()
        f2.write(img_data)
        i+=1
        f1.close()
        f2.close()
        print("已完成%d张" % i)

def cutstring(data,str_start,str_end):
    s1 = str_start
    s2 = str_end
    i=0
    l=len(data)
    web=[]
    Id=[]
    a=0
    while i<l: 
        #获取图片网址
        m1= data.find(s1,i,l)
        m1+=len(s1)   
        m2 = data.find(s2,m1)
        web.append(data[m1:m2])
        Id.append(m1)
        i=m1     
        a+=1
        if Id[a-1]-Id[a-2]<0:
            print(0)
            break       
    return web

with urllib.request.urlopen("https://pvp.qq.com/web201605/herolist.shtml") as f:
    data = f.read().decode("gbk")  #将字节型数据转换成字符串类型
data1=cutstring(data,'alt="','"')
data2=cutstring(data,'src="','"')
for i in range(0,len(data1)):
    if data2[i][0] != "'" and data2[i][-1] != "s" and data2[i][0] != "o":  #消除垃圾内容
        data2[i] = "https:" + data2[i]
#        print(data1[i],data2[i])

def main():
    download_hero_img(data1,data2)
if __name__ == 'main':
    main()