多线程爬取王者荣耀皮肤

weixin_51749975

已于 2023-03-17 15:12:41 修改

阅读量79

点赞数

文章标签： python 开发语言

于 2023-03-17 15:09:11 首次发布

本文链接：https://blog.csdn.net/weixin_51749975/article/details/129621237

版权

# -*- encoding:utf-8 -*-
# --Author: Administrator
# --Time: 2023/03/17 14:59

import os
import parsel
import requests
import re
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures
from tools.tool.founctions import save_directory
from utils.logging_tool.log_control import INFO

# Root directory for the downloaded images, resolved once at import time.
# NOTE(review): save_directory() is a project helper not shown in this file;
# the download code concatenates this value with str, so it presumably
# returns a str path — confirm.
path = save_directory()

# Split a sequence into fixed-size chunks
def list_remove(children_list_len, filter_list):
    """
    Split ``filter_list`` into consecutive sub-lists of a fixed length.

    The last sub-list holds the leftover items and may be shorter than the
    others. The input sequence is not modified.

    :param children_list_len: length of each sub-list; must be a positive integer
    :param filter_list: sequence to split (must support ``len()`` and slicing)
    :return: new list of sub-lists; empty when ``filter_list`` is empty
    :raises ValueError: if ``children_list_len`` is not positive
    """
    if children_list_len <= 0:
        raise ValueError(
            f"children_list_len must be positive, got {children_list_len}"
        )
    # list(...) makes every chunk a list even when the input is a tuple or str.
    return [
        list(filter_list[start:start + children_list_len])
        for start in range(0, len(filter_list), children_list_len)
    ]

def get_response(url, timeout=10):
    """
    Send a GET request with a desktop-browser User-Agent and return the response.

    :param url: address to request
    :param timeout: seconds to wait for the server before ``requests`` raises
        a timeout error (default 10)
    :return: the ``requests.Response``, with ``encoding`` set to the charset
        detected from the response body
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}
    # Without a timeout requests waits indefinitely, so a single stalled
    # connection would block the calling thread forever.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Decode text with the encoding detected from the body instead of the one
    # declared in the response headers.
    response.encoding = response.apparent_encoding
    return response

def get_hero_id():
    """
    Fetch the hero list JSON and collect the ``ename`` value of every entry.

    :return: list of ``ename`` values, in the order they appear in the response
    """
    hero_list_url = 'https://pvp.qq.com/web201605/js/herolist.json'
    heroes = get_response(hero_list_url).json()
    return [hero['ename'] for hero in heroes]

def download_image_data(hero):
    """
    Download every skin image for each hero id in ``hero`` and save it to disk.

    For each id the hero detail page is fetched, the hero name and the
    ``|``-separated skin names are read from it, and one image per skin is
    written to ``<path>/<hero name>/<skin name>.jpg``. Heroes whose page lacks
    the expected markup, and images whose request does not succeed, are
    logged and skipped so the rest of the batch still runs.

    :param hero: iterable of hero ids (``ename`` values from the hero list)
    :return: None
    """
    for hero_id in hero:
        hero_url = f'https://pvp.qq.com/web201605/herodetail/{hero_id}.shtml'
        response = get_response(hero_url)
        selector = parsel.Selector(response.text)
        hero_name = selector.css('.cover-name::text').get()
        skin_lists = re.findall('<ul class="pic-pf-list pic-pf-list3" data-imgname="(.*?)">', response.text)
        # An unexpected page layout must not raise here: an exception would
        # end this worker and drop every remaining id in the batch.
        if not hero_name or not skin_lists:
            INFO.logger.info(f"跳过：{hero_id}，页面中未找到英雄名称或皮肤列表")
            continue
        # Only the text before '&' in each entry is used as the skin name.
        skin_names = [entry.split('&')[0] for entry in skin_lists[0].split('|')]

        # One directory per hero, created once; os.path.join keeps the path
        # valid on non-Windows systems, and exist_ok avoids the check-then-create race.
        hero_dir = os.path.join(path, hero_name)
        os.makedirs(hero_dir, exist_ok=True)

        # Image URLs are numbered from 1 in the same order as the skin names.
        for page, skin in enumerate(skin_names, start=1):
            image_url = f'https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{hero_id}/{hero_id}-bigskin-{page}.jpg'
            image_response = get_response(image_url)
            # Do not write an error page to disk under a .jpg name.
            if not image_response.ok:
                INFO.logger.info(f"跳过：{hero_name}, {skin}，图片请求失败：{image_response.status_code}")
                continue
            filename = os.path.join(hero_dir, skin + '.jpg')
            with open(filename, mode='wb') as f:
                f.write(image_response.content)
            INFO.logger.info(f"{page}: 正在保存：{hero_name}, {skin}")

def main():
    """
    Fetch all hero ids, split them into batches of 10 and download each batch
    on a thread pool, logging any batch that fails.

    :return: None
    """
    data_list = list_remove(10, get_hero_id())
    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        futures = [executor.submit(download_image_data, data) for data in data_list]
        # submit() stores a worker's exception on its future; it is only
        # surfaced when result() is called, so read every future.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                # Keep the remaining batches running; just record the failure.
                INFO.logger.info(f"下载任务失败：{exc!r}")


# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

weixin_51749975

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
多线程爬取王者荣耀皮肤

本文使用 requests、parsel 和 ThreadPoolExecutor 多线程爬取王者荣耀各英雄的皮肤图片：先获取英雄 ID 列表，按指定长度切割成若干子列表，再由线程池并发下载，并按英雄名称分目录保存。
复制链接

扫一扫