王者荣耀英雄图片爬取（保存到一个文件夹里）

最新推荐文章于 2025-05-29 10:27:40 发布

shix .

最新推荐文章于 2025-05-29 10:27:40 发布

阅读量165

点赞数 3

分类专栏：爬虫 | 文章标签：python beautifulsoup

本文链接：https://blog.csdn.net/2302_80729149/article/details/146252220

版权

爬虫专栏收录该内容

15 篇文章

订阅专栏

这里我直接使用正则表达式匹配图片链接和英雄名称（alt 属性）：

import requests

import re
import os

# Request headers mimicking an XHR issued by Chrome 133 on Windows.
# Single-quoted literals are used so the embedded double quotes in the
# client-hint values need no escaping.
headers = {
    'accept': 'application/json, text/javascript, */*; q=0.01',
    'accept-language': 'zh-CN,zh;q=0.9,oc;q=0.8',
    'cache-control': 'no-cache',
    'content-type': 'application/json; charset=utf-8',
    'pragma': 'no-cache',
    'priority': 'u=1, i',
    'referer': 'https://pvp.qq.com/web201605/herolist.shtml',
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'x-requested-with': 'XMLHttpRequest',
}
# Hero list page whose HTML is scanned for <img> tags further down.
url = "https://pvp.qq.com/web201605/herolist.shtml"
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, timeout=10)
# Fail fast on an HTTP error instead of scraping an error page.
response.raise_for_status()
# Decode the body as GBK explicitly rather than relying on auto-detection.
response.encoding = "gbk"
html_content = response.text
# Dump the decoded page so the markup being matched can be inspected.
print(html_content)
# Capture (src, alt) from each <img> tag in which src is the first attribute
# and an alt attribute appears later in the same tag.
pattern = r'<img\s+src="([^"]+)"\s+[^>]*alt="([^"]+)"'

# All (image URL, alt text) pairs found in the page, in document order.
matches = re.compile(pattern).findall(html_content)
# Download every matched image into a single folder, one file per alt text.
path="D:/小说/王者荣耀壁纸"
# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(path, exist_ok=True)
for img_url, alt_name in matches:
    # Scheme-less links ("//host/...") need "https:" prepended; links that
    # already carry a scheme must be left alone or the URL becomes invalid.
    if img_url.startswith(("http://", "https://")):
        full_url = img_url
    else:
        full_url = "https:" + img_url
    # Replace characters that are not allowed in Windows file names so that
    # open() does not fail on an unusual alt text.
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", alt_name)
    try:
        img_response = requests.get(full_url, timeout=10)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One broken image should not abort the remaining downloads.
        print(f"下载失败: {alt_name}, 图片链接: {img_url}, 错误: {exc}")
        continue
    # Open the file only after a successful download so a failure never
    # leaves an empty or bogus .jpg behind.
    with open(f"{path}/{safe_name}.jpg", "wb") as f:
        f.write(img_response.content)
    print(f"人物名称: {alt_name}, 图片链接: {img_url}")