# Note: this script extracts the image data directly with regular-expression matching.
import os
import re
from urllib.parse import urljoin

import requests
# HTTP headers sent with the hero-list page request. The values present the
# request as an XMLHttpRequest issued by desktop Chrome 133 on Windows with
# the hero-list page itself as referer.
headers = {
    # Content negotiation and caching.
    "accept": "application/json, text/javascript, */*; q=0.01",
    "accept-language": "zh-CN,zh;q=0.9,oc;q=0.8",
    "cache-control": "no-cache",
    "content-type": "application/json; charset=utf-8",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://pvp.qq.com/web201605/herolist.shtml",
    # Client hints and fetch metadata.
    "sec-ch-ua": '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    # Browser identification.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/133.0.0.0 Safari/537.36"
    ),
    "x-requested-with": "XMLHttpRequest",
}
# Page that lists the heroes; also used as the base for resolving image URLs.
url = "https://pvp.qq.com/web201605/herolist.shtml"
# Matches an <img> tag whose first attribute is src and that also carries an
# alt attribute; captures (src, alt).
pattern = r'<img\s+src="([^"]+)"\s+[^>]*alt="([^"]+)"'
# Directory the downloaded images are written to.
path = "D:/小说/王者荣耀壁纸"
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 10
# Characters that are not allowed in Windows file names (plus control chars).
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def extract_images(html_content):
    """Return every (src, alt) pair matched by ``pattern`` in *html_content*.

    Args:
        html_content: HTML text to scan.

    Returns:
        A list of ``(src, alt)`` string tuples, in document order; empty when
        nothing matches.
    """
    return re.findall(pattern, html_content)


def safe_filename(name):
    """Return *name* with characters that are illegal in file names replaced.

    Forbidden characters become ``_``; leading/trailing spaces and dots are
    stripped. If nothing usable remains, ``"unnamed"`` is returned so the
    caller never ends up with an empty file name.
    """
    cleaned = _INVALID_FILENAME_CHARS.sub("_", name).strip(" .")
    return cleaned or "unnamed"


def resolve_image_url(img_url, base_url=url):
    """Turn an image ``src`` value into an absolute URL.

    Protocol-relative (``//host/x.jpg``), relative and already-absolute values
    are all handled by resolving against *base_url*.
    """
    return urljoin(base_url, img_url)


def download_image(img_url, alt_name, folder=path):
    """Download one image and store it as ``<alt_name>.jpg`` inside *folder*.

    The image is fetched completely before the file is opened, so a failed
    request never leaves an empty or partial file behind.

    Args:
        img_url: The ``src`` value taken from the page.
        alt_name: The ``alt`` value; used (sanitised) as the file name.
        folder: Existing directory to write into.

    Returns:
        The path of the written file.

    Raises:
        requests.RequestException: The request failed or returned an HTTP
            error status.
        OSError: The file could not be written.
    """
    image_response = requests.get(resolve_image_url(img_url), timeout=REQUEST_TIMEOUT)
    image_response.raise_for_status()
    target = os.path.join(folder, f"{safe_filename(alt_name)}.jpg")
    with open(target, "wb") as f:
        f.write(image_response.content)
    return target


def main():
    """Fetch the hero list page and save every matched image to ``path``."""
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode the body as GBK explicitly instead of relying on auto-detection.
    response.encoding = "gbk"
    matches = extract_images(response.text)

    os.makedirs(path, exist_ok=True)
    for img_url, alt_name in matches:
        try:
            download_image(img_url, alt_name)
        except (requests.RequestException, OSError) as exc:
            # One broken image should not abort the remaining downloads.
            print(f"下载失败: {alt_name} ({img_url}): {exc}")
            continue
        print(f"人物名称: {alt_name}, 图片链接: {img_url}")


if __name__ == "__main__":
    main()