第四节:爬取王者荣耀英雄皮肤实战
课程目标
课程内容
编码实现
爬虫部分
import requests
from fake_useragent import UserAgent
import time
import bs4
import os
# Scrape the Honor of Kings (王者荣耀) hero-list page and save each hero's
# avatar image to imgs/<hero name>.jpg, one request per second.
url = "https://pvp.qq.com/web201605/herolist.shtml"
us = UserAgent()
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "max-age=0",
    "priority": "u=0, i",
    "sec-ch-ua": "\"Chromium\";v=\"128\", \"Not;A=Brand\";v=\"24\", \"Microsoft Edge\";v=\"128\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\"",
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": us.random,  # randomized UA to reduce trivial bot blocking
}
# timeout prevents a stalled server from hanging the script forever;
# raise_for_status fails fast instead of parsing/saving an HTTP error page.
res = requests.get(url=url, headers=headers, timeout=10)
res.raise_for_status()
html_text = res.content.decode('gbk')  # this page is GBK-encoded, not UTF-8
soup = bs4.BeautifulSoup(html_text, 'html.parser')
ul = soup.find("ul", {"class": "herolist clearfix"})
if ul is None:
    # Page layout changed (or we got an unexpected page) — stop with a clear error
    # instead of an AttributeError on None.
    raise RuntimeError("hero list <ul class='herolist clearfix'> not found")
lis = ul.find_all('li')
save_dir = "imgs"
os.makedirs(save_dir, exist_ok=True)  # idempotent: no error if dir already exists
for li in lis:
    # Guard against malformed <li> entries missing <a>/<img> or attributes.
    img = li.a.img if li.a is not None else None
    if img is None:
        continue
    src = img.get('src')
    name = img.get('alt')
    if not src or not name:
        continue
    src_url = "https:" + src  # page uses protocol-relative image URLs
    img_res = requests.get(src_url, headers=headers, timeout=10)
    img_res.raise_for_status()
    base = os.path.join(save_dir, f"{name}.jpg")
    with open(base, "wb") as f:
        f.write(img_res.content)
    print(f"{name} 皮肤下载完成,保存路径为 {base}")
    time.sleep(1)  # be polite: throttle to one download per second