网站地址
交友直播_在线交友_游戏陪玩_网络聊天交友_虎牙直播
爬虫代码
import requests
import json
import os
from urllib import request
# Characters that are rejected or treated as path separators by common file systems.
_INVALID_FILENAME_CHARS = frozenset('\\/:*?"<>|')


def _strip_jsonp(text):
    """Return the JSON text wrapped inside a JSONP ``callback(...)`` response."""
    opening = text.find('(')
    closing = text.rfind(')')
    if opening == -1 or closing <= opening:
        raise ValueError('response is not a JSONP payload')
    return text[opening + 1:closing]


def _safe_filename(name):
    """Replace characters that are unsafe in a file name with underscores."""
    cleaned = ''.join(
        '_' if ch in _INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(name)
    ).strip(' .')
    return cleaned or 'unnamed'


def start(page):
    """Download the screenshots of every streamer listed on one Huya list page.

    The function queries the Huya ``LiveList`` JSONP endpoint (gameId 4079)
    for the given page, reads the entries under ``data.datas`` and saves each
    entry's ``screenshot`` image as ``huya/{page}-{nick}.png``.

    Failures are reported on stdout and skipped so that one bad page or one
    bad image does not abort the whole crawl.

    Args:
        page: Page number to request from the list endpoint.

    Returns:
        None.
    """
    # List endpoint to crawl; ``page`` selects the page number.
    url = f'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=4079&tagAll=0&callback=getLiveListJsonpCallback&page={page}'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}

    # Fetch the streamer list; the timeout keeps a stalled connection from
    # hanging the crawl forever.
    try:
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('error:', f'page {page}', exc)
        return

    # Unwrap the JSONP envelope, then parse: JSON string => Python dict.
    try:
        payload = json.loads(_strip_jsonp(response.text))
        streamers = payload['data']['datas']
    except (ValueError, KeyError, TypeError) as exc:
        print('error:', f'page {page}', exc)
        return

    # Make the function usable on its own, without relying on the caller
    # to have created the output directory.
    os.makedirs('huya', exist_ok=True)

    # Every streamer on the current page.
    for streamer in streamers or []:
        try:
            nick = streamer['nick']  # display name
            img = streamer['screenshot']  # image URL
        except (KeyError, TypeError):
            print('error:', streamer)
            continue

        # Download the image and use the (sanitised) nickname as the file
        # name; the nickname comes from the server and may contain path
        # separators or other characters that are invalid in file names.
        file_name = f'{page}-{_safe_filename(nick)}.png'
        try:
            request.urlretrieve(img, os.path.join('huya', file_name))
            request.urlcleanup()  # clear urlretrieve's temporary files
        except (OSError, ValueError):
            # URLError/HTTPError are OSError subclasses; ValueError covers
            # malformed URLs. Ctrl-C is deliberately not swallowed.
            print('error:', nick)
        else:
            print(f'{file_name} 下载完成!')
if __name__ == '__main__':
    # Create the output directory up front. exist_ok=True replaces the
    # check-then-create pair (os.path.exists + os.mkdir), which can fail
    # if the directory appears between the check and the mkdir call.
    os.makedirs("./huya", exist_ok=True)
    # Crawl list pages 1 through 29 (the range end is exclusive).
    for page in range(1, 30):
        start(page)
爬虫结果