# -*- coding: utf-8 -*-
"""
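Download songs from Kuwo Music search results.

Implementation steps:
1. Send the request
2. Receive the response data
3. Parse the data
4. Save the data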
"""
import requests
import os
import time
# Root folder for downloads; one sub-folder is created per search keyword.
# Stored without a trailing separator so os.path.join works on every platform.
file_path = 'musics'

# Browser-like request headers ("disguise") sent with the search request.
# The csrf value mirrors the kw_token cookie below.
# NOTE(review): these session values are hard-coded and presumably expire --
# refresh them from a live browser session if the search starts failing.
headers = {
    'Cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1659252846; '
              'Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1659252846; _ga=GA1.2.1319118617.1659252847; '
              '_gid=GA1.2.1887200127.1659252847; kw_token=98ZQ8KTOPZ',
    'csrf': '98ZQ8KTOPZ',
    'Host': 'www.kuwo.cn',
    'Referer': 'http://www.kuwo.cn/search/list?key=%E9%99%88%E5%A5%95%E8%BF%85',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 '
                  'Safari/537.36',
}

SEARCH_URL = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord'
PLAY_URL = 'http://www.kuwo.cn/api/v1/www/music/playUrl'
PAGE_SIZE = 30        # results requested per search page
MAX_PAGES = 3         # only the first few pages are crawled
REQUEST_TIMEOUT = 10  # seconds; prevents a stalled connection from hanging the run

# Characters that cannot appear in a file name on Windows (a superset of the
# POSIX restrictions), plus ASCII control characters.
_FORBIDDEN_CHARS = '\\/:*?"<>|' + ''.join(chr(code) for code in range(32))


def sanitize_filename(text, replacement='_'):
    """Return *text* made safe for use as a single file or folder name.

    Args:
        text: Raw name, e.g. a song title, artist or search keyword.
        replacement: String substituted for every forbidden character and
            returned on its own when nothing usable is left.

    Returns:
        The cleaned name; never an empty string.
    """
    table = str.maketrans({ch: replacement for ch in _FORBIDDEN_CHARS})
    # Windows also rejects names that end in a dot or a space.
    cleaned = str(text).translate(table).strip().rstrip('. ')
    return cleaned or replacement


def track_path(directory, name, artist):
    """Return the path of the ``<name>--<artist>.mp3`` file inside *directory*."""
    filename = f'{sanitize_filename(name)}--{sanitize_filename(artist)}.mp3'
    return os.path.join(directory, filename)


def search_page(keyword, page_num, page_size=PAGE_SIZE):
    """Fetch one page of search results.

    Args:
        keyword: Singer or song to search for.
        page_num: 1-based page number.
        page_size: Number of results requested per page.

    Returns:
        A list of track dicts (empty when the API returns no list).

    Raises:
        requests.RequestException: On network errors or a non-2xx status.
        ValueError: If the body is not valid JSON.
        KeyError, TypeError: If the JSON lacks the ``data.list`` structure.
    """
    # Passing the query as ``params`` lets requests URL-encode the keyword.
    query = {
        'key': keyword,
        'pn': page_num,
        'rn': page_size,
        'httpsStatus': 1,
        'reqId': '23599db0-10a4-11ed-8a4c-b5ed1a6dd7c8',
    }
    response = requests.get(SEARCH_URL, params=query, headers=headers,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()['data']['list'] or []


def download_track(rid, target):
    """Resolve the audio URL for song id *rid* and save the audio to *target*.

    Raises:
        Exception: Any network, JSON, lookup or file-system error; the caller
            treats every failure as "this track could not be downloaded".
    """
    query = {
        'mid': rid,
        'type': 'music',
        'httpsStatus': 1,
        'reqId': '87b1ccc1-10a8-11ed-90ca-8da23cf7f79d',
    }
    # As in the original script, no custom headers are sent for these calls.
    audio_url = requests.get(PLAY_URL, params=query,
                             timeout=REQUEST_TIMEOUT).json()['data']['url']
    audio = requests.get(audio_url, timeout=REQUEST_TIMEOUT)
    # Do not store an HTTP error page under an .mp3 name.
    audio.raise_for_status()
    with open(target, mode='wb') as f:
        f.write(audio.content)


def main():
    """Prompt for a keyword and download the matching songs page by page."""
    keyword = input('请输入您要搜索的歌手或歌曲:')
    dir_name = os.path.join(file_path, sanitize_filename(keyword))
    os.makedirs(dir_name, exist_ok=True)

    count = success = fail = 0
    for page_num in range(1, MAX_PAGES + 1):
        try:
            data_list = search_page(keyword, page_num)
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            print(f'第{page_num}页搜索失败:{e}')
            break

        for data in data_list:
            count += 1
            # Bound before the try so the failure message can always be built.
            name = artist = '未知'
            try:
                name = data['name']      # song title
                artist = data['artist']  # singer
                download_track(data['rid'], track_path(dir_name, name, artist))
            except Exception as e:
                # Best effort: report the problem and move on to the next song.
                print(e)
                print(f'第{count}首:{name}--{artist}--下载失败')
                fail += 1
            else:
                success += 1
                print(f'第{count}首:{name}--{artist}--下载完成')
                time.sleep(0.2)

        # A short page means there are no further results.
        if len(data_list) < PAGE_SIZE:
            break
        time.sleep(0.5)

    print(f'下载完毕,成功下载{keyword}相关歌曲--{success}首')
    if fail:
        print(f'下载失败--{fail}首')


if __name__ == '__main__':
    main()
# Crawl results: