# Ximalaya (喜马拉雅) music data downloader
import hashlib
import json
import random
import time
import re
import requests
def get_id(url, header):
    """Scrape one listing page and pair up the fields of each track on it.

    Args:
        url: Address of the listing page to fetch.
        header: HTTP headers sent with the request.

    Returns:
        A lazy ``zip`` iterator of ``(track_id, title, count_text)`` string
        tuples, where ``count_text`` has every '万' character removed.
    """
    page_html = requests.get(url=url, headers=header).text

    # Each field is matched independently; the results are combined purely by
    # position, so zip() stops at the shortest of the three match lists.
    track_ids = re.findall(
        r'<a title=".*?" href="/yinyue/\d+?/(.*?)"><span class="title _Vc">',
        page_html,
    )
    titles = re.findall('<span class="title _Vc">(.*?)</span>', page_html)
    raw_counts = re.findall(
        '<span class="count _Vc"><i class="xuicon xuicon-erji1 _Vc"></i>(.*?)</span>',
        page_html,
    )

    count_texts = (text.replace('万', '') for text in raw_counts)
    return zip(track_ids, titles, count_texts)
def get_server_time(url, header):
    """Fetch *url* with the given headers and return the response body text.

    Args:
        url: Address of the time endpoint to query.
        header: HTTP headers sent with the request.

    Returns:
        The decoded response body as a string.
    """
    reply = requests.get(url=url, headers=header)
    return reply.text
def make_sign(url, header):
    """Build a signature string from the server time and the local clock.

    The result is laid out as::

        md5("himalaya-<server_time>") (<r1>) <server_time> (<r2>) <local_ms>

    (without the spaces), where ``r1`` and ``r2`` are independent random
    integers in the range 0..100 and ``local_ms`` is the local time in
    milliseconds.

    Args:
        url: Address of the server-time endpoint.
        header: HTTP headers used when querying that endpoint.

    Returns:
        The assembled signature string.
    """
    # Local timestamp is captured before the server round-trip.
    local_ms = str(round(time.time() * 1000))
    server_time = get_server_time(url, header)
    digest = hashlib.md5(f"himalaya-{server_time}".encode()).hexdigest()
    return (
        f"{digest}"
        f"({round(random.random() * 100)})"
        f"{server_time}"
        f"({round(random.random() * 100)})"
        f"{local_ms}"
    )
def get_url(src_url, header):
    """Query the play-info endpoint and return the audio source it reports.

    Args:
        src_url: Address of the endpoint that returns the JSON description.
        header: HTTP headers sent with the request.

    Returns:
        The value stored under ``['data']['src']`` in the decoded JSON body.

    Raises:
        KeyError: If the decoded body lacks the ``data`` or ``src`` key.
    """
    body = requests.get(url=src_url, headers=header).text
    payload = json.loads(body)
    data = payload['data']
    return data['src']
def save_music(url, header, music_name):
    """Download the content at *url* and write it to ``<music_name>.mp3``.

    The file is created in the current working directory. Because
    *music_name* comes from scraped page text, characters that are not allowed
    in file names (path separators, ``: * ? " < > |`` and control characters)
    are replaced with ``_`` before the file is opened; otherwise such a title
    would make ``open()`` fail or write outside the working directory.

    Args:
        url: Address of the audio resource to download.
        header: HTTP headers sent with the request.
        music_name: Display name of the track; also the basis of the file name.
    """
    # Sanitise only the on-disk name; progress messages keep the real title.
    file_stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', music_name).strip()
    if not file_stem:
        # A title made up solely of whitespace would give a bare ".mp3".
        file_stem = 'untitled'

    print(music_name, "...")
    response = requests.get(url=url, headers=header)
    with open(file_stem + ".mp3", mode='wb') as f:
        f.write(response.content)
    print(music_name, " is complete.")
def get_music(info):
    """Walk the listing pages of each configured album and download its tracks.

    For every track found, a fresh signature is placed in the ``xm-sign``
    request header, the audio source URL is resolved, and the file is saved
    under the name ``<count_text>_<title>``.

    Args:
        info: Dict with two parallel lists: ``'items'`` (album id strings) and
            ``'pages'`` (the page bound for the album at the same index).
    """
    albums = zip(info['items'], info['pages'])
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36',
    }
    server_time_url = 'https://www.ximalaya.com/revision/time'

    for album_id, page_stop in albums:
        # NOTE(review): range(1, page_stop) never visits page ``page_stop``
        # itself — confirm whether 'pages' holds a page count or an exclusive
        # upper bound.
        for page in range(1, page_stop):
            album_root = 'https://www.ximalaya.com/yinyue/' + album_id + '/'
            # The first page lives at the album root; later pages add "p<N>".
            id_url = album_root if page == 1 else album_root + "p" + str(page)

            tracks = get_id(url=id_url, header=header)
            for track_id, title, count_text in tracks:
                music_add_url = f'https://www.ximalaya.com/revision/play/v1/audio?id={track_id}&ptype=1'
                music_name = count_text + "_" + title

                # The signature is regenerated for every track.
                header['xm-sign'] = make_sign(server_time_url, header)
                save_url = get_url(src_url=music_add_url, header=header)
                save_music(save_url, header, music_name)
if __name__ == '__main__':
    # Each entry pairs an album id with the page bound passed to get_music().
    albums = [('291718', 12), ('16155903', 10)]
    music = {
        'items': [album_id for album_id, _ in albums],
        'pages': [page_bound for _, page_bound in albums],
    }
    get_music(music)