原创文|Space9
K歌自嗨让我们嗨得更自由
需求场景
- 我们自唱的音频想保留音频文件,分发给朋友或者保存到别的地方
- 我们想将自唱的音频作为视频背景音乐或者二次编辑音频文件
- 我们注销账号时,主流平台没有提供打包下载的功能或者下载自己的K歌有某些限制
实现方式(仅以唱鸭为例)
- 获取分享页面数据
我们可以在页面源码中搜索到id名为“__NEXT_DATA__”的script标签
- 解析json数据,获取我们需要的下载链接及音频名称
- 通过调用aria2c.exe来完成我们的下载
源码
Amusic.py
import base64
import json
import random
import re
import time
from urllib import parse
import requests
# Convert a JSONP payload into the JSON value it wraps.
def loads_jsonp(_jsonp):
    """Parse a JSONP string and return the decoded JSON object.

    The text from the first ``{`` to the last ``}`` of ``_jsonp`` is
    extracted and handed to ``json.loads``; whatever surrounds it (callback
    name, parentheses, trailing semicolon) is ignored.

    Args:
        _jsonp: JSONP text such as ``callback({"a": 1});``.

    Returns:
        The decoded JSON object.

    Raises:
        ValueError: with the message ``'Invalid Input'`` if ``_jsonp`` is
            not a string, contains no ``{...}`` section, or that section is
            not valid JSON.
    """
    try:
        # re.S lets "." span newlines so multi-line payloads still match.
        matched = re.match(r".*?({.*}).*", _jsonp, re.S)
    except TypeError as err:
        # Non-string input (None, bytes, ...) cannot be scanned by a str pattern.
        raise ValueError('Invalid Input') from err
    if matched is None:
        raise ValueError('Invalid Input')
    try:
        return json.loads(matched.group(1))
    except ValueError as err:
        # json.JSONDecodeError is a ValueError subclass; normalise the message
        # while keeping the decoder's error available as the cause.
        raise ValueError('Invalid Input') from err
def get_xima_music_parm(music_url):
    """Resolve a Ximalaya share URL to ``[file_name, play_path]``.

    The track id is the last path segment of ``music_url``; its metadata is
    fetched from ``https://m.ximalaya.com/tracks/<id>.json``.

    Args:
        music_url: Share link of a track; may contain ``%xx`` escapes.

    Returns:
        A two-item list: the track ``title`` with characters that are not
        allowed in file names removed, and the ``play_path_64`` field of the
        track JSON.

    Raises:
        IndexError: if the URL path has no ``/``-separated segment.
        KeyError: if the response lacks ``title`` or ``play_path_64``.
        ValueError: if the response body is not valid JSON.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    print("开始获取喜马拉雅的参数")
    # Replace %xx escapes with their single-character equivalents.
    url_data = parse.unquote(music_url)
    # Parsed URL components.
    result = parse.urlparse(url_data)
    print(result)
    # Drop a trailing "/" first so ".../123/" yields "123" rather than an
    # empty id.
    song_id = result.path.rstrip('/').rsplit('/', 1)[1]
    print(song_id)
    url = "https://m.ximalaya.com/tracks/" + song_id + ".json"
    headers = {
        'Host': "m.ximalaya.com",
        'Accept-Encoding': "gzip, deflate",
        'Connection': "keep-alive",
        'cache-control': "no-cache"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.request("GET", url, headers=headers, timeout=30)
    print(response.text)
    music_data = json.loads(response.text)
    # File names must not contain any of the ASCII characters \ / : * ? " < > |
    # (line breaks are removed as well).
    music_name = re.sub(r'[\\/:*?"<>|\r\n]+', "", music_data["title"])
    print(music_name)
    play_path_64 = music_data["play_path_64"]
    music_parm = [music_name, play_path_64]
    return music_parm
def get_changba_music_parm(music_url):
    """Resolve a Changba share page to ``[file_name, media_url]``.

    The page HTML is downloaded and scanned with regular expressions. The
    ``isvideo`` flag found in the page selects between two layouts:

    * ``"0"``: the name comes from the ``<div class="title">`` element and
      the URL from the ``var a="http..."`` script assignment.
    * anything else: the URL is the base64-decoded ``video_url`` value
      prefixed with ``http:``, and the name is generated.

    Args:
        music_url: URL of the Changba share page.

    Returns:
        A two-item list ``[name, url]``. The name falls back to
        ``"changba" + <millisecond timestamp>`` when no usable title exists.

    Raises:
        AttributeError: if an expected marker is missing from the page
            (``re.search`` returned ``None``).
        requests.exceptions.RequestException: on network failure or timeout.
    """
    print("开始获取唱吧的参数")
    # Without a timeout a stalled connection would block the caller forever.
    html = requests.get(music_url, timeout=30).text
    is_video_res = re.search(r'&isvideo=(?P<is_video>[\s\S]*?)\'', html)
    is_video = is_video_res.groupdict()['is_video']
    if is_video == "0":
        title_res = re.search(r'<div class="title">(?P<title>[\s\S]*?)</div>', html)
        origin_name = title_res.groupdict()['title']
        # File names must not contain any of the ASCII characters
        # \ / : * ? " < > | (line breaks are removed as well).
        music_name = re.sub(r'[\\/:*?"<>|\r\n]+', "", origin_name)
        if not music_name.strip():
            # Nothing usable is left (blank title, or a title made only of
            # forbidden characters): generate a name instead of returning an
            # empty file name.
            music_name = "changba" + str(int(round(time.time() * 1000)))
        print(music_name)
        sub_url_res = re.search(r'var a="http(?P<sub_url>[\s\S]*?)",', html)
        sub_url = sub_url_res.groupdict()['sub_url']
        # The pattern consumed the leading "http"; put it back.
        mp3_url = "http" + sub_url
    else:
        sub_url_res = re.search(r'video_url: \'(?P<sub_url>[\s\S]*?)\',', html)
        sub_url = sub_url_res.groupdict()['sub_url']
        # The page stores the video address base64-encoded.
        str_url = base64.b64decode(sub_url).decode("utf-8")
        mp3_url = "http:" + str_url
        music_name = "changba" + str(int(round(time.time() * 1000)))
        print(music_name)
    music_parm = [music_name, mp3_url]
    return music_parm
# NOTE (translated from the original author): known issue - the printed log is
# incomplete and the file header is too long, but file conversion works fine.
def get_lizhi_music_parm(music_url):
    """Resolve a Lizhi FM share URL to ``[file_name, track_url]``.

    The voice id is the last path segment of ``music_url``; its metadata is
    fetched from ``https://m.lizhi.fm/vodapi/voice/info/<id>``.

    Args:
        music_url: Share link of a voice; may contain ``%xx`` escapes.

    Returns:
        A two-item list: ``data.userVoice.voiceInfo.name`` with characters
        that are not allowed in file names removed, and
        ``data.userVoice.voicePlayProperty.trackUrl``.

    Raises:
        IndexError: if the URL path has no ``/``-separated segment.
        KeyError: if the response JSON lacks one of the fields above.
        ValueError: if the response body is not valid JSON.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    print("开始获取荔枝的参数")
    # Replace %xx escapes with their single-character equivalents.
    url_data = parse.unquote(music_url)
    # Parsed URL components.
    result = parse.urlparse(url_data)
    print(result)
    # Drop a trailing "/" first so ".../123/" yields "123" rather than an
    # empty id.
    song_id = result.path.rstrip('/').rsplit('/', 1)[1]
    print(song_id)
    url = "https://m.lizhi.fm/vodapi/voice/info/" + song_id
    headers = {
        'Host': "m.lizhi.fm",
        'Accept-Encoding': "gzip, deflate",
        'Connection': "keep-alive",
        'cache-control': "no-cache"
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.request("GET", url, headers=headers, timeout=30)
    print(response.text)
    music_data = json.loads(response.text)["data"]
    # File names must not contain any of the ASCII characters \ / : * ? " < > |
    # (line breaks are removed as well).
    music_name = re.sub(r'[\\/:*?"<>|\r\n]+', "", music_data["userVoice"]["voiceInfo"]["name"])
    print(music_name)
    track_url = music_data["userVoice"]["voicePlayProperty"]["trackUrl"]
    music_parm = [music_name, track_url]
    return music_parm
def get_changya_music_parm(music_url):
    """Resolve a Changya share page to ``[file_name, audio_url]``.

    The page is fetched with a desktop browser User-Agent and the JSON
    embedded in its ``<script id="__NEXT_DATA__">`` tag is decoded; the clip
    record is read from ``props.pageProps.clip``.

    Args:
        music_url: URL of the Changya share page.

    Returns:
        A two-item list: the clip's ``songName`` with characters that are
        not allowed in file names removed, and its ``audioSrc``.

    Raises:
        AttributeError: if the page has no ``__NEXT_DATA__`` script tag
            (``re.search`` returned ``None``).
        KeyError: if the embedded JSON lacks one of the expected fields.
        ValueError: if the embedded text is not valid JSON.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    print("开始获取唱鸭的参数")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'
    }
    # Without a timeout a stalled connection would block the caller forever.
    html = requests.request("GET", music_url, headers=headers, timeout=30).text
    # Match the tag by its id only: "[^>]*" accepts whatever other attributes
    # follow (type, crossorigin, nonce, ...) in any order, so the lookup does
    # not break when the page changes the attribute list.
    song_json_res = re.search(
        r'<script id="__NEXT_DATA__"[^>]*>(?P<song_json>[\s\S]*?)</script>',
        html)
    song_data = song_json_res.groupdict()['song_json'].strip()
    music_data = json.loads(song_data)["props"]["pageProps"]["clip"]
    # File names must not contain any of the ASCII characters \ / : * ? " < > |
    # (line breaks are removed as well).
    music_name = re.sub(r'[\\/:*?"<>|\r\n]+', "", music_data["songName"])
    print(music_name)
    mp3_url = music_data["audioSrc"]
    music_parm = [music_name, mp3_url]
    return music_parm
def get_changya2_music_parm(music_url):
    """Resolve a Changya page of the second layout to ``[file_name, url]``.

    The page is fetched with a desktop browser User-Agent and the JSON
    embedded in its ``<script id="__NEXT_DATA__">`` tag is decoded; the media
    address is read from ``props.pageProps.url``. This layout carries no
    title that the code reads, so the name is generated.

    Args:
        music_url: URL of the Changya share page.

    Returns:
        A two-item list: ``"changya" + <millisecond timestamp>`` and the
        ``props.pageProps.url`` value.

    Raises:
        AttributeError: if the page has no ``__NEXT_DATA__`` script tag
            (``re.search`` returned ``None``).
        KeyError: if the embedded JSON lacks ``props.pageProps.url``.
        ValueError: if the embedded text is not valid JSON.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    print("开始获取唱鸭2的参数")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'
    }
    # Without a timeout a stalled connection would block the caller forever.
    html = requests.request("GET", music_url, headers=headers, timeout=30).text
    # Match the tag by its id only: "[^>]*" accepts whatever other attributes
    # follow (type, crossorigin, nonce, ...) in any order, so the lookup does
    # not break when the page changes the attribute list.
    song_json_res = re.search(
        r'<script id="__NEXT_DATA__"[^>]*>(?P<song_json>[\s\S]*?)</script>',
        html)
    song_data = song_json_res.groupdict()['song_json'].strip()
    mp4_url = json.loads(song_data)["props"]["pageProps"]["url"]
    music_name = "changya" + str(int(round(time.time() * 1000)))
    print(music_name)
    music_parm = [music_name, mp4_url]
    return music_parm
def get_kugouchang_music_parm(music_url):
print("开始获取酷狗唱唱和斗歌和酷狗K歌的参数")
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36'
}
res = requests.request("GET", music_url