目标
爬取某主播所有音频
1、获取所有音频播放页地址
import requests
# Step 1: list the title and play-page URL of every track in one album.
url = 'https://www.ximalaya.com/album/203355'
# The album id is the last path segment of the album URL.
id = url.split('/')[-1]
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0"}
# Number of tracks requested per page; used by both requests and the page count.
page_size = 30
# The first request is only used to read the total number of tracks in the album.
url = f'https://www.ximalaya.com/revision/album/v1/getTracksList?albumId={id}&pageNum=1&pageSize={page_size}'
track_total = requests.get(url=url, headers=headers).json()['data']['trackTotalCount']
# Ceiling division: int(total / 30) + 1 asked for one extra, empty page
# whenever the total was an exact multiple of the page size.
pages = (track_total + page_size - 1) // page_size
for page in range(1, pages + 1):
    url = f"https://www.ximalaya.com/revision/album/v1/getTracksList?albumId={id}&pageNum={page}&sort=1&pageSize={page_size}"
    datas = requests.get(url=url, headers=headers).json()['data']['tracks']
    for data in datas:
        title = data['title']
        # data['url'] is a site-relative path; prefix the host to get the play page.
        url = 'https://www.ximalaya.com' + data['url']
        print(title)
        print(url)
2、播放页地址找到存放下载地址
点击播放后才能找到m4a下载地址,搜索全局未能找到生成下载地址的位置
搜索m4a关键字,找到接口,原m4a地址被加密了
思路:找到解密的方法,把加密的密文传进去,得到解密后的m4a地址
js逆向解密 分析
1、只有在点击播放按钮才会请求接口,加密解密数据,xhr断点断不到点击后的内容
2、搜索关键字 decrypt,在可能出现的位置都打上断点,刷新,点击播放
断点停在了此处,返回e的值,通过对比e的值和加密密文长度一致,decryptFn是方法,传入加密的值,返回一个结果,返回内容是一个函数方法,点击进去在 return t 处打断点继续执行
t就是想要的解密后的m4a下载地址,复制地址 在浏览器打开测试是否是正确的下载地址
js逆向解密 写代码
// Excerpt from the site's bundle: a property of an object literal that is not shown here.
// Passes the encrypted link `e` to C.getSoundCryptLink with deviceType "www2"
// and returns whatever that call returns.
decryptFn: function(e) {
try {
// (0, C.getSoundCryptLink)(...) calls the function without binding `this` to C.
return (0,
C.getSoundCryptLink)({
deviceType: "www2",
link: e
})
} catch (e) {
// On any exception: log it and return an empty string instead of throwing.
return console.error(e, "new sound Link occur error"),
""
}
}
执行这个函数可以获得 t值,代码有混淆,去掉混淆的写法
function getSoundCryptLink(e) {
deviceType: "www2",
link: e,
--getSoundCryptLink函数里边的内容
return t
}
t.getSoundCryptLink方法,内容全部复制
// Verbatim excerpt from the site's bundle. Takes an object {link, deviceType} and
// returns a string built from the transformed bytes of the link.
// The free names o, a, r, n, l and d are defined in the enclosing module scope, not shown here.
t.getSoundCryptLink = function(e) {
// link defaults to "" and deviceType to "www2" when missing from the argument object.
const {link: t="", deviceType: i="www2"} = e;
// u is indexed by byte value below; c is passed to d as its third argument.
// o/a are used for "www2" and "mweb2"; any other device type switches to the outer r/n.
let u = o
, c = a;
["www2", "mweb2"].includes(i) || (u = r,
c = n);
try {
// Map "_" to "/" and "-" to "+" before handing the link to l.
// NOTE(review): l is presumably a base64 decoder returning a binary string — confirm.
let e = l(t.replace(/_/g, "/").replace(/-/g, "+"));
// Fewer than 16 decoded characters (or null): return the link unchanged.
if (null === e || e.length < 16)
return t;
// r (shadows the outer r inside this try block): all char codes except the last 16.
let r = new Uint8Array(e.length - 16);
for (let t = 0; t < e.length - 16; t++)
r[t] = e.charCodeAt(t);
// n (shadows the outer n inside this try block): the char codes of the last 16 characters.
let n = new Uint8Array(16);
for (let t = 0; t < 16; t++)
n[t] = e.charCodeAt(e.length - 16 + t);
// Replace every byte with its entry in the table u.
for (let e = 0; e < r.length; e++)
r[e] = u[r[e]];
// Call d at every 16-byte offset with the trailing 16 bytes, then at every 32-byte offset with c.
// NOTE(review): d presumably modifies r in place (its return value is ignored) — confirm.
for (let e = 0; e < r.length; e += 16)
d(r, e, n);
for (let e = 0; e < r.length; e += 32)
d(r, e, c);
// Immediately-invoked decoder: turns the byte array into a string, reading
// 1-, 2- and 3-byte UTF-8-style sequences selected by the top 4 bits of the lead byte.
return function(e) {
// These vars shadow the outer names of the same letters inside the decoder only.
var t, r, n, o, a, i;
for (t = "",
n = e.length,
r = 0; r < n; )
switch ((o = e[r++]) >> 4) {
// Lead byte 0x00-0x7F: one byte, used as the char code directly.
case 0:
case 1:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
t += String.fromCharCode(o);
break;
// Lead byte 0xC0-0xDF: two bytes, 5 + 6 payload bits.
case 12:
case 13:
a = e[r++],
t += String.fromCharCode((31 & o) << 6 | 63 & a);
break;
// Lead byte 0xE0-0xEF: three bytes, 4 + 6 + 6 payload bits.
// Any other lead byte (0x80-0xBF, 0xF0-0xFF) is consumed and contributes nothing.
case 14:
a = e[r++],
i = e[r++],
t += String.fromCharCode((15 & o) << 12 | (63 & a) << 6 | (63 & i) << 0)
}
return t
}(r)
} catch (e) {
// On any exception inside the try block: log a warning and return an empty string.
return console.warn(e, "secret failed"),
""
}
}
上个函数和里边的函数
getSoundCryptLink = function (e) {
{
deviceType: 'www2',
link:e
}
const {link: t = "", deviceType: i = "www2"} = e;
}
deviceType 最后赋值给了 i;整个函数里没有再使用 deviceType,只使用了 i,所以可以简写为 i = "www2"
link 是传入对象 e 中的密文;函数里没有直接使用 link,而是把它重新命名为 t,所以可以直接把密文作为参数 t 传入
getSoundCryptLink = function (t) {
i = "www2"
}
整理后代码
/**
 * Decrypts an encrypted sound link into the plain-text link.
 *
 * Relies on names from the enclosing scope that are NOT defined in this block:
 *  - `l`: called with the normalised link; its result is read with `charCodeAt`.
 *    NOTE(review): presumably a base64 decoder returning a binary string — confirm.
 *  - `d`: called as `d(bytes, offset, key)` and its return value is ignored.
 *    NOTE(review): presumably transforms `bytes` in place — confirm.
 *  - `o` / `a`: byte lookup table and fixed key used for "www2" and "mweb2".
 *  - `r` / `n`: byte lookup table and fixed key used for every other device type.
 *
 * @param {string} t - The encrypted link.
 * @param {string} [deviceType="www2"] - Client type; selects which table/key pair is used.
 *   Previously this was hard-coded through an undeclared global `i`.
 * @returns {string} The decoded link; `t` unchanged when fewer than 16 bytes decode
 *   (or `l` returns null); "" when anything inside the decoding step throws.
 */
function decrypt(t, deviceType = "www2") {
  const isWebClient = ["www2", "mweb2"].includes(deviceType);
  const lookupTable = isWebClient ? o : r;
  const fixedKey = isWebClient ? a : n;

  // Turns bytes into a string, reading 1-, 2- and 3-byte UTF-8-style sequences.
  // Lead bytes 0x80-0xBF and 0xF0-0xFF are consumed and contribute nothing.
  const bytesToText = (bytes) => {
    let text = "";
    let pos = 0;
    while (pos < bytes.length) {
      const lead = bytes[pos++];
      switch (lead >> 4) {
        case 0:
        case 1:
        case 2:
        case 3:
        case 4:
        case 5:
        case 6:
        case 7:
          text += String.fromCharCode(lead);
          break;
        case 12:
        case 13: {
          const second = bytes[pos++];
          text += String.fromCharCode(((lead & 31) << 6) | (second & 63));
          break;
        }
        case 14: {
          const second = bytes[pos++];
          const third = bytes[pos++];
          text += String.fromCharCode(
            ((lead & 15) << 12) | ((second & 63) << 6) | (third & 63)
          );
          break;
        }
        default:
          break;
      }
    }
    return text;
  };

  try {
    // Map "_" to "/" and "-" to "+" before handing the link to the decoder.
    const decoded = l(t.replace(/_/g, "/").replace(/-/g, "+"));
    if (decoded === null || decoded.length < 16) {
      return t;
    }

    // Everything except the last 16 characters is the payload ...
    const payloadLength = decoded.length - 16;
    const payload = new Uint8Array(payloadLength);
    for (let k = 0; k < payloadLength; k++) {
      payload[k] = decoded.charCodeAt(k);
    }
    // ... and the last 16 characters are a per-link key.
    const tailKey = new Uint8Array(16);
    for (let k = 0; k < 16; k++) {
      tailKey[k] = decoded.charCodeAt(payloadLength + k);
    }

    // Byte substitution, then d at every 16-byte offset with the per-link key,
    // then d at every 32-byte offset with the fixed key.
    for (let k = 0; k < payload.length; k++) {
      payload[k] = lookupTable[payload[k]];
    }
    for (let offset = 0; offset < payload.length; offset += 16) {
      d(payload, offset, tailKey);
    }
    for (let offset = 0; offset < payload.length; offset += 32) {
      d(payload, offset, fixedKey);
    }

    return bytesToText(payload);
  } catch (err) {
    console.warn(err, "secret failed");
    return "";
  }
}
执行缺少o,代码函数往上找到 定义o,
执行缺少a,代码函数往上找到 定义a,