使用 Python 批量下载喜马拉雅有声书音频
文章目录
介绍
提供喜马拉雅有声书的网页地址(例:https://www.ximalaya.com/youshengshu/3544633/)后,即可将该有声书的所有音频批量下载到以书名命名的目录中。
原理
- 分析 URL 得到 book_id。
- 通过 book_id 获取书名和总集数。
- 通过 book_id 获取每集的列表。
- 在列表中提取每集的标题和 audio_id。
- 通过 audio_id 获取 audio_url。
- 通过 audio_url 获取音频流数据。
用到的 API
# 通过 book_id 获取书名和总集数
https://www.ximalaya.com/revision/album?albumId=book_id
# 通过 book_id 获取每集的列表
https://www.ximalaya.com/revision/album/v1/getTracksList?albumId=book_id&pageNum=page_num
# 通过 audio_id 获取 audio_url
https://www.ximalaya.com/revision/play/v1/audio?id=audio_id&ptype=1
源码
import re
import sys
import time
import json
import requests
import subprocess
from pathlib import Path
from typing import Tuple, List, Generator
def http_get(url: str, timeout: float = 30.0) -> bytes:
    """Fetch ``url`` with an HTTP GET request and return the raw body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits forever, so one stalled
            connection would hang the whole download run.

    Returns:
        The undecoded response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    # Only a user-agent header is sent.
    # NOTE(review): presumably the site rejects the default ``requests``
    # user-agent -- confirm before changing this value.
    headers = {
        "user-agent": "Windows",
    }
    respond = requests.get(url, headers=headers, timeout=timeout)
    # Turn HTTP error statuses into exceptions instead of returning an
    # error page as if it were valid data.
    respond.raise_for_status()
    return respond.content
def get_description(book_id: int) -> Tuple[str, str]:
url = r"https://www.ximalaya.com/revision/album?albumId=%d" % book_id
data = json.loads(http_get(url))
title = data['data']['mainInfo']['albumTitle']
total_count = data['data']['tracksInfo']['trackTotalCount']
return titl