前言
本文目标功能基于B站API实现
使用到的API
URL: https://api.bilibili.com/x/player/pagelist
method: GET
remarks: 获取 视频cid 用以下载
url 参数:
参数名 | 必选 | 备注 |
---|---|---|
aid | 否 | 视频的av号(avid) |
bvid | 否 | 视频的bv号 |
jsonp | 否 | 默认值"jsonp"无需变动 |
注: `aid` 和 `bvid` 必选一个
headers 参数:
目前没有必要的请求头参数
返回值:
以 json 格式返回, 包含视频 `cid`、`part` (标题) 等等… (包括所有子视频)
例:
URL: https://api.bilibili.com/x/player/playurl
method: GET
remarks: 获取视频资源
url 参数:
参数名 | 必选 | 备注 |
---|---|---|
qn | 否 | 指定返回下载链接的视频清晰度 |
otype | 否 | 默认值 “json” |
cid | 是 | avid bvid同一页面只需变更 cid 来下载子视频 |
avid | 否 | av号 |
bvid | 否 | bv号 |
注: `bvid` 和 `avid` 必选一个, avid 还需要转换
headers 参数:
参数名 | 备注 |
---|---|
Cookie | 只需要 SESSDATA 字段 |
User-Agent | 使用postman测试结果表示可以不用设置(建议添加) |
例:
由于篇幅限制, 本文对这些接口的描述比较片面, 如有不明白的地方可以自行研究, 也可以加我(俺)一起讨论讨论。
基本思路
- 通过 API 获取视频 `cid`
  - 可能含有多个视频
- 使用 `cid` 和 `avid` (或 `bvid`) 请求接口, 返回视频下载链接
- 下载
  - 这个 `playurl` 接口返回的视频链接下载速度巨慢…可能需要多线程下载(至少俺就是这么淦的)
具体实现
环境: Python3.9
requests==2.25.1
import requests
import threading
import os, time
from requests.api import head
class CodeChange:
    """Convert between Bilibili BV ids and numeric av ids.

    The mapping is a six-digit base-58 number written with a shuffled
    alphabet into fixed positions of the BV string, combined with a
    constant XOR mask and a constant additive offset.
    """

    def __init__(self) -> None:
        # Shuffled base-58 alphabet; a character's index is its digit value.
        self.table = 'fZodR9XQDSUm21yCkr6zBqiveYah8bt4xsWpHnJE7jL5VG3guMTKNPAwcF'
        # Reverse lookup: alphabet character -> digit value.
        self.tr = {char: digit for digit, char in enumerate(self.table)}
        # Positions inside the BV string that hold the six digits,
        # ordered from least significant to most significant.
        self.s = [11, 10, 3, 8, 4, 6]
        self.xor = 177451812
        self.add = 8728348608

    def decode(self, x):
        """Return the av number encoded by the BV id string ``x``.

        Raises:
            IndexError: if ``x`` is shorter than 12 characters.
            KeyError: if a digit position holds a character outside the
                alphabet.
        """
        r = sum(
            self.tr[x[position]] * 58 ** power
            for power, position in enumerate(self.s)
        )
        return (r - self.add) ^ self.xor

    def encode(self, x):
        """Return the 12-character BV id string for the av number ``x``."""
        x = (x ^ self.xor) + self.add
        # The template must be 12 characters long because the digit
        # positions in ``self.s`` reach index 11. Every ``_`` placeholder
        # sits on one of those positions and is overwritten below; the
        # remaining characters (B, V, 1, 4, 1, 7) are fixed.
        r = list('BV1__4_1_7__')
        for power, position in enumerate(self.s):
            r[position] = self.table[x // 58 ** power % 58]
        return ''.join(r)
class BiliBiliVideoDownloader:
    """List the pages of one Bilibili video and download them.

    On construction the page list (cid / page / title per page) is fetched
    through the ``pagelist`` API. Download links are requested per page
    through the ``playurl`` API and handed to ``DownloadWorkerThread``.
    """

    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"
    SESSDATA = "你的SESSDATA"
    # Seconds to wait for an API response before the request is abandoned.
    API_TIMEOUT = 10
    # Converter between bv and av ids.
    change = CodeChange()
    accept_description = []
    accept_quality = []
    # Characters that are not allowed in file names on common file systems,
    # each mapped to an underscore.
    _UNSAFE_FILENAME_CHARS = str.maketrans(dict.fromkeys('\\/:*?"<>|', "_"))

    def __init__(self, bvid) -> None:
        """Store the ids of the video and fetch its page list.

        Args:
            bvid: BV id string of the video.
        """
        # bv id
        self.bvid = bvid
        # av id
        self.avid = self.change.decode(bvid)
        # Per-instance quality caches, filled lazily from the playurl API.
        self.accept_description = []
        self.accept_quality = []
        # Page information list of the video.
        self.pagelist = self.__API_pagelist(bvid)

    def __SEND_api(self, url, api_name):
        """Send a GET request to ``url`` and return the response.

        Args:
            url: Complete request URL including the query string.
            api_name: Label printed when the request fails.

        Raises:
            requests.RequestException: if the request cannot be completed.
        """
        headers = {
            "user-agent": self.USER_AGENT,
            "cookie": f"SESSDATA={self.SESSDATA}"
        }
        try:
            return requests.get(url, headers=headers, timeout=self.API_TIMEOUT)
        except requests.RequestException:
            print(f"API Error with \"{api_name}\"")
            # Re-raise instead of returning None: every caller immediately
            # calls .json() on the result and would otherwise fail with an
            # unrelated AttributeError.
            raise

    def __API_pagelist(self, bvid):
        """Return a list of ``{"cid", "page", "title"}`` dicts, one per page."""
        url = f"https://api.bilibili.com/x/player/pagelist?bvid={bvid}&jsonp=jsonp"
        response = self.__SEND_api(url, "__API_pagelist")
        data = response.json()["data"]
        # Keep only the fields used by this class (easier to inspect).
        return [
            {
                "cid": item["cid"],
                "page": item["page"],
                "title": item["part"]
            }
            for item in data
        ]

    def __API_playurl(self, qn, cid, bvid, otype="json"):
        """Return the ``data`` object of the playurl API for one page."""
        url = f"https://api.bilibili.com/x/player/playurl?qn={qn}&otype={otype}&cid={cid}&bvid={bvid}"
        response = self.__SEND_api(url, "__API_playurl")
        return response.json()["data"]

    def __Load_Accept_Quality(self):
        """Fetch the accepted quality lists once and cache them on the instance."""
        if not (self.accept_description and self.accept_quality):
            # Ask the playurl API (for the first page) which qualities exist.
            data = self.__API_playurl("16", self.pagelist[0]["cid"], self.bvid)
            self.accept_description = data["accept_description"]
            self.accept_quality = data["accept_quality"]

    def Show_Accept_Quality(self):
        """Print every available quality description with its index."""
        self.__Load_Accept_Quality()
        for i, description in enumerate(self.accept_description):
            print(f"{i} {description}")

    def Show_All_Page_Title(self):
        """Print the title of every page with its index."""
        for i, page in enumerate(self.pagelist):
            print(f"{i}. {page['title']}")

    def __Get_Download_Link(self, index : int, quality : int) -> dict:
        """Return the playurl data for the page at ``index``.

        ``data["durl"]`` is a list. With a single element the video is one
        complete file; with more than one element each element is a slice
        of the video, and the slices must be merged after downloading.
        """
        cid = self.pagelist[index]["cid"]
        return self.__API_playurl(quality, cid, self.bvid)

    def Start_Download(self, part, qualityIndex):
        """Download one page of the video and block until it is finished.

        Args:
            part: Index into ``pagelist`` of the page to download.
            qualityIndex: Index into ``accept_quality`` as printed by
                ``Show_Accept_Quality``.
        """
        # Make sure the quality list exists even when Show_Accept_Quality
        # was never called; otherwise the lookup below fails on an empty list.
        self.__Load_Accept_Quality()
        data = self.__Get_Download_Link(part, self.accept_quality[qualityIndex])
        # Titles may contain characters that are invalid in file names.
        title = self.pagelist[part]["title"].translate(self._UNSAFE_FILENAME_CHARS)
        # The first three characters of the reported format are used as the
        # file extension.
        filename = f"{part}.{title}.{data['format'][:3]}"
        segments = data["durl"]
        if not segments:
            print(f"No download link returned for part {part}")
            return
        if len(segments) > 1:
            # Merging several slices is not implemented; say so instead of
            # silently doing nothing.
            print(f"Part {part} is split into {len(segments)} segments, which is not supported")
            return
        filesize = segments[0]["size"]
        url = segments[0]["url"]
        workerThread = DownloadWorkerThread(url, filename, filesize=filesize)
        workerThread.daemon = True
        workerThread.start()
        # Block the calling thread until the download has finished.
        workerThread.join()
class DownloadWorkerThread(threading.Thread):
    """Download one URL into a local file with parallel HTTP range requests.

    ``run`` starts ``threadCount`` worker threads. Each worker repeatedly
    claims the next block of ``maxBlockSize`` bytes, requests it with a
    ``Range`` header and writes it at the matching offset of the output
    file. A further thread prints a progress bar once per second.

    Keyword arguments accepted by the constructor: ``filesize``,
    ``maxBlockSize``, ``headers`` and ``threadCount``; a missing or falsy
    value falls back to the class default (or to ``Auto_Get_Filesize``).
    """

    # Guards updates of ``curSize`` and writes to the output file.
    fileLock = threading.Lock()
    threadCount = 30
    maxBlockSize = 1024*1024*5
    # Index of the next block that has not been claimed by a worker yet.
    curRangNum = 0
    # Guards ``curRangNum``.
    curRangNumLock = threading.Lock()
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36",
        "referer": "https://www.bilibili.com/"
    }
    EMPTY_STR = ""
    ERROR_STATUS_VALUE = -1

    def __init__(self, url, filename, **kwargs) -> None:
        """Prepare the download and make sure the output file exists.

        Args:
            url: Address of the resource to download.
            filename: Path of the output file.
            **kwargs: Optional ``filesize``, ``maxBlockSize``, ``headers``
                and ``threadCount`` overrides.
        """
        threading.Thread.__init__(self)
        self.url = url
        self.filename = filename
        self.curSize = 0
        self.curRangNum = 0
        # Set by run() once every worker has stopped, so that the progress
        # display terminates even when the download is incomplete.
        self._downloadDone = threading.Event()
        # Headers are resolved before the size probe so that a custom
        # ``headers`` argument is also used by Auto_Get_Filesize.
        self.headers = kwargs.get("headers") or self.headers
        self.maxBlockSize = kwargs.get("maxBlockSize") or self.maxBlockSize
        self.threadCount = kwargs.get("threadCount") or self.threadCount
        self.filesize = kwargs.get("filesize") or self.Auto_Get_Filesize()
        # Append mode creates the file when it is missing and leaves an
        # existing file untouched.
        with open(filename, "ab"):
            pass

    def run(self):
        """Start the workers and the progress display, then wait for them."""
        print(f"""
*******************************************************************
文件名: {self.filename}
文件大小:{self.filesize//(1024**2)} MB
启用线程: {self.threadCount}
*******************************************************************
""")
        workers = []
        for _ in range(self.threadCount):
            worker = threading.Thread(target=self.Range_Worker, args=(self, ), daemon=True)
            worker.start()
            workers.append(worker)
        print("Thread strat success")
        progress = threading.Thread(target=self.Display_Progress, args=(), daemon=True)
        progress.start()
        for worker in workers:
            worker.join()
        # Workers are finished (successfully or not): release the display.
        self._downloadDone.set()
        progress.join()

    def Display_Progress(self):
        """Print a 50-cell progress bar once per second.

        Returns when the bar is full or when ``run`` has signalled that all
        workers have stopped.
        """
        size = self.filesize // (1024**2)
        preSize = 0
        while True:
            finished = self._downloadDone.is_set()
            curSize = self.curSize
            if self.filesize > 0:
                num = min(50, int(50*(curSize/self.filesize)))
            else:
                # Size unknown or zero: no ratio can be computed.
                num = 50 if finished else 0
            # \033[32m switches the following text to green, \033[0m resets it.
            print(f"\r [\033[32m{'■'*num}{' '*(50-num)}\033[0m]{curSize//(1024**2)}/{size} MB {(curSize-preSize)//(1024**2)} MB/S", end="")
            preSize = curSize
            if finished or num >= 50:
                break
            self._downloadDone.wait(1)

    def _claim_next_range(self):
        """Reserve the next block and return its ``start-end`` range string.

        Returns ``None`` when no block is left. The first block is always
        handed out, so a download with an unknown size is fetched with a
        single open-ended request.
        """
        with self.curRangNumLock:
            index = self.curRangNum
            start = index * self.maxBlockSize
            # Check and increment under the same lock so that two workers
            # can never pass the check for the same remaining block.
            if index != 0 and start >= self.filesize:
                return None
            self.curRangNum = index + 1
        last = start + self.maxBlockSize - 1
        # The final block is left open-ended instead of naming a byte
        # beyond the end of the file.
        end = last if last < self.filesize else self.EMPTY_STR
        return f"{start}-{end}"

    def Range_Worker(self, downloader):
        """Download blocks until none are left.

        Args:
            downloader: Object whose ``Write_Content`` receives each block.

        Raises:
            requests.HTTPError: if a block request returns an error status.
        """
        reqHeaders = self.headers.copy()
        while True:
            rag = self._claim_next_range()
            if rag is None:
                break
            reqHeaders["Range"] = "bytes=" + rag
            with requests.get(url=self.url, headers=reqHeaders, stream=True) as response:
                if not response.ok:
                    print(rag + f": Error status:{response.status_code}" )
                    response.raise_for_status()
                dataLst = []
                for chunk in response.iter_content(chunk_size=4096):
                    with self.fileLock:
                        self.curSize += len(chunk)
                    dataLst.append(chunk)
            downloader.Write_Content(b''.join(dataLst), rag)

    def Write_Content(self, content, content_range):
        """Write ``content`` at the offset named by ``content_range``.

        Args:
            content: Bytes of one block.
            content_range: Range string ``start-end``; only ``start`` is used.
        """
        seekIndex = int(content_range.split("-")[0])
        with self.fileLock, open(self.filename, "rb+") as f:
            f.seek(seekIndex)
            f.write(content)

    @classmethod
    def _parse_total_size(cls, content_range):
        """Return the total size from a ``Content-Range`` header value.

        The total is the part after the ``/`` (``bytes 0-0/12345``).
        ``ERROR_STATUS_VALUE`` is returned when the header is missing or
        the total is not a number.
        """
        if content_range is None:
            return cls.ERROR_STATUS_VALUE
        total = content_range.rsplit("/", 1)[-1].strip()
        return int(total) if total.isdigit() else cls.ERROR_STATUS_VALUE

    def Auto_Get_Filesize(self) -> int:
        """Probe the size of the resource with a one-byte range request.

        Returns:
            The total size in bytes, or ``ERROR_STATUS_VALUE`` when the
            response carries no usable ``Content-Range`` header.

        Raises:
            requests.HTTPError: if the probe returns an error status.
        """
        headers = self.headers.copy()
        headers["range"] = "bytes=0-0"
        with requests.get(self.url, headers=headers, stream=True) as response:
            response.raise_for_status()
            return self._parse_total_size(response.headers.get("Content-Range"))

    def Auto_Get_Blocksize(self) -> int:
        """Placeholder; not implemented and currently returns ``None``."""
        pass
if __name__ == "__main__":
    # Download test: list the pages and qualities of one video, ask for a
    # quality index, then download every page at that quality.
    video = BiliBiliVideoDownloader("BV1w54y1y7Di")
    video.Show_All_Page_Title()
    # pageIndex = int(input("\nSelect page:"))
    video.Show_Accept_Quality()
    qualityIndex = int(input("\nSelect download quality:"))
    for pageIndex, page in enumerate(video.pagelist):
        print(f"\n{pageIndex}.{page['title']}")
        video.Start_Download(pageIndex, qualityIndex)