需要了解的工具:
先看看我发的第一期专栏,里面有关于开发者模式的介绍,以及一些相关的请求库与解析库。本期专栏将不再赘述。
下载ffmpeg。下载官网链接:https://ffmpeg.org/download.html。
注意:如果想要在cmd中使用,需要先配置环境变量
有关于此的介绍与使用详细可以看此链接:https://zhuanlan.zhihu.com/p/356411237
本期将导入 subprocess 来用python执行ffmpeg的有关命令。
对于m4s的处理及相关源码(能直接运行):
本期处理依据来源于此:https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/docs/video/videostream_url.md
m4s文件是单独的音频或者视频文件(二进制的媒体分段文件,而不是文本文件)。
可以先了解一下b站的api设置。具体可以看此链接:
https://zhuanlan.zhihu.com/p/556023455。
通过请求b站的api能够返回含有url与backup_url的 json 文本
我们的目的就是请求以上的url。打开开发者模式:依次点击下图的五个位置:
五处的url即是我们需要的m4s路由文件。(由于是音视频文件分开,需要再找一个音频或视频文件)
源代码的简单实现:
import subprocess
import requests
import os
from pyquery import PyQuery as pq
# Page URL of the video to download; it is also fetched below to read the title.
url = 'https://www.bilibili.com/bangumi/play/ss41411?spm_id_from=333.337.0.0'
# Direct .m4s stream URLs (video and audio are served as separate files).
# NOTE(review): both URLs carry a `deadline=` query parameter, so they
# presumably expire -- replace them with fresh ones from the developer tools.
video_url = "https://upos-sz-mirror08c.bilivideo.com/upgcxcode/" \
    "97/11/712561197/712561197-1-100023.m4s?e=ig8euxZM2rNcNbdlhoNv" \
    "NC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5t" \
    "ZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0e" \
    "N0B599M=&uipk=5&nbs=1&deadline=1692694021&gen=playurlv2&os=08cbv&oi=0&trid=15fc5e14612545588fa9ec" \
    "1d609c3114p&mid=0&platform=pc&upsig=bdd081413ca8384cf3027c35c408eeba&uparams=e,uipk,nbs,de" \
    "adline,gen,os,oi,trid,mid,platform&bvc=vod&nettype=0&orderid=0,3&buvid=976164D1-4AB5-818C-E82" \
    "6-C4759797536B85029infoc&build=0&f=p_0_0&agrr=1&bw=46110&logo=80000000"
audio_url = 'https://upos-sz-mirrorcos.bilivideo.c' \
    'om/upgcxcode/97/11/712561197/712561197_nb3-1-30216.' \
    'm4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5J' \
    'ZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZl' \
    'qNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=169269402' \
    '1&gen=playurlv2&os=cosbv&oi=0&trid=15fc5e14612545588fa9ec1d609c3114p&mid=0&platform=pc&upsig=447e' \
    '887251523ea45c037f165413cc15&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,mid,pl' \
    'atform&bvc=vod&nettype=0&orderid=0,3&buvid=976164D1-4AB5-818C-E826-C4759797536B85029info' \
    'c&build=0&f=p_0_0&agrr=1&bw=5540&logo=80000000'
# Bilibili applies hotlink protection: a wrong Referer or User-Agent is treated
# as hotlinking and the stream cannot be fetched.
headers = {
    "Referer": "https://www.bilibili.com/bangumi/play/ss41411?spm_id_from=333.337.0.0",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
                  "116.0.0.0 Safari/537.36"
}
# Fetch the page with the same browser-like headers and a timeout so that a
# stalled connection cannot hang the script forever.
html = requests.get(url, headers=headers, timeout=20).text
# The page <title> is later used as the name of the merged .mp4 file.
title = pq(html)('head > title').text()
# Equivalent shell command:
#   ffmpeg -i video.m4s -i audio.m4s -c:v copy -c:a copy -f mp4 Download_video.mp4
def merge_video_audio():
    """Mux ``video.m4s`` and ``audio.m4s`` into ``<title>.mp4`` with ffmpeg.

    Both streams are copied as-is (no re-encoding). The output name is built
    from the module-level ``title``; characters that Windows does not allow
    in file names are replaced by ``_`` so that ffmpeg can create the file.

    Returns:
        bool: True when ffmpeg exited with status 0, False otherwise.

    Raises:
        OSError: if the ``ffmpeg`` executable cannot be started (for example
            because it is not on PATH).
    """
    # Page titles frequently contain characters such as '|', ':' or '?'.
    forbidden = '\\/:*?"<>|'
    safe_title = title.translate({ord(ch): '_' for ch in forbidden})
    # The command is passed as a list, so no shell quoting is involved.
    ffmpeg_command = [
        'ffmpeg',
        '-i', 'video.m4s',
        '-i', 'audio.m4s',
        '-c:v', 'copy',
        '-c:a', 'copy',
        '-f', 'mp4',
        f'{safe_title}.mp4',  # output file path
    ]
    result = subprocess.run(ffmpeg_command)
    # Only report success when ffmpeg actually succeeded.
    if result.returncode != 0:
        print(f"Failed to merge {title} (ffmpeg exit code {result.returncode})")
        return False
    print(f"Downloaded {title}")
    return True
def _save_stream(stream_url, filename, timeout=20):
    """Stream ``stream_url`` into ``filename``; return True only on HTTP 200."""
    # stream=True avoids holding the whole media file in memory; the context
    # manager releases the connection even when the status check fails.
    with requests.get(stream_url, headers=headers, stream=True,
                      timeout=timeout) as response:
        if response.status_code != 200:
            return False
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    return True


# Download the separate video and audio streams.
def download_video(video_url, audio_url):
    """Download the video and audio streams to ``video.m4s`` / ``audio.m4s``.

    The module-level ``headers`` (Referer and User-Agent) are sent with both
    requests. A success or failure line is printed for each stream.

    Args:
        video_url: URL of the video-only .m4s stream.
        audio_url: URL of the audio-only .m4s stream.

    Returns:
        bool: True when both streams were saved, False if either request
        did not answer with HTTP 200.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    video_ok = _save_stream(video_url, 'video.m4s')
    print("Downloaded video" if video_ok else "Failed to download video")
    audio_ok = _save_stream(audio_url, 'audio.m4s')
    print("Downloaded audio" if audio_ok else "Failed to download audio")
    return video_ok and audio_ok
# Delete the intermediate audio/video files. Skip this call if you want to
# keep the original .m4s files.
def remove_files():
    """Delete ``video.m4s`` and ``audio.m4s`` from the current directory.

    A file that does not exist (for example because its download failed) is
    skipped instead of raising, so the other file is still cleaned up.
    """
    for name in ('video.m4s', 'audio.m4s'):
        try:
            os.remove(name)
        except FileNotFoundError:
            # Nothing to clean up for this stream.
            pass
def main():
    """Run the pipeline: download both streams, merge them, remove temp files."""
    download_video(video_url, audio_url)
    merge_video_audio()
    remove_files()


if __name__ == '__main__':
    main()
有关注释已放置在代码块中。
运行成功截图:
对于第一期与第二期的相关拓展:
有些动漫网站在network下找不到ts文件,这时可以直接下载m3u8格式文件,并将其扩展名改为txt。用文本编辑器打开后,如果发现其中有ts文件的链接,就可以执行以下代码:
import requests
import os
from concurrent.futures import ThreadPoolExecutor
from pyquery import PyQuery as pq
from os.path import expanduser
# Parameters below need to be adapted to your own machine and target video.
desktop_path = os.path.join(expanduser("~"), "Desktop")
# Directory that receives the individual .ts segments.
path = fr"{desktop_path}\\video"
# The downloaded .m3u8 playlist after renaming its extension to .txt.
filename = fr"{desktop_path}\\7affff77b56af10b9dfa22b44d255de5.txt"
request_url = r'https://www.myifun.com/vplay/8290-6-1.html'
# A timeout keeps a stalled connection from hanging the script forever.
html = requests.get(request_url, timeout=20).text
title = pq(html)('head > title').text()
# Page titles often contain characters Windows forbids in file names
# (e.g. '|', ':', '?'); replace them so the merged file can be created.
safe_title = title.translate({ord(ch): '_' for ch in '\\/:*?"<>|'})
output_filename = fr"{desktop_path}\\{safe_title}.mp4"
# Segment URLs, filled in by read_https_links().
links = []
# Inclusive index range of the segments to download.
start_index = 0
last_index = 0
# Read the text file obtained by renaming the .m3u8 playlist.
def read_https_links(filename):
    """Collect every ``https://`` line of the playlist into the global ``links``.

    The file is decoded as UTF-8 (the encoding of m3u8 playlists) instead of
    the locale default; undecodable bytes are replaced rather than aborting,
    since only the ASCII URL lines matter here.

    Args:
        filename: Path of the playlist text file.

    Returns:
        list: The module-level ``links`` list. Entries are appended, so
        calling this twice adds the same URLs twice.
    """
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        # strip() drops the trailing newline and surrounding whitespace.
        stripped = (line.strip() for line in file)
        links.extend(link for link in stripped if link.startswith('https://'))
    return links
# Download a single .ts segment.
def download_links(index):
    """Download ``links[index]`` and save it as ``<path>\\<index>.ts``.

    Network errors are caught and reported. This function runs inside a
    thread pool whose results are never consumed, so an exception raised
    here would otherwise vanish without any message.

    Args:
        index: Position of the segment URL in the global ``links`` list.
    """
    link = links[index]
    try:
        response = requests.get(link, timeout=20)
    except requests.RequestException as exc:
        print(f"Failed to download {link}: {exc}")
        return
    if response.status_code != 200:
        print(f"Failed to download {link}")
        return
    with open(fr"{path}\\{index}.ts", "wb", buffering=8192) as f:
        f.write(response.content)
    print(f"Downloaded {link}")
# Concatenate all .ts segments into one .mp4 file.
def merge_files(output_filename):
    """Concatenate the downloaded segments, in order, into ``output_filename``.

    The output is opened in write mode so that a file left over from an
    earlier (possibly partial) run is replaced instead of being appended to,
    which would duplicate data.

    Args:
        output_filename: Path of the merged file to create.

    Raises:
        FileNotFoundError: if a segment file is missing.
    """
    with open(output_filename, "wb") as output_file:
        # One segment file exists per entry of the global ``links`` list.
        for index in range(len(links)):
            with open(fr"{path}\\{index}.ts", "rb") as f:
                output_file.write(f.read())
# Delete the .ts segments and keep only the merged .mp4 file.
def remove_files(path):
    """Delete the segment files ``start_index..last_index`` and then ``path``.

    The segment path is built exactly like in the download and merge steps,
    so the files are found on every platform. Only ``path`` itself is
    removed: ``os.rmdir`` is used because ``os.removedirs`` would also delete
    any parent directory that happens to become empty.

    Args:
        path: Directory that holds the segment files.

    Raises:
        FileNotFoundError: if a segment file is missing.
        OSError: if ``path`` is not empty after the segments were removed.
    """
    for index in range(start_index, last_index + 1):
        os.remove(fr"{path}\\{index}.ts")
    os.rmdir(path)
# Make sure the segment directory exists before any worker writes into it.
os.makedirs(path, exist_ok=True)
read_https_links(filename)
# Index of the final segment. For an empty playlist this is -1, which makes
# every range below empty instead of indexing links[0] and raising IndexError.
last_index = len(links) - 1
# Download all segments concurrently; leaving the `with` block waits for
# every worker to finish.
with ThreadPoolExecutor(max_workers=150) as executor:
    executor.map(download_links, range(start_index, last_index + 1))
# Retry, one at a time, every segment that is still missing on disk.
for index in range(start_index, last_index + 1):
    if not os.path.exists(fr"{path}\\{index}.ts"):
        download_links(index)
if __name__ == '__main__':
    if links:
        merge_files(output_filename)
        remove_files(path)
        print(f"All files merged into {output_filename}")
    else:
        print(f"No https:// links found in {filename}; nothing to merge")
二期:通过点触验证码识别后,我们可以通过执行以下代码将cookies保存下来,下次直接使用cookies跳过点触验证码的识别。
def get_cookies():
    """Wait for the logged-in home page, then dump the browser cookies.

    Waits up to 180 seconds (polling every 0.5 s) until the page title
    contains the bilibili home-page title, and prints a warning when the
    final title is not exactly that string. Each cookie of the module-level
    ``browser`` is printed and written as one JSON object per line to
    ``ACookies.txt``.

    Returns:
        dict: Mapping of cookie name to cookie value.
    """
    bilibili_title = '哔哩哔哩 (゜-゜)つロ 干杯~-bilibili'
    waiter = WebDriverWait(browser, 180, 0.5)
    waiter.until(EC.title_contains(bilibili_title))
    if not browser.title == bilibili_title:
        print('页面加载失败')
    login_cookies = {}
    with open('ACookies.txt', 'w') as cookie_file:
        for entry in browser.get_cookies():
            print(entry)
            # One JSON document per line so the file can be read back line by line.
            cookie_file.write(f"{json.dumps(entry)}\n")
            login_cookies[entry['name']] = entry['value']
    return login_cookies
使用cookies跳过验证码识别代码:
from selenium import webdriver
import json
# Load the cookies saved earlier: ACookies.txt holds one JSON object per line.
with open('ACookies.txt', 'r') as fw:
    ListCookies = [json.loads(line.strip()) for line in fw]
driver = webdriver.Chrome()
# Open the login page first, then attach the saved cookies to the session.
driver.get('https://passport.bilibili.com/login')
for cookie in ListCookies:
    driver.add_cookie(cookie)
# Navigate to the home page with the cookies in place.
driver.get('https://www.bilibili.com/')
如有错误或者改进之处,欢迎各位提出
出现的链接地址:
https://ffmpeg.org/download.html
https://zhuanlan.zhihu.com/p/356411237
https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/docs/video/videostream_url.md