如何使用python下载B站视频并使用ffmpeg进行合流操作

需要了解的工具:

先看看我发的第一期专栏,里面有关于开发者模式的介绍,以及一些相关的请求库与解析库。本期专栏将不再赘述。

下载ffmpeg。下载官网链接:https://ffmpeg.org/download.html

注意:如果想要在cmd中使用,需要先配置环境变量

有关于此的介绍与使用详细可以看此链接:https://zhuanlan.zhihu.com/p/356411237

本期将导入 subprocess 来用python执行ffmpeg的有关命令。

对于m4s的处理及相关源码(能直接运行):

本期处理依据来源于此:https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/docs/video/videostream_url.md

m4s文件是单独的音频或者视频流文件(二进制媒体数据,并非文本文件)。

可以先了解一下b站的api设置。具体可以看此链接:

https://zhuanlan.zhihu.com/p/556023455

通过请求b站的api能够返回含有url与backup_url的 json 文本

我们的目的就是请求以上的url。打开开发者模式:依次点击下图的五个位置:

五处的url即是我们需要的m4s路由文件。(由于是音视频文件分开,需要再找一个音频或视频文件)

源代码的简单实现:

import subprocess
import requests
import os
from pyquery import PyQuery as pq

# Page URL of the video; it is also sent as the Referer of every request.
url = 'https://www.bilibili.com/bangumi/play/ss41411?spm_id_from=333.337.0.0'
# Video-only and audio-only m4s stream URLs.
# NOTE(review): both carry a `deadline=` query parameter, so they presumably
# expire and have to be re-captured before each run -- confirm.
video_url = "https://upos-sz-mirror08c.bilivideo.com/upgcxcode/" \
            "97/11/712561197/712561197-1-100023.m4s?e=ig8euxZM2rNcNbdlhoNv" \
            "NC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5JZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5t" \
            "ZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZlqNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0e" \
            "N0B599M=&uipk=5&nbs=1&deadline=1692694021&gen=playurlv2&os=08cbv&oi=0&trid=15fc5e14612545588fa9ec" \
            "1d609c3114p&mid=0&platform=pc&upsig=bdd081413ca8384cf3027c35c408eeba&uparams=e,uipk,nbs,de" \
            "adline,gen,os,oi,trid,mid,platform&bvc=vod&nettype=0&orderid=0,3&buvid=976164D1-4AB5-818C-E82" \
            "6-C4759797536B85029infoc&build=0&f=p_0_0&agrr=1&bw=46110&logo=80000000"
audio_url = 'https://upos-sz-mirrorcos.bilivideo.c' \
            'om/upgcxcode/97/11/712561197/712561197_nb3-1-30216.' \
            'm4s?e=ig8euxZM2rNcNbdlhoNvNC8BqJIzNbfqXBvEqxTEto8BTrNvN0GvT90W5J' \
            'ZMkX_YN0MvXg8gNEV4NC8xNEV4N03eN0B5tZlqNxTEto8BTrNvNeZVuJ10Kj_g2UB02J0mN0B5tZl' \
            'qNCNEto8BTrNvNC7MTX502C8f2jmMQJ6mqF2fka1mqx6gqj0eN0B599M=&uipk=5&nbs=1&deadline=169269402' \
            '1&gen=playurlv2&os=cosbv&oi=0&trid=15fc5e14612545588fa9ec1d609c3114p&mid=0&platform=pc&upsig=447e' \
            '887251523ea45c037f165413cc15&uparams=e,uipk,nbs,deadline,gen,os,oi,trid,mid,pl' \
            'atform&bvc=vod&nettype=0&orderid=0,3&buvid=976164D1-4AB5-818C-E826-C4759797536B85029info' \
            'c&build=0&f=p_0_0&agrr=1&bw=5540&logo=80000000'
# Bilibili checks for hot-linking: a request with a wrong Referer or
# User-Agent is treated as hot-linking and the stream cannot be fetched.
headers = {
    # Same value as `url`; referencing it keeps the two from drifting apart.
    "Referer": url,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/"
                  "116.0.0.0 Safari/537.36"
}
# Fetch the page with the same browser-like headers as the stream requests,
# and with a timeout so a stalled connection cannot hang the script forever.
html = requests.get(url, headers=headers, timeout=30).text
# The page title names the output file; fall back to a fixed name when the
# page has no <title>, otherwise the output would be called just ".mp4".
title = pq(html)('head > title').text() or 'Download_video'

# cmd:ffmpeg -i video.m4s -i audio.m4s -c:v copy -c:a copy -f mp4 Download_video.mp4
# 合成音频与视频文件
def merge_video_audio():
    """Mux ``video.m4s`` and ``audio.m4s`` into ``<title>.mp4`` using ffmpeg.

    Both streams are copied as-is (``-c:v copy -c:a copy``), so nothing is
    re-encoded. The output name is derived from the module-level ``title``
    with characters that are not allowed in Windows file names replaced
    by ``_``.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero
            status, so that a failed merge is not reported as a success.
    """
    # Page titles can contain characters such as ':' or '/' that cannot be
    # part of a file name; replace them instead of letting ffmpeg fail.
    invalid_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    safe_title = title.translate(invalid_chars).strip() or 'Download_video'
    # The ffmpeg command is passed as a list, so no shell quoting is involved.
    ffmpeg_command = [
        'ffmpeg',
        '-i', 'video.m4s',
        '-i', 'audio.m4s',
        '-c:v', 'copy',
        '-c:a', 'copy',
        '-f', 'mp4',
        f'{safe_title}.mp4',  # output file path
    ]
    # check=True: stop here when ffmpeg fails rather than printing success.
    subprocess.run(ffmpeg_command, check=True)
    print(f"Downloaded {title}")

# 下载音频与视频文件
def _download_stream(stream_url, filename, label):
    """Stream one URL to *filename*; return True on HTTP 200, False otherwise."""
    # stream=True + iter_content writes the body chunk by chunk instead of
    # holding the whole media file in memory; the timeout bounds a stalled
    # connection.
    with requests.get(stream_url, headers=headers, stream=True, timeout=30) as response:
        if response.status_code != 200:
            print(f"Failed to download {label}")
            return False
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    print(f"Downloaded {label}")
    return True


def download_video(video_url, audio_url):
    """Download the video and audio m4s streams to the working directory.

    The streams are saved as ``video.m4s`` and ``audio.m4s``. The audio
    download is attempted even when the video download fails. Requests are
    sent with the module-level ``headers``.

    Args:
        video_url: URL of the video-only m4s stream.
        audio_url: URL of the audio-only m4s stream.

    Returns:
        True when both downloads answered with HTTP 200, otherwise False.
    """
    video_ok = _download_stream(video_url, 'video.m4s', 'video')
    audio_ok = _download_stream(audio_url, 'audio.m4s', 'audio')
    return video_ok and audio_ok

# 对原音频文件与视频文件进行删除。需要保留原文件的可以不执行
def remove_files():
    """Delete the intermediate ``video.m4s`` and ``audio.m4s`` files.

    Skip this call to keep the raw streams. A file that does not exist is
    ignored, so a missing ``video.m4s`` no longer prevents ``audio.m4s``
    from being removed.
    """
    for filename in ('video.m4s', 'audio.m4s'):
        try:
            os.remove(filename)
        except FileNotFoundError:
            # Nothing to clean up for this stream (e.g. its download failed).
            pass

if __name__ == "__main__":
    # Pipeline: fetch both streams, mux them with ffmpeg, then delete the
    # intermediate m4s files.
    download_video(video_url=video_url, audio_url=audio_url)
    merge_video_audio()
    remove_files()

有关注释已放置在代码块中。

运行成功截图:

对于第一期与第二期的相关拓展:


有些动漫网站在 network 下看不到 ts 文件,这时可以直接下载 m3u8 格式文件,并将其扩展名改为 txt。用文本编辑器打开后,如果发现其中列有 ts 文件的链接,就可以执行以下代码:

import requests
import os
from concurrent.futures import ThreadPoolExecutor
from pyquery import PyQuery as pq
from os.path import expanduser

# Parameters to adjust before running.
desktop_path = os.path.join(expanduser("~"), "Desktop")
# Directory that holds the downloaded .ts segments. Built with os.path.join
# instead of a raw f-string with "\\", which produced doubled backslashes.
path = os.path.join(desktop_path, "video")
# The m3u8 playlist, saved with a .txt extension.
filename = os.path.join(desktop_path, "7affff77b56af10b9dfa22b44d255de5.txt")
request_url = r'https://www.myifun.com/vplay/8290-6-1.html'
# Same 20 s timeout as the segment requests, so a stalled connection cannot
# hang the script.
html = requests.get(request_url, timeout=20).text
title = pq(html)('head > title').text()
# The page title names the output file: replace characters that are not
# allowed in Windows file names and fall back to a fixed name when empty.
safe_title = title.translate(
    str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
).strip() or 'output'
output_filename = os.path.join(desktop_path, f"{safe_title}.mp4")
# Segment URLs, filled in by read_https_links().
links = []
# Inclusive index range of the segments to download and clean up.
start_index = 0
last_index = 0

# 读取m3u8格式转化后的txt文本文件
def read_https_links(filename):
    """Collect the ``https://`` lines of a playlist file into ``links``.

    Args:
        filename: path of the m3u8 playlist saved as a text file.

    Returns:
        The module-level ``links`` list, extended (in file order) with every
        line that starts with ``https://`` once surrounding whitespace has
        been stripped.
    """
    # HLS playlists are UTF-8. Do not rely on the platform default encoding
    # (e.g. GBK on a Chinese Windows system), and do not let an undecodable
    # byte in a comment line abort the read: the URLs themselves are ASCII.
    with open(filename, 'r', encoding='utf-8', errors='replace') as file:
        stripped_lines = (line.strip() for line in file)
        links.extend(
            link for link in stripped_lines if link.startswith('https://')
        )
    return links

# 下载每个ts文件
def download_links(index):
    """Download the segment ``links[index]`` and save it as ``<index>.ts``.

    The file is written next to the other segments, using the module-level
    ``path``. Failures are reported on stdout and leave no file behind.

    Args:
        index: position of the segment URL in the module-level ``links``.
    """
    try:
        response = requests.get(links[index], timeout=20)
    except requests.RequestException as exc:
        # Report network errors explicitly: an exception raised inside a
        # thread-pool task is not shown unless its result is inspected.
        print(f"Failed to download {links[index]}: {exc}")
        return
    if response.status_code != 200:
        print(f"Failed to download {links[index]}")
        return
    with open(fr"{path}\\{index}.ts", "wb") as f:
        f.write(response.content)
    print(f"Downloaded {links[index]}")

# 将多个ts文件合并成一个mp4文件
def merge_files(output_filename):
    """Concatenate the downloaded segments, in index order, into one file.

    The segments are joined byte for byte; no remuxing is performed. One
    segment file is read per entry of the module-level ``links``.

    Args:
        output_filename: path of the merged output file; an existing file
            is overwritten.

    Raises:
        FileNotFoundError: if a segment file is missing.
    """
    # "wb" instead of "ab": appending would tack a second copy of the video
    # onto an output file left over from an earlier run.
    with open(output_filename, "wb") as output_file:
        for index, _ in enumerate(links):
            with open(fr"{path}\\{index}.ts", "rb") as segment:
                output_file.write(segment.read())

# 删除原本的ts文件保留MP4文件
def remove_files(path):
    """Delete the downloaded ``.ts`` segments and then the segment directory.

    Segments ``start_index`` .. ``last_index`` (module-level, inclusive) are
    removed; the merged output file is not touched.

    Args:
        path: directory the segments were downloaded to.

    Raises:
        OSError: if the directory cannot be removed (e.g. it is not empty).
    """
    for index in range(start_index, last_index + 1):
        # Raw f-string, like the download and merge code, so that exactly
        # the same file name is built here.
        input_filename = fr"{path}\\{index}.ts"
        try:
            os.remove(input_filename)
        except FileNotFoundError:
            # A segment that was never downloaded must not abort the cleanup.
            pass
    # rmdir, not removedirs: removedirs would also delete every parent
    # directory that happens to be empty.
    os.rmdir(path)

# Create the segment directory; exist_ok avoids the check-then-create race.
os.makedirs(path, exist_ok=True)

read_https_links(filename)
if not links:
    # Without this guard the retry pass below would fail with an IndexError.
    raise SystemExit(f"No https:// links found in {filename}")
last_index = len(links) - 1

# Download all segments concurrently. The results of map() are not
# inspected, so the pass below looks for segments that are still missing.
with ThreadPoolExecutor(max_workers=150) as executor:
    executor.map(download_links, range(start_index, last_index + 1))
# Retry pass: fetch, sequentially, every segment whose file does not exist.
for index in range(start_index, last_index + 1):
    segment_path = fr"{path}\\{index}.ts"
    if os.path.exists(segment_path):
        continue
    try:
        response = requests.get(links[index], timeout=20)
    except requests.RequestException as exc:
        # Keep retrying the remaining segments instead of crashing here.
        print(f"Retry failed for {links[index]}: {exc}")
        continue
    if response.status_code == 200:
        with open(segment_path, 'wb') as f:
            f.write(response.content)

if __name__ == '__main__':
    merge_files(output_filename)
    remove_files(path)
    print(f"All files merged into {output_filename}")


二期:通过点触验证码识别后,我们可以通过执行以下代码将cookies保存下来,下次直接使用cookies跳过点触验证码的识别。

def get_cookies():
    """Wait for the Bilibili home page, then save and return the cookies.

    Waits up to 180 s (polling every 0.5 s) until the browser title contains
    the home-page title, then writes every cookie of the module-level
    ``browser`` to ``ACookies.txt`` as one JSON object per line.

    Returns:
        dict mapping each cookie's name to its value.
    """
    bilibili_title = '哔哩哔哩 (゜-゜)つロ 干杯~-bilibili'
    WebDriverWait(browser, 180, 0.5).until(EC.title_contains(bilibili_title))
    if browser.title != bilibili_title:
        print('页面加载失败')
    login_cookies = {}
    with open('ACookies.txt', 'w') as f:
        for cookie in browser.get_cookies():
            print(cookie)
            f.write(json.dumps(cookie) + '\n')
            # Must be inside the loop: outside it only the last cookie was
            # recorded, and an empty cookie list raised a NameError.
            login_cookies[cookie['name']] = cookie['value']
    return login_cookies


使用cookies跳过验证码识别代码:

from selenium import webdriver
import json

# Load the saved cookies: ACookies.txt holds one JSON object per line.
with open('ACookies.txt', 'r') as cookie_file:
    ListCookies = [json.loads(raw_line.strip()) for raw_line in cookie_file]

driver = webdriver.Chrome()
# Open the login page first, add the saved cookies to that session, then
# load the home page with the cookies in place.
driver.get('https://passport.bilibili.com/login')
for saved_cookie in ListCookies:
    driver.add_cookie(saved_cookie)
driver.get('https://www.bilibili.com/')


如有错误或者改进之处,欢迎各位提出

出现的链接地址:

https://ffmpeg.org/download.html

https://zhuanlan.zhihu.com/p/356411237

https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/docs/video/videostream_url.md

https://zhuanlan.zhihu.com/p/556023455

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值