一分钟学会使用 Python 代码批量下载B站视频（附源码下载链接）

weixin_58006135

于 2024-09-15 12:26:00 发布

阅读量183

点赞数 10

分类专栏：源码分享　文章标签：python、音视频、开发语言

本文链接：https://blog.csdn.net/weixin_58006135/article/details/142281781

版权

源码分享专栏收录该内容

1 篇文章 0 订阅

订阅专栏

项目场景：

在浏览B站视频时，遇到喜欢的视频并收藏后，由于B站本身的原因或者UP主自身的原因，视频过段时间可能会失效。
今天我来教大家如何把B站的视频下载到本地。

解决方案：

复制BV号：

打开B站的视频链接地址
在这里插入图片描述

运行代码

在这里插入图片描述

查看结果

在这里插入图片描述

部分核心代码展示

import subprocess
import requests
import re
import pprint

# Request headers shared by the HTTP helpers in this script.
headers = {
    # Referer set to the bilibili home page.
    "referer": "https://www.bilibili.com/",
    # Desktop Chrome user-agent string.
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36"
    ),
}

def get_response(html_url, timeout=10):
    """Send a GET request using the module-level ``headers``.

    Args:
        html_url: URL to fetch.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout exception. Defaults to 10.

    Returns:
        The ``requests.Response`` object. The HTTP status code is not
        checked here; callers inspect the response themselves.
    """
    # requests applies no timeout by default, so a stalled connection would
    # block the whole script indefinitely; always pass one explicitly.
    response = requests.get(url=html_url, headers=headers, timeout=timeout)
    return response

def clean_title(title):
    """Turn a video title into a short string usable as a file name.

    Steps:
      1. Remove the characters ``\\ / : * ? " < > |``.
      2. Cut the title at the first remaining punctuation mark (any character
         that is neither a word character nor whitespace). If that mark is
         the very first character and another mark exists, cut at the second
         mark instead, so the leading mark is kept.
      3. Strip surrounding whitespace and limit the result to 50 characters.
      4. If nothing is left, fall back to the title with all punctuation
         removed, and finally to ``'untitled'``.

    Args:
        title: Raw title text.

    Returns:
        A non-empty string of at most 50 characters.
    """
    # The backslash must be escaped inside the character class; a bare ``\/``
    # only matches the forward slash and lets backslashes through.
    title = re.sub(r'[\\/:*?"<>|]', '', title)

    punctuation_indices = [m.start() for m in re.finditer(r'[^\w\s]', title)]
    cut = None
    if punctuation_indices:
        cut = punctuation_indices[0]
        if cut == 0 and len(punctuation_indices) > 1:
            cut = punctuation_indices[1]

    candidate = title if cut is None else title[:cut]
    # rstrip again because the 50-character cut may land right after a space.
    candidate = candidate.strip()[:50].rstrip()

    if not candidate:
        # The cut consumed everything (e.g. the only punctuation mark was the
        # first character). Keep the readable part instead of returning ''.
        candidate = re.sub(r'[^\w\s]', '', title).strip()[:50].rstrip()

    return candidate or 'untitled'

def get_video_info(html_url):
    """Fetch a video page and extract its CID, session and title.

    The values are pulled out of the page text with regular expressions:
    the first ``"cid":<digits>,`` occurrence, the first ``"session":"..."``
    occurrence, and the ``title`` attribute of the first ``<h1>`` tag that
    carries one. Spaces are removed from the title before it is passed
    through ``clean_title``.

    Args:
        html_url: URL of the video page.

    Returns:
        A list ``[cid, session, title]``; all three items are strings.

    Raises:
        IndexError: If any of the three patterns is not found in the page.
    """
    page_text = get_response(html_url).text
    # Raw strings: '\d' in a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    cid = re.findall(r'"cid":(\d+),', page_text)[0]
    session = re.findall(r'"session":"(.*?)"', page_text)[0]
    title = re.findall(r'<h1[^>]+title="(.*?)"', page_text)[0].replace(' ', '')
    title = clean_title(title)  # make the title safe to use as a file name
    print(cid, session, title)
    video_info = [cid, session, title]
    return video_info

def get_video_content(cid, session, bv_id, timeout=10):
    """Query the playurl API and return the audio and video stream URLs.

    Args:
        cid: CID value sent as the ``cid`` query parameter.
        session: Session value sent as the ``session`` query parameter.
        bv_id: BV id sent as the ``bvid`` query parameter.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout exception. Defaults to 10.

    Returns:
        A list ``[audio_url, video_url]`` holding the ``baseUrl`` of the
        first entry in the response's ``data.dash.audio`` and
        ``data.dash.video`` lists.

    Raises:
        KeyError, IndexError, TypeError: If the JSON response does not have
            the ``data.dash.audio[0]`` / ``data.dash.video[0]`` structure.
    """
    index_url = 'https://api.bilibili.com/x/player/playurl'
    # NOTE(review): the meaning of qn / fnver / fnval / fourk is defined by
    # the API, not by this file; presumably they select quality and the DASH
    # stream format -- confirm before changing them.
    data = {
        'cid': cid,
        'qn': '0',
        'type': '',
        'otype': 'json',
        'fourk': '1',
        'bvid': bv_id,
        'fnver': '0',
        'fnval': '976',
        'session': session,
    }
    # Pass a timeout so a stalled API call cannot hang the script forever.
    json_data = requests.get(
        url=index_url, params=data, headers=headers, timeout=timeout
    ).json()
    dash = json_data['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']
    print(audio_url, video_url)
    video_content = [audio_url, video_url]
    return video_content

def _download(url, path, chunk_size=1024 * 1024, timeout=30):
    """Stream ``url`` to ``path`` without holding the whole body in memory."""
    import os  # local import: only this helper needs it

    part_path = path + '.part'
    with requests.get(url=url, headers=headers, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an HTTP error body as a media file.
        response.raise_for_status()
        with open(part_path, mode='wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    # Rename only after a complete download so ``path`` never holds a
    # truncated file.
    os.replace(part_path, path)


def save(title, audio_url, video_url):
    """Download the audio and video streams and write them to disk.

    The audio stream is saved as ``<title>.mp3`` and the video stream as
    ``<title>.mp4`` in the current working directory. Each stream is written
    in chunks to a temporary ``.part`` file and renamed once complete.

    Args:
        title: Base file name (without extension).
        audio_url: URL of the audio stream.
        video_url: URL of the video stream.

    Raises:
        requests.HTTPError: If either download returns an error status.
    """
    _download(audio_url, title + '.mp3')
    _download(video_url, title + '.mp4')
    print(title, '保存完成')