python3 视频分段下载

凌晨九点半
已于 2023-01-17 11:11:40 修改
阅读量333
点赞数
分类专栏：代码片段　文章标签：音视频 python java
于 2023-01-17 10:51:47 首次发布
本文链接：https://blog.csdn.net/Darker2017/article/details/123910250
版权
代码片段专栏收录该内容
1 篇文章 0 订阅
订阅专栏
该代码实现了一个Python脚本，用于分块下载大视频文件，使用多线程并发处理，然后合并成完整的视频。它首先获取视频的总长度，根据设定的块大小进行分割，将任务放入队列，通过线程池执行下载任务，最后校验并合并文件。
摘要由CSDN通过智能技术生成
视频分段下载器

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

"""
@author      : sirius
@file        : block_downloader.py
@time        : 2023/1/17
@description : Chunked video downloader: fetches a file in concurrent byte-range blocks and merges them.
"""
import logging
import os
import time
import requests
from logging import handlers, Formatter
from concurrent.futures import ThreadPoolExecutor
from queue import Queue

# Module-level logger used by every method below; its records propagate to the
# root logger, which BlockDownLoader._init_log configures with handlers.
logger = logging.getLogger(__name__)

class BlockDownLoader():
    """Download a large video in byte-range blocks and merge them.

    Workflow (see :meth:`run`):
        1. Read the total size from the ``content-length`` response header.
        2. Split the byte range ``[0, size)`` into blocks of at most
           ``block_size`` bytes.
        3. Push one task per block onto an internal queue.
        4. Download the blocks concurrently on a thread pool, one file each.
        5. Verify the block files, merge them into the final file and
           optionally delete the block files.
    """

    # How many times get_video_size() tries to read the remote size.
    _SIZE_ATTEMPTS = 3
    # Set once handlers are attached, so that creating several downloaders
    # does not duplicate every log line.
    _log_initialized = False

    def __init__(self, url, video_key, block_size=1024 * 1024 * 5, remove_block_file=True, max_threads=5,
                 dir_prefix=None, timeout=30):
        """
        :param url: directly downloadable video URL (e.g. a cloud-storage link)
        :param video_key: storage key of the video; its path without the
            extension becomes the output directory so that different videos
            do not overwrite each other, and its basename is the output file name
        :param block_size: size of one download block in bytes, default 5 MiB
        :param remove_block_file: whether to delete the temporary block files
            after a successful merge, default True
        :param max_threads: maximum number of download threads, default 5
        :param dir_prefix: base directory for the output; defaults to the
            directory containing this module
        :param timeout: timeout in seconds passed to every HTTP request
            (``None`` waits indefinitely)
        :raises ValueError: if ``block_size`` is not positive
        """
        if block_size <= 0:
            raise ValueError(f"block_size must be positive, got {block_size}")

        self._init_log()
        curr_dirname = os.path.abspath(os.path.dirname(__file__))
        output_path = os.path.splitext(video_key)[0]

        self.url = url
        if dir_prefix:
            self.output_path = f"{dir_prefix}/{output_path}"
        else:
            self.output_path = f"{curr_dirname}/{output_path}"
        self.file_name = os.path.basename(video_key)
        self.block_size = block_size
        self.max_threads = max_threads
        self.remove_block_file = remove_block_file
        self.timeout = timeout
        self.q_file_section = Queue()

        self.file_path = f"{self.output_path}/{self.file_name}"
        self.section_count = 0
        self.video_size = 0
        logger.info(f"curr_path={curr_dirname}")
        logger.info(f"dir_path={self.output_path}")
        logger.info(f"file_name={self.file_name}")
        logger.info(f"file_path={self.file_path}")

    def _block_path(self, block_id):
        """Return the path of the temporary file holding block ``block_id``."""
        return f"{self.output_path}/{block_id}.mp4"

    def _ensure_dir(self, local_path):
        """Create the parent directory of the file path ``local_path`` if needed."""
        dirname = os.path.dirname(local_path)
        # An empty dirname means "current directory": nothing to create.
        if dirname:
            os.makedirs(dirname, exist_ok=True)

    def _init_log(self):
        """Attach a rotating file handler and a console handler to the root logger.

        The log file is ``logs/downloader.log`` under the parent of this
        module's directory. Handlers are attached only once per process.
        """
        if BlockDownLoader._log_initialized:
            return

        current_path = os.path.dirname(os.path.abspath(__file__))
        root_path = os.path.join(current_path, os.pardir)
        logger_dir = os.path.join(root_path, 'logs')
        logger_path = os.path.join(logger_dir, 'downloader.log')
        # _ensure_dir creates the *parent* of the path it is given, so it must
        # receive the log file path for the logs directory itself to exist.
        self._ensure_dir(logger_path)

        root_logger = logging.getLogger()
        log_format = "%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]"
        file_handler = logging.handlers.TimedRotatingFileHandler(logger_path, 'midnight', 1, 0)
        file_handler.setFormatter(Formatter(log_format))
        file_handler.suffix = "%Y%m%d-%H%M.log"
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(Formatter(log_format))
        root_logger.addHandler(file_handler)
        root_logger.addHandler(console_handler)
        root_logger.setLevel(logging.INFO)
        BlockDownLoader._log_initialized = True

    def get_video_size(self):
        """Return the remote file size in bytes, or ``None`` if it cannot be read.

        The size is taken from the ``content-length`` header of a streamed GET
        (the body is not downloaded). On success it is also stored in
        ``self.video_size``. Up to ``_SIZE_ATTEMPTS`` attempts are made.
        """
        for attempt in range(1, self._SIZE_ATTEMPTS + 1):
            try:
                # The context manager releases the connection without
                # consuming the streamed body.
                with requests.get(self.url, stream=True, timeout=self.timeout) as response:
                    # Do not trust the length of an error page.
                    response.raise_for_status()
                    raw_length = response.headers.get("content-length")
                content_length = int(raw_length) if raw_length is not None else 0
            except (requests.RequestException, ValueError) as e:
                logger.warning(f"get_video_size attempt {attempt}/{self._SIZE_ATTEMPTS} failed: {e}")
                continue

            if content_length > 0:
                self.video_size = content_length
                return content_length
            logger.info(f"content-length={content_length}")
        return None

    def split_video_block(self, video_size):
        """Split ``video_size`` bytes into inclusive byte ranges.

        :param video_size: total size in bytes
        :return: dict mapping the block index (as ``str``) to an inclusive
            ``(start, end)`` tuple, or ``None`` when ``video_size`` is
            ``None``, zero or negative
        """
        if not video_size or video_size <= 0:
            return None

        # Ceiling division: an exact multiple of block_size must not produce
        # a trailing empty block.
        split_num = (video_size + self.block_size - 1) // self.block_size
        sections = {}
        for i in range(split_num):
            start = i * self.block_size
            # The last block ends at the final byte of the file.
            end = min(start + self.block_size, video_size) - 1
            sections[str(i)] = (start, end)
        return sections

    def add_video_block_to_queue(self, section_dict):
        """Queue one download task per entry of ``section_dict``.

        Sets ``self.section_count`` and puts, for every block, a dict with
        keys ``id``, ``size`` (bytes) and ``section`` (HTTP ``Range`` value).
        ``section_dict`` is left unmodified.
        """
        self.section_count = len(section_dict)
        for block_id, (start, end) in section_dict.items():
            self.q_file_section.put({
                "id": block_id,
                "size": int(end - start + 1),
                "section": f"bytes={start}-{end}",
            })

    def download_executor(self):
        """Download every queued block on a thread pool and wait for completion.

        Exceptions raised by individual downloads are logged, not re-raised;
        missing blocks are detected later by :meth:`check_section`.
        """
        with ThreadPoolExecutor(max_workers=self.max_threads) as executor:
            futures = [
                executor.submit(self.download, self.q_file_section.get())
                for _ in range(self.section_count)
            ]
        # Leaving the ``with`` block waits for all futures, so exception()
        # returns immediately here.
        for future in futures:
            error = future.exception()
            if error is not None:
                logger.error(f"block download failed: {error!r}")

    def download(self, section_sub_dict):
        """Download one block described by ``section_sub_dict`` into its block file.

        A block file that already exists with the expected size is kept. The
        file is only written when the number of received bytes equals the
        expected block size.

        :param section_sub_dict: task dict with keys ``id``, ``size``, ``section``
        :raises requests.RequestException: on network or HTTP errors
        """
        section_size = section_sub_dict.get("size")
        section = section_sub_dict.get("section")
        split_id = section_sub_dict.get("id")

        sub_file_path = self._block_path(split_id)
        if os.path.exists(sub_file_path):
            if os.path.getsize(sub_file_path) == section_size:
                logger.info(f"{sub_file_path} exists")
                return
            # A block of the wrong size (e.g. different block_size on an
            # earlier run) must not be reused.
            logger.warning(f"{sub_file_path} has unexpected size, downloading again")

        # Build the request header.
        headers = {"Range": section}
        logger.info(f"block_{split_id} start download path={sub_file_path}")
        with requests.get(url=self.url, headers=headers, timeout=self.timeout) as response:
            response.raise_for_status()
            stream_content = response.content
        stream_size = len(stream_content)
        logger.info(f"block_{split_id} end download stream_size={stream_size} section_size={section_size}")

        if section_size != stream_size:
            logger.warning(f"block_{split_id} size mismatch, block discarded")
            return

        # Write to a temporary name first so that an interrupted write never
        # leaves a truncated file under the final block name.
        part_path = f"{sub_file_path}.part"
        with open(part_path, mode="wb") as f:
            f.write(stream_content)
        os.replace(part_path, sub_file_path)
        logger.info(f"block_{split_id} write complete")

    def check_section(self):
        """Return True when every block file exists and their sizes sum to ``video_size``."""
        # There must be at least one block.
        if self.section_count == 0:
            return False

        file_size = 0
        for i in range(self.section_count):
            sub_file_path = self._block_path(i)
            if not os.path.exists(sub_file_path):
                return False
            file_size += os.path.getsize(sub_file_path)

        return file_size == self.video_size

    def check_and_merge_section(self):
        """Verify the block files and merge them into ``self.file_path``.

        :return: True when the final file was produced, False when the block
            files failed verification
        """
        check_status = self.check_section()
        logger.info(f"{self.file_path} check_block_status={check_status}")
        if not check_status:
            return False

        if self.section_count == 1:
            # os.replace overwrites an existing target on every platform.
            os.replace(self._block_path(0), self.file_path)
            return True

        # Merge into a temporary file opened for (over)writing, then move it
        # into place: a stale or partial target is never appended to.
        part_path = f"{self.file_path}.part"
        with open(part_path, "wb") as result_file:
            for i in range(self.section_count):
                with open(self._block_path(i), "rb") as f:
                    result_file.write(f.read())
        os.replace(part_path, self.file_path)
        return True

    def clear_video_block(self):
        """Delete the block files if ``remove_block_file`` is set.

        Nothing is deleted for a single-block download because that block was
        renamed to the final file.
        """
        if self.remove_block_file and self.section_count > 1:
            for i in range(self.section_count):
                os.remove(self._block_path(i))

    def run(self):
        """Run the whole download.

        :return: path of the downloaded file, or ``None`` on any failure.
            Block files already downloaded are kept on failure so that a
            later run can reuse them.
        """
        try:
            logger.info(f"{self.file_path} start download ")
            os.makedirs(self.output_path, exist_ok=True)

            # Get the total size of the video.
            video_size = self.get_video_size()
            if not video_size:
                logger.error(f"{self.file_path} cannot determine video size")
                return None
            if os.path.exists(self.file_path) and os.path.getsize(self.file_path) == video_size:
                logger.info(f"{self.file_path} exists")
                return self.file_path

            # Split the byte range into blocks.
            section_dict = self.split_video_block(video_size)
            if not section_dict:
                return None
            logger.info(f"{self.file_path} split_video_block len={len(section_dict)}")
            # Push the blocks onto the task queue.
            self.add_video_block_to_queue(section_dict)
            # Download the blocks concurrently.
            self.download_executor()

            # Verify and merge the blocks.
            if not self.check_and_merge_section():
                return None

            # Remove the intermediate block files.
            self.clear_video_block()
        except Exception:
            # Top-level boundary: report the failure instead of raising.
            logger.exception(f"{self.file_path} download failed")
            return None
        return self.file_path


if __name__ == '__main__':
    # Example usage: download one video in 5 MiB blocks and report how long
    # the whole run took, in milliseconds of wall-clock time.
    started_ms = int(time.time() * 1000)

    result_path = BlockDownLoader(
        "$视频可下载链接",
        "sirius/video_key.mp4",
        block_size=1024 * 1024 * 5,
    ).run()

    cost_time = int(time.time() * 1000) - started_ms
    logging.info(f"result = {result_path}, cost time: {str(cost_time)} ms")