【笔记】m3u8文件转换MP4文件

Nydia.J

已于 2024-03-06 13:54:36 修改

阅读量610

点赞数 8

分类专栏：笔记 Python 文章标签： python

于 2024-03-04 13:59:33 首次发布

本文链接：https://blog.csdn.net/Auderiy/article/details/136449999

版权

Python 同时被 2 个专栏收录

4 篇文章 0 订阅

订阅专栏

笔记

2 篇文章 0 订阅

订阅专栏

【笔记】m3u8文件转换MP4文件

转换步骤:
1.下载m3u8文件
2.解析m3u8文件，获取密钥地址（key_url）、初始化向量（iv，是一个十六进制值而不是地址）以及各个ts分片的地址（ts_url）
3.下载ts文件
4.根据key将ts文件解密（经过AES-128加密后的文件下载后会无法播放，所以还需要进行解密。）
5.合并成MP4

"""
@Project ：-
@File ：-
@Author ：-
@Date ：-
"""
import os
import re
from urllib.parse import urljoin

import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad, unpad


class m3u8:
    """Download an encrypted HLS (m3u8) stream and concatenate it into one file.

    The steps, driven by ``main``: download the playlist, parse it into
    per-segment URL / key URL / IV, download the segments, decrypt them with
    AES-CBC and write the decrypted segments into a single output file.

    ``ts_urls``, ``key_urls`` and ``iv_urls`` are parallel lists: index ``i``
    of each describes media segment ``i`` of the most recently parsed
    playlist. ``key_urls[i]`` / ``iv_urls[i]`` are ``None`` for a segment that
    is not encrypted.
    """

    # One NAME=value pair of an EXT-X-KEY attribute list. The value is either
    # a quoted string (which may itself contain ',' or '=') or a bare token.
    _ATTR_RE = re.compile(r'([A-Za-z0-9-]+)=("[^"]*"|[^,]*)')

    def __init__(self):
        # All output goes below "<directory of this script>/video".
        self.dirpath = os.path.dirname(__file__)
        self.headers = {
            'Cookie': '对应爬取平台的cookie值',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        }
        self.key_urls = []
        self.iv_urls = []
        self.ts_urls = []
        # URL of the last playlist fetched by get_m3u8; used to resolve
        # relative segment / key URIs found inside that playlist.
        self.base_url = None
        # File names (inside video/temp_ts) written by the last decodeByKey
        # call, in playlist order.
        self.decrypted_files = []

    @staticmethod
    def _segment_name(ts_url):
        """Return the file name of a segment URL (last path part, query dropped)."""
        return ts_url.split("?")[0].split("/")[-1]

    def _resolve(self, reference):
        """Resolve a URI from the playlist against the playlist URL, if known."""
        base = getattr(self, "base_url", None)
        return urljoin(base, reference) if base else reference

    @staticmethod
    def _decrypt(key, iv_hex, data, ts_name):
        """Decrypt one segment with AES-CBC and strip its PKCS#7 padding.

        A fresh cipher is created per segment because a CBC cipher object
        keeps chaining state between ``decrypt`` calls.
        """
        aes = AES.new(key=key, mode=AES.MODE_CBC, IV=bytes.fromhex(iv_hex))
        remainder = len(data) % 16
        if remainder:
            # Ciphertext is always a whole number of blocks, so a remainder
            # means the download is incomplete. Padding the ciphertext would
            # only add one garbage block; decrypt the complete blocks instead.
            print(f"warning: {ts_name} has {remainder} stray trailing byte(s); "
                  "decrypting complete 16-byte blocks only")
            return aes.decrypt(data[:-remainder])
        plain = aes.decrypt(data)
        try:
            return unpad(plain, 16)
        except ValueError:
            # Not PKCS#7 padded: keep the raw decrypted bytes.
            return plain

    def request_data(self, url):
        """
        Send a GET request with the instance headers.
        @param url: URL to fetch
        @return: the ``requests`` response object
        @raise requests.HTTPError: when the server answers with an error status
        """
        response = requests.get(url, headers=self.headers, timeout=30)
        # Fail here rather than saving an error page as playlist/segment/key.
        response.raise_for_status()
        return response

    def download(self, path, content):
        """
        Write bytes to a file, replacing any existing file.
        @param path: destination file path
        @param content: bytes to write
        @return: None
        """
        with open(path, "wb") as f:
            f.write(content)

    def get_m3u8(self, video_id, m3u8_url):
        """
        Download the playlist to video/m3u8/<video_id>.m3u8.
        @param video_id: video id, used as the playlist file name
        @param m3u8_url: playlist URL
        @return: path of the saved playlist file
        """
        m3u8_dir = os.path.join(self.dirpath, "video", "m3u8")
        os.makedirs(m3u8_dir, exist_ok=True)
        m3u8_file = os.path.join(m3u8_dir, f"{video_id}.m3u8")
        response = self.request_data(m3u8_url)
        self.download(m3u8_file, response.content)
        self.base_url = m3u8_url
        return m3u8_file

    def get_ts(self):
        """
        Download every segment in ``ts_urls`` into video/ts.
        @return: None
        """
        ts_path = os.path.join(self.dirpath, "video", "ts")
        os.makedirs(ts_path, exist_ok=True)
        for ts_url in self.ts_urls:
            content = self.request_data(ts_url).content
            self.download(os.path.join(ts_path, self._segment_name(ts_url)), content)

    def parse_m3u8(self, m3u8_name):
        """
        Parse a playlist file into ``ts_urls`` / ``key_urls`` / ``iv_urls``.

        An EXT-X-KEY tag applies to every segment after it until the next
        EXT-X-KEY tag, so the key in effect is recorded once per segment.
        When the tag has no IV attribute the segment's media sequence number
        is used as the IV. IVs are stored as 32 hex digits without "0x".
        @param m3u8_name: path of the playlist file
        @return: None
        """
        self.key_urls = []
        self.iv_urls = []
        self.ts_urls = []

        key_url = None   # key currently in effect; None means unencrypted
        tag_iv = None    # IV given by the current key tag, if any
        sequence = 0     # media sequence number of the next segment
        with open(m3u8_name, "r", encoding="utf8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    continue
                if line.startswith("#EXT-X-MEDIA-SEQUENCE:"):
                    number = line.split(":", 1)[1].strip()
                    if number.isdigit():
                        sequence = int(number)
                elif line.startswith("#EXT-X-KEY"):
                    attrs = {
                        name.upper(): value.strip().strip('"')
                        for name, value in self._ATTR_RE.findall(line.split(":", 1)[-1])
                    }
                    if attrs.get("METHOD", "").upper() == "NONE" or not attrs.get("URI"):
                        key_url, tag_iv = None, None
                    else:
                        key_url = self._resolve(attrs["URI"])
                        tag_iv = attrs.get("IV")
                        if tag_iv is not None:
                            if tag_iv[:2].lower() == "0x":
                                tag_iv = tag_iv[2:]
                            tag_iv = tag_iv.zfill(32)
                elif not line.startswith("#"):
                    self.ts_urls.append(self._resolve(line))
                    self.key_urls.append(key_url)
                    if key_url is None:
                        self.iv_urls.append(None)
                    elif tag_iv is not None:
                        self.iv_urls.append(tag_iv)
                    else:
                        self.iv_urls.append(format(sequence, "032x"))
                    sequence += 1

    def decodeByKey(self):
        """
        Decrypt the downloaded segments into video/temp_ts.

        Output files are named "<index>_temp_<segment name>" so that ``merge``
        can restore playlist order. Each distinct key is downloaded once.
        Unencrypted segments are copied unchanged.
        @return: None
        """
        ts_dir = os.path.join(self.dirpath, "video", "ts")
        temp_ts_path = os.path.join(self.dirpath, "video", "temp_ts")
        os.makedirs(temp_ts_path, exist_ok=True)

        key_cache = {}
        self.decrypted_files = []
        for index, ts_url in enumerate(self.ts_urls):
            ts_name = self._segment_name(ts_url)
            print(ts_name)
            with open(os.path.join(ts_dir, ts_name), "rb") as fr:
                data = fr.read()

            key_url = self.key_urls[index]
            if key_url is not None:
                if key_url not in key_cache:
                    key_cache[key_url] = self.request_data(key_url).content
                data = self._decrypt(key_cache[key_url], self.iv_urls[index], data, ts_name)

            temp_name = f"{index}_temp_{ts_name}"
            with open(os.path.join(temp_ts_path, temp_name), "wb") as fw:
                fw.write(data)
            self.decrypted_files.append(temp_name)

    def merge(self, mp4_name):
        """
        Concatenate the decrypted segments into video/MP4/<mp4_name>.

        The segments written by the last ``decodeByKey`` call are used. If
        there are none, every file in video/temp_ts whose name starts with
        "<number>_" is used, ordered by that number. The output file is
        rewritten from scratch on every call.
        @param mp4_name: output file name
        @return: None
        """
        temp_ts_path = os.path.join(self.dirpath, "video", "temp_ts")
        mp4_path = os.path.join(self.dirpath, "video", "MP4")
        os.makedirs(mp4_path, exist_ok=True)

        files = list(getattr(self, "decrypted_files", None) or [])
        if not files:
            numbered = []
            for name in os.listdir(temp_ts_path):
                prefix = name.split("_")[0]
                # Skip files that do not follow the "<index>_temp_" scheme.
                if prefix.isdecimal():
                    numbered.append((int(prefix), name))
            numbered.sort()
            files = [name for _, name in numbered]

        with open(os.path.join(mp4_path, mp4_name), "wb") as fw:
            for file in files:
                segment_path = os.path.join(temp_ts_path, file)
                if not os.path.exists(segment_path):
                    print("失败")
                    continue
                with open(segment_path, "rb") as fr:
                    fw.write(fr.read())

    def main(self, video_id, m3u8_urls):
        """
        Run the whole pipeline for each playlist URL.

        Every URL is written to the same <video_id>.mp4, so with several URLs
        the output of the last one is what remains.
        @param video_id: video id, used for the playlist and output file names
        @param m3u8_urls: iterable of playlist URLs
        @return: None
        """
        for m3u8_url in m3u8_urls:
            # 1. download the playlist
            m3u8_name = self.get_m3u8(video_id, m3u8_url)
            # 2. parse it into segment URLs with their key URL and IV
            self.parse_m3u8(m3u8_name)
            # 3. download the segments
            self.get_ts()
            # 4. decrypt them (encrypted segments are not playable as-is)
            self.decodeByKey()
            # 5. concatenate into the output file
            self.merge(f"{video_id}.mp4")

if __name__ == "__main__":
    # Playlist URL(s) to download and the id used to name the output files.
    m3u8_urls = ["https://service.sanjieke.cn/video/media/34282833/608p.m3u8?user_id=23251010&class_id=34282659&time=1709523724&nonce=924591&token=eb6e8e20c3c9943a7dd4f4d28c612a09b820bab7", ]
    video_id = '34261493'
    # Use a separate name for the instance: rebinding ``m3u8`` would hide the
    # class, so no further instance could be created afterwards.
    downloader = m3u8()
    downloader.main(video_id, m3u8_urls)

m3u8的文件

具体参数参考：https://en.wikipedia.org/wiki/M3U#M3U8

"""
@Project ：-
@File ：-
@Author ：-
@Date ：-
"""
import asyncio
import os

import aiofiles
import requests
import aiohttp
from Crypto.Cipher import AES

# Directory containing this script; all output is written below it.
dirpath = os.path.dirname(__file__)

# Headers sent with every HTTP request. The Cookie value is a placeholder to
# be replaced with the cookie of the platform being downloaded from.
headers = {
    'Cookie': '对应平台的cookie',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/121.0.0.0 Safari/537.36'
    ),
}

# Segment file names in the order they appear in the playlist; the merge step
# concatenates the segments in this order.
ts_list = []

async def get_m3u8(video_id, m3u8_url):
    """
    Download the playlist, then download all of its segments concurrently.

    The playlist is saved to video/<video_id>/m3u8/<video_id>.m3u8. The global
    ``ts_list`` is reset and refilled with the segment file names in playlist
    order, so names from a previously processed video are not carried over.
    @param video_id: video id, used for the output directory and file name
    @param m3u8_url: playlist URL
    @return: None
    """
    from urllib.parse import urljoin  # local import: absent from the file's import block

    m3u8_path = os.path.join(dirpath, "video", video_id, "m3u8")
    os.makedirs(m3u8_path, exist_ok=True)
    m3u8_file = os.path.join(m3u8_path, f"{video_id}.m3u8")

    # requests is a blocking client, so run it in the default executor
    # instead of stalling the event loop.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.get(url=m3u8_url, headers=headers, timeout=30)
    )
    # Fail here rather than parsing an error page as a playlist.
    response.raise_for_status()
    async with aiofiles.open(m3u8_file, "wb") as f1:
        await f1.write(response.content)

    ts_list.clear()
    tasks = []
    async with aiohttp.ClientSession(headers=headers) as session:
        async with aiofiles.open(m3u8_file, "r", encoding="utf8") as f2:
            async for line in f2:
                entry = line.strip()
                # Blank lines and tags/comments are not segment URIs.
                if not entry or entry.startswith("#"):
                    continue
                # Segment URIs may be relative to the playlist URL.
                ts_url = urljoin(m3u8_url, entry)
                ts_list.append(ts_url.split("?")[0].split("/")[-1])
                tasks.append(asyncio.create_task(get_ts(video_id, ts_url, session)))
        await asyncio.gather(*tasks)

async def get_ts(video_id, ts_url, session):
    """
    Download one segment into video/<video_id>/ts.

    The file name is the last path component of ``ts_url`` without its query
    string. The file is written only after the whole body has been received.
    @param video_id: video id, used for the output directory
    @param ts_url: segment URL
    @param session: aiohttp client session used for the request
    @return: None
    @raise aiohttp.ClientResponseError: when the server answers with an error status
    """
    ts_path = os.path.join(dirpath, "video", video_id, "ts")
    os.makedirs(ts_path, exist_ok=True)
    ts_file = os.path.join(ts_path, ts_url.split("?")[0].split("/")[-1])

    async with session.get(url=ts_url) as resp:
        # Fail here rather than saving an error page as a segment.
        resp.raise_for_status()
        # NOTE(review): pause kept from the original; it holds the connection
        # for 3 s before the body is read - presumably a crude throttle,
        # confirm whether it is still wanted.
        await asyncio.sleep(3)
        data = await resp.content.read()

    async with aiofiles.open(ts_file, "wb") as f3:
        await f3.write(data)

def _parse_key_tag(line):
    """Return ``(key_url, iv_hex)`` for one ``#EXT-X-KEY`` playlist line.

    ``key_url`` is ``None`` when the tag has METHOD=NONE or no URI.
    ``iv_hex`` is 32 hex digits without "0x", or ``None`` when the tag has no
    IV attribute.
    """
    import re  # local import: absent from the file's import block

    # Values are either quoted strings (may contain ',' / '=') or bare tokens.
    pairs = re.findall(r'([A-Za-z0-9-]+)=("[^"]*"|[^,]*)', line.strip().split(":", 1)[-1])
    attrs = {name.upper(): value.strip().strip('"') for name, value in pairs}
    if attrs.get("METHOD", "").upper() == "NONE" or not attrs.get("URI"):
        return None, None
    iv = attrs.get("IV")
    if iv is not None:
        if iv[:2].lower() == "0x":
            iv = iv[2:]
        iv = iv.zfill(32)
    return attrs["URI"], iv


def _decrypt_segment(key, iv, data):
    """Decrypt one segment with AES-CBC and strip PKCS#7 padding when present."""
    from Crypto.Util.Padding import unpad  # local import, see _parse_key_tag

    # A new cipher per segment: a CBC cipher object keeps chaining state
    # between decrypt() calls, so reusing one across segments would decrypt
    # the first block of every later segment with the wrong IV.
    aes = AES.new(key=key, mode=AES.MODE_CBC, IV=bytes.fromhex(iv))
    plain = aes.decrypt(data)
    try:
        return unpad(plain, 16)
    except ValueError:
        # Not PKCS#7 padded: keep the raw decrypted bytes.
        return plain


async def _decrypt_segments(video_id, key_url, jobs):
    """Write the segments in ``jobs`` (``(ts_name, iv_hex)`` pairs) to temp_ts.

    Segments are decrypted with the key downloaded from ``key_url``; when
    ``key_url`` is ``None`` they are copied unchanged.
    """
    key = None
    if key_url is not None:
        # requests is blocking; run it in the default executor so other
        # decryption tasks are not stalled while the key downloads.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: requests.get(url=key_url, headers=headers, timeout=30)
        )
        response.raise_for_status()
        key = response.content

    ts_path = os.path.join(dirpath, "video", video_id, "ts")
    temp_ts_path = os.path.join(dirpath, "video", video_id, "temp_ts")
    os.makedirs(temp_ts_path, exist_ok=True)
    for ts_name, iv in jobs:
        async with aiofiles.open(os.path.join(ts_path, ts_name), "rb") as f1:
            data = await f1.read()
        if key is not None:
            data = _decrypt_segment(key, iv, data)
        async with aiofiles.open(os.path.join(temp_ts_path, ts_name), "wb") as f2:
            await f2.write(data)


async def decode(video_id):
    """
    Parse the saved playlist and decrypt every segment with its own key.

    An EXT-X-KEY tag applies to the segments that follow it until the next
    EXT-X-KEY tag, so segments are grouped by the key in effect and each
    group is decrypted by one task. When a key tag has no IV attribute the
    segment's media sequence number is used as the IV. Key URIs are used as
    written in the playlist (the playlist URL is not available here to
    resolve relative ones).
    @param video_id: video id
    @return: None
    """
    playlist = os.path.join(dirpath, "video", video_id, "m3u8", f"{video_id}.m3u8")
    jobs_by_key = {}   # key_url (None = unencrypted) -> [(ts_name, iv_hex), ...]
    key_url, tag_iv = None, None
    sequence = 0       # media sequence number of the next segment
    async with aiofiles.open(playlist, "r", encoding="utf8") as f4:
        async for raw_line in f4:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("#EXT-X-MEDIA-SEQUENCE:"):
                number = line.split(":", 1)[1].strip()
                if number.isdigit():
                    sequence = int(number)
            elif line.startswith("#EXT-X-KEY"):
                key_url, tag_iv = _parse_key_tag(line)
            elif not line.startswith("#"):
                ts_name = line.split("?")[0].split("/")[-1]
                iv = tag_iv if tag_iv is not None else format(sequence, "032x")
                jobs_by_key.setdefault(key_url, []).append((ts_name, iv))
                sequence += 1

    tasks = [
        asyncio.create_task(_decrypt_segments(video_id, group_key, jobs))
        for group_key, jobs in jobs_by_key.items()
    ]
    await asyncio.gather(*tasks)


async def decodeByKey(video_id, key_url, iv):
    """
    Decrypt every segment named in ``ts_list`` with one key and one IV.
    @param video_id: video id
    @param key_url: URL of the key
    @param iv: IV as a hex string without the "0x" prefix
    @return: None
    """
    # Pause kept from the original implementation.
    await asyncio.sleep(3)
    await _decrypt_segments(video_id, key_url, [(ts_name, iv) for ts_name in ts_list])

async def merge(video_id):
    """
    Concatenate the decrypted segments into video/<video_id>/mp4/<video_id>.mp4.

    Segments are taken from temp_ts in ``ts_list`` order. The output file is
    opened once and rewritten from scratch, so running the merge again does
    not append a second copy to an existing file.
    @param video_id: video id
    @return: None
    """
    temp_ts_path = os.path.join(dirpath, "video", video_id, "temp_ts")
    mp4_path = os.path.join(dirpath, "video", video_id, "mp4")
    os.makedirs(mp4_path, exist_ok=True)

    async with aiofiles.open(os.path.join(mp4_path, f"{video_id}.mp4"), 'wb') as f2:
        for file in ts_list:
            async with aiofiles.open(os.path.join(temp_ts_path, file), 'rb') as f1:
                await f2.write(await f1.read())

async def main(m3u8s):
    """
    Process each video in turn: download, decrypt, then merge.
    @param m3u8s: mapping of video id to playlist URL
    @return: None
    """
    for video_id in m3u8s:
        m3u8_url = m3u8s[video_id]
        await get_m3u8(video_id, m3u8_url)
        await decode(video_id)
        await merge(video_id)

if __name__ == '__main__':
    # Mapping of video id -> playlist URL.
    m3u8s = {
        '34261493': "https://service.sanjieke.cn/video/media/34282833/608p.m3u8?user_id=23251010&class_id=34282659&time=1709523724&nonce=924591&token=eb6e8e20c3c9943a7dd4f4d28c612a09b820bab7",
        # '34261494': "https://service.sanjieke.cn/video/media/34261506/608p.m3u8?user_id=23251010&class_id=34282659&time=1709532401&nonce=379255&token=70dffbd5850672d33ed154508716a9aded5c661f"
    }
    # asyncio.run creates the event loop, runs the coroutine and closes the
    # loop; asyncio.get_event_loop() outside a running loop is deprecated.
    asyncio.run(main(m3u8s))