【笔记】m3u8文件转换MP4文件
转换步骤:
1.下载m3u8文件
2.解析m3u8文件,获取 key_url(密钥地址)、iv(初始向量,十六进制值,不是URL)和 ts_url(分片地址)
3.下载ts文件
4.根据key将ts文件解密 (经过AES-128加密后的文件下载后会无法播放,所以还需要进行解密。)
5.合并成MP4
"""
@Project :-
@File :-
@Author :-
@Date :-
"""
import os
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import pad
class m3u8:
    """Download an AES-128 encrypted HLS (m3u8) stream and merge it into one file.

    The steps are driven by ``main``: download the playlist, parse it,
    download the segments, decrypt them and concatenate the decrypted
    segments. All files are stored below ``<directory of this script>/video``.
    """

    def __init__(self):
        """Set up the output root, the request headers and empty parse results."""
        self.dirpath = os.path.dirname(__file__)
        self.headers = {
            'Cookie': '对应爬取平台的cookie值',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
        }
        # Parallel lists filled by parse_m3u8(): entry i of key_urls/iv_urls
        # is the key URL / IV hex string that applies to ts_urls[i]
        # (None when the segment is not encrypted / the tag carries no IV).
        self.key_urls = []
        self.iv_urls = []
        self.ts_urls = []
        # Value of #EXT-X-MEDIA-SEQUENCE; used to derive the IV of segments
        # whose key tag has no IV attribute.
        self.media_sequence = 0

    def request_data(self, url, timeout=30):
        """
        Send a GET request using the instance headers.
        @param url: URL to fetch
        @param timeout: seconds to wait for the server before giving up
        @return: the ``requests`` response object
        @raise requests.HTTPError: when the server answers with a 4xx/5xx status
        """
        # Without a timeout requests can block forever on a stalled server.
        response = requests.get(url, headers=self.headers, timeout=timeout)
        # Fail here instead of saving an error page as key/segment data.
        response.raise_for_status()
        return response

    def download(self, path, content):
        """
        Write bytes to a file, replacing any existing file.
        @param path: destination file path
        @param content: bytes to write
        @return: None
        """
        with open(path, "wb") as f:
            f.write(content)

    def get_m3u8(self, video_id, m3u8_url):
        """
        Download the playlist and store it as ``video/m3u8/<video_id>.m3u8``.
        @param video_id: video id, used as the file name
        @param m3u8_url: URL of the playlist
        @return: path of the stored playlist file
        """
        m3u8_dir = os.path.join(self.dirpath, "video", "m3u8")
        os.makedirs(m3u8_dir, exist_ok=True)
        m3u8_file = os.path.join(m3u8_dir, f"{video_id}.m3u8")
        response = self.request_data(m3u8_url)
        self.download(m3u8_file, response.content)
        return m3u8_file

    def get_ts(self):
        """
        Download every segment in ``self.ts_urls`` into ``video/ts``.
        @return: None
        """
        ts_path = os.path.join(self.dirpath, "video", "ts")
        os.makedirs(ts_path, exist_ok=True)
        for ts_url in self.ts_urls:
            content = self.request_data(ts_url).content
            # File name = last path component of the URL without its query string.
            ts_name = ts_url.split("?")[0].split("/")[-1]
            self.download(os.path.join(ts_path, ts_name), content)

    @staticmethod
    def _parse_key_tag(line):
        """
        Extract the key URL and IV from one ``#EXT-X-KEY`` playlist line.
        @param line: the tag line without surrounding whitespace
        @return: tuple ``(key_url, iv_hex)``; both are None for METHOD=NONE,
                 ``key_url`` is None when no URI attribute is present and
                 ``iv_hex`` is None when no IV attribute is present
                 (a leading 0x/0X is removed from the IV)
        """
        pieces = line.split('"')
        key_url = None
        # Odd-numbered pieces are the quoted values; the key URL is the
        # quoted value that directly follows ``URI=``.
        for position in range(1, len(pieces), 2):
            if pieces[position - 1].rstrip().upper().endswith("URI="):
                key_url = pieces[position]
                break
        # Inspect only the text outside the quotes, because the quoted URI
        # may itself contain ',' or '=' characters.
        unquoted = "".join(pieces[0::2])
        method = ""
        iv_hex = None
        for attribute in unquoted.partition(":")[2].split(","):
            name, _, value = attribute.partition("=")
            name = name.strip().upper()
            value = value.strip()
            if name == "METHOD":
                method = value.upper()
            elif name == "IV" and value:
                iv_hex = value[2:] if value[:2].lower() == "0x" else value
        if method == "NONE":
            return None, None
        return key_url, iv_hex

    @staticmethod
    def _strip_pkcs7(data, block_size=16):
        """
        Remove PKCS7 padding from decrypted data when it is present.
        @param data: decrypted bytes
        @param block_size: cipher block size in bytes
        @return: ``data`` without its padding, or ``data`` unchanged when the
                 trailing bytes do not form valid PKCS7 padding
        """
        if not data:
            return data
        pad_len = data[-1]
        if 1 <= pad_len <= block_size and data.endswith(bytes([pad_len]) * pad_len):
            return data[:len(data) - pad_len]
        return data

    def parse_m3u8(self, m3u8_name):
        """
        Parse a stored playlist into ``ts_urls``, ``key_urls`` and ``iv_urls``.

        A key tag applies to every segment that follows it until the next key
        tag, so one key/IV entry is recorded per segment. This keeps the three
        lists the same length, which ``decodeByKey`` relies on.
        @param m3u8_name: path of the playlist file
        @return: None
        """
        self.key_urls = []
        self.iv_urls = []
        self.ts_urls = []
        self.media_sequence = 0
        current_key = None
        current_iv = None
        with open(m3u8_name, "r", encoding="utf8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line:
                    # Blank lines are not segment URLs.
                    continue
                if line.startswith("#EXT-X-MEDIA-SEQUENCE"):
                    value = line.partition(":")[2].strip()
                    if value.isdecimal():
                        self.media_sequence = int(value)
                elif line.startswith("#EXT-X-KEY"):
                    current_key, current_iv = self._parse_key_tag(line)
                elif not line.startswith("#"):
                    self.ts_urls.append(line)
                    self.key_urls.append(current_key)
                    self.iv_urls.append(current_iv)

    def decodeByKey(self):
        """
        Decrypt the downloaded segments into ``video/temp_ts``.

        Output files are named ``<index>_temp_<segment name>`` so that
        ``merge`` can restore the playlist order. Segments without a key are
        copied unchanged.
        @return: None
        """
        ts_dir = os.path.join(self.dirpath, "video", "ts")
        temp_ts_path = os.path.join(self.dirpath, "video", "temp_ts")
        os.makedirs(temp_ts_path, exist_ok=True)
        # Each distinct key URL is downloaded only once.
        key_cache = {}
        for index, ts_url in enumerate(self.ts_urls):
            ts_name = ts_url.split("?")[0].split("/")[-1]
            print(ts_name)
            key_url = self.key_urls[index]
            iv = self.iv_urls[index]
            with open(os.path.join(ts_dir, ts_name), "rb") as fr:
                data = fr.read()
            if key_url is not None:
                if key_url not in key_cache:
                    key_cache[key_url] = self.request_data(key_url).content
                key = key_cache[key_url]
                print(key, iv)
                if iv is None:
                    # HLS (RFC 8216): without an IV attribute the IV is the
                    # segment's media sequence number as a 128-bit big-endian
                    # integer.
                    iv_bytes = (self.media_sequence + index).to_bytes(16, "big")
                else:
                    iv_bytes = bytes.fromhex(iv)
                # A new cipher object per segment: CBC objects carry chaining
                # state from one decrypt() call to the next.
                aes = AES.new(key=key, mode=AES.MODE_CBC, IV=iv_bytes)
                truncated = len(data) % 16 != 0
                if truncated:
                    # Best effort for incomplete downloads: extend the data to
                    # a whole number of blocks so decrypt() accepts it. The
                    # last block will not decrypt to meaningful content.
                    data = pad(data, 16)
                data = aes.decrypt(data)
                if not truncated:
                    # HLS AES-128 segments are PKCS7 padded before encryption.
                    data = self._strip_pkcs7(data)
            temp_name = str(index) + "_temp_" + ts_name
            with open(os.path.join(temp_ts_path, temp_name), "wb") as fw:
                fw.write(data)

    def merge(self, mp4_name):
        """
        Append the decrypted segments to ``video/MP4/<mp4_name>``.

        Every file in ``video/temp_ts`` whose name starts with a numeric
        index is merged in ascending index order; other files are ignored.
        The output is opened in append mode, so an existing file with the
        same name is extended rather than replaced.
        @param mp4_name: file name of the merged output
        @return: None
        """
        temp_ts_path = os.path.join(self.dirpath, "video", "temp_ts")
        mp4_path = os.path.join(self.dirpath, "video", "MP4")
        os.makedirs(mp4_path, exist_ok=True)
        # Skip files without the numeric prefix written by decodeByKey();
        # int() would raise on them.
        files = [name for name in os.listdir(temp_ts_path)
                 if name.split('_')[0].isdecimal()]
        files.sort(key=lambda x: int(x.split('_')[0]))
        with open(os.path.join(mp4_path, mp4_name), 'ab') as fw:
            for file in files:
                try:
                    with open(os.path.join(temp_ts_path, file), 'rb') as fr:
                        fw.write(fr.read())
                except FileNotFoundError:
                    # The file disappeared after the directory was listed.
                    print("失败")

    def main(self, video_id, m3u8_urls):
        """
        Run the whole pipeline for each playlist URL.
        @param video_id: video id, used for the playlist and output file names
        @param m3u8_urls: iterable of playlist URLs
        @return: None
        """
        for m3u8_url in m3u8_urls:
            # 1. download the playlist
            m3u8_name = self.get_m3u8(video_id, m3u8_url)
            # 2. parse it into key, IV and segment entries
            self.parse_m3u8(m3u8_name)
            # 3. download the segments
            self.get_ts()
            # 4. decrypt them (AES-128 encrypted segments are not playable as downloaded)
            self.decodeByKey()
            # 5. concatenate the decrypted segments
            self.merge(f"{video_id}.mp4")
if __name__ == "__main__":
    # Example run: one playlist URL for one video id.
    m3u8_urls = ["https://service.sanjieke.cn/video/media/34282833/608p.m3u8?user_id=23251010&class_id=34282659&time=1709523724&nonce=924591&token=eb6e8e20c3c9943a7dd4f4d28c612a09b820bab7", ]
    video_id = '34261493'
    # Use a separate name for the instance; binding it to ``m3u8`` would
    # replace the class itself at module level.
    downloader = m3u8()
    downloader.main(video_id, m3u8_urls)
m3u8的文件
- 具体参数参考:https://en.wikipedia.org/wiki/M3U#M3U8
"""
@Project :-
@File :-
@Author :-
@Date :-
"""
import asyncio
import os
import aiofiles
import requests
import aiohttp
from Crypto.Cipher import AES
# Segment file names in the order they appear in the playlist; the segments
# are merged into the mp4 in this order.
ts_list = []

# Root directory for all output: the directory containing this script.
dirpath = os.path.dirname(__file__)

# HTTP headers sent with the requests made by this script.
headers = {
    "Cookie": "对应平台的cookie",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
}
async def get_m3u8(video_id, m3u8_url):
    """
    Download the playlist, store it, and download all of its segments concurrently.

    The playlist is saved as ``video/<video_id>/m3u8/<video_id>.m3u8``. The
    module-level ``ts_list`` is reset and then filled with the segment file
    names in playlist order.
    @param video_id: video id, used for the directory and file names
    @param m3u8_url: URL of the playlist
    @return: None
    @raise aiohttp.ClientResponseError: when the playlist request returns a 4xx/5xx status
    """
    m3u8_dir = os.path.join(dirpath, "video", video_id, "m3u8")
    os.makedirs(m3u8_dir, exist_ok=True)
    m3u8_file = os.path.join(m3u8_dir, f"{video_id}.m3u8")

    # ts_list is shared module state; drop the names left over from a
    # previously processed video so they are not decrypted/merged again.
    ts_list.clear()

    async with aiohttp.ClientSession(headers=headers) as session:
        # Fetch the playlist through the async session as well; a blocking
        # HTTP call inside a coroutine would stall the event loop.
        async with session.get(url=m3u8_url) as resp:
            resp.raise_for_status()
            playlist = await resp.read()
        async with aiofiles.open(m3u8_file, "wb") as f1:
            await f1.write(playlist)

        tasks = []
        for raw_line in playlist.decode("utf8").splitlines():
            ts_url = raw_line.strip()
            # Tags/comments start with '#'; blank lines are not segment URLs.
            if not ts_url or ts_url.startswith("#"):
                continue
            ts_list.append(ts_url.split("?")[0].split("/")[-1])
            tasks.append(asyncio.create_task(get_ts(video_id, ts_url, session)))
        # Wait inside the ``async with`` so the session stays open until
        # every download has finished.
        await asyncio.gather(*tasks)
async def get_ts(video_id, ts_url, session):
    """
    Download one segment into ``video/<video_id>/ts``.

    The file name is the last path component of ``ts_url`` without its
    query string.
    @param video_id: video id, used for the directory name
    @param ts_url: URL of the segment
    @param session: aiohttp client session used to send the request
    @return: None
    @raise aiohttp.ClientResponseError: when the server answers with a 4xx/5xx status
    """
    ts_path = os.path.join(dirpath, "video", video_id, "ts")
    os.makedirs(ts_path, exist_ok=True)
    ts_file = os.path.join(ts_path, ts_url.split("?")[0].split("/")[-1])

    # The 3 second pause of the original code is kept, but taken before the
    # request is sent so that no open connection is held while waiting.
    await asyncio.sleep(3)
    async with session.get(url=ts_url) as resp:
        # Do not store an error page as segment data.
        resp.raise_for_status()
        payload = await resp.read()
    # The file is created only after the whole body has been received, so a
    # failed request leaves no empty file behind.
    async with aiofiles.open(ts_file, "wb") as f3:
        await f3.write(payload)
def _parse_key_tag(line):
    """
    Extract the key URL and IV from one ``#EXT-X-KEY`` playlist line.
    @param line: the tag line without surrounding whitespace
    @return: tuple ``(key_url, iv_hex)``; both are None for METHOD=NONE,
             ``key_url`` is None when no URI attribute is present and
             ``iv_hex`` is None when no IV attribute is present
             (a leading 0x/0X is removed from the IV)
    """
    pieces = line.split('"')
    key_url = None
    # Odd-numbered pieces are the quoted values; the key URL is the quoted
    # value that directly follows ``URI=``.
    for position in range(1, len(pieces), 2):
        if pieces[position - 1].rstrip().upper().endswith("URI="):
            key_url = pieces[position]
            break
    # Inspect only the text outside the quotes, because the quoted URI may
    # itself contain ',' or '=' characters.
    unquoted = "".join(pieces[0::2])
    method = ""
    iv_hex = None
    for attribute in unquoted.partition(":")[2].split(","):
        name, _, value = attribute.partition("=")
        name = name.strip().upper()
        value = value.strip()
        if name == "METHOD":
            method = value.upper()
        elif name == "IV" and value:
            iv_hex = value[2:] if value[:2].lower() == "0x" else value
    if method == "NONE":
        return None, None
    return key_url, iv_hex


def _strip_pkcs7(data, block_size=16):
    """
    Remove PKCS7 padding from decrypted data when it is present.
    @param data: decrypted bytes
    @param block_size: cipher block size in bytes
    @return: ``data`` without its padding, or ``data`` unchanged when the
             trailing bytes do not form valid PKCS7 padding
    """
    if not data:
        return data
    pad_len = data[-1]
    if 1 <= pad_len <= block_size and data.endswith(bytes([pad_len]) * pad_len):
        return data[:len(data) - pad_len]
    return data


async def decode(video_id):
    """
    Parse the stored playlist and decrypt every segment with the key that applies to it.

    A key tag applies to the segments that follow it until the next key tag.
    Segments are grouped by (key URL, IV) and each group is decrypted by one
    ``decodeByKey`` task. Segments that have no key are copied unchanged into
    ``temp_ts``.
    @param video_id: video id
    @return: None
    """
    video_dir = os.path.join(dirpath, "video", video_id)
    m3u8_file = os.path.join(video_dir, "m3u8", f"{video_id}.m3u8")

    groups = {}        # (key_url, iv_hex) -> segment file names
    plain_names = []   # segments that are not encrypted
    key_url, iv = None, None
    media_sequence = 0
    segment_index = 0
    # Same encoding as the one used when the playlist is read for downloading.
    async with aiofiles.open(m3u8_file, "r", encoding="utf8") as f4:
        async for raw_line in f4:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("#EXT-X-MEDIA-SEQUENCE"):
                value = line.partition(":")[2].strip()
                if value.isdecimal():
                    media_sequence = int(value)
            elif line.startswith("#EXT-X-KEY"):
                key_url, iv = _parse_key_tag(line)
            elif not line.startswith("#"):
                ts_name = line.split("?")[0].split("/")[-1]
                if key_url is None:
                    plain_names.append(ts_name)
                else:
                    # HLS (RFC 8216): without an IV attribute the IV is the
                    # segment's media sequence number as a 128-bit integer.
                    segment_iv = iv if iv is not None else format(media_sequence + segment_index, "032x")
                    groups.setdefault((key_url, segment_iv), []).append(ts_name)
                segment_index += 1

    if plain_names:
        ts_path = os.path.join(video_dir, "ts")
        temp_ts_path = os.path.join(video_dir, "temp_ts")
        os.makedirs(temp_ts_path, exist_ok=True)
        for ts_name in plain_names:
            async with aiofiles.open(os.path.join(ts_path, ts_name), "rb") as src:
                data = await src.read()
            async with aiofiles.open(os.path.join(temp_ts_path, ts_name), "wb") as dst:
                await dst.write(data)

    tasks = [
        asyncio.create_task(decodeByKey(video_id, group_key_url, group_iv, names))
        for (group_key_url, group_iv), names in groups.items()
    ]
    await asyncio.gather(*tasks)


async def decodeByKey(video_id, key_url, iv, ts_names=None):
    """
    Decrypt segments from ``ts`` into ``temp_ts`` using one key and IV.
    @param video_id: video id
    @param key_url: URL the AES key is downloaded from
    @param iv: IV as a hex string without the 0x prefix
    @param ts_names: file names of the segments to decrypt; when omitted,
                     every name in the module-level ``ts_list`` is decrypted
    @return: None
    @raise aiohttp.ClientResponseError: when the key request returns a 4xx/5xx status
    """
    if ts_names is None:
        ts_names = list(ts_list)
    # Pause kept from the original implementation.
    await asyncio.sleep(3)
    # Non-blocking key download; a blocking HTTP call inside a coroutine
    # would stall every other task on the event loop.
    async with aiohttp.ClientSession(headers=headers) as session:
        async with session.get(url=key_url) as resp:
            resp.raise_for_status()
            key = await resp.read()
    iv_bytes = bytes.fromhex(iv)
    ts_path = os.path.join(dirpath, "video", video_id, "ts")
    temp_ts_path = os.path.join(dirpath, "video", video_id, "temp_ts")
    os.makedirs(temp_ts_path, exist_ok=True)
    for ts_name in ts_names:
        async with aiofiles.open(os.path.join(ts_path, ts_name), "rb") as f1:
            encrypted = await f1.read()
        # A new cipher object per segment: a CBC object carries chaining
        # state from one decrypt() call to the next, so reusing it would
        # start every later segment with the wrong IV.
        aes = AES.new(key=key, mode=AES.MODE_CBC, IV=iv_bytes)
        # HLS AES-128 segments are PKCS7 padded before encryption.
        decrypted = _strip_pkcs7(aes.decrypt(encrypted))
        async with aiofiles.open(os.path.join(temp_ts_path, ts_name), "wb") as f2:
            await f2.write(decrypted)
async def merge(video_id):
    """
    Concatenate the decrypted segments into ``video/<video_id>/mp4/<video_id>.mp4``.

    Segments are taken from ``temp_ts`` in the order of the module-level
    ``ts_list``. An existing output file is replaced.
    @param video_id: video id
    @return: None
    """
    if not ts_list:
        # Nothing to merge; leave any existing output untouched.
        return
    temp_ts_path = os.path.join(dirpath, "video", video_id, "temp_ts")
    mp4_path = os.path.join(dirpath, "video", video_id, "mp4")
    os.makedirs(mp4_path, exist_ok=True)
    # Open the output once and truncate it: appending to a file left by an
    # earlier run would store the video twice.
    async with aiofiles.open(os.path.join(mp4_path, f"{video_id}.mp4"), 'wb') as f2:
        for file in ts_list:
            async with aiofiles.open(os.path.join(temp_ts_path, file), 'rb') as f1:
                await f2.write(await f1.read())
async def main(m3u8s):
    """
    Process the videos one after another: download, decrypt, merge.
    @param m3u8s: mapping of video id to playlist URL
    @return: None
    """
    for vid, playlist_url in m3u8s.items():
        await get_m3u8(vid, playlist_url)
        # The remaining stages only need the video id and run in this order.
        for stage in (decode, merge):
            await stage(vid)
if __name__ == '__main__':
    # Mapping of video id -> playlist URL.
    m3u8s = {
        '34261493': "https://service.sanjieke.cn/video/media/34282833/608p.m3u8?user_id=23251010&class_id=34282659&time=1709523724&nonce=924591&token=eb6e8e20c3c9943a7dd4f4d28c612a09b820bab7",
        # '34261494': "https://service.sanjieke.cn/video/media/34261506/608p.m3u8?user_id=23251010&class_id=34282659&time=1709532401&nonce=379255&token=70dffbd5850672d33ed154508716a9aded5c661f"
    }
    # asyncio.run() creates the event loop, runs main() and closes the loop;
    # calling get_event_loop() outside a running loop is deprecated in
    # recent Python versions.
    asyncio.run(main(m3u8s))
参考文件:
https://blog.csdn.net/qq_18625571/article/details/129616682
https://github.com/Coder-Sakura/sanjieke
https://blog.csdn.net/weixin_48368715/article/details/122143343