python爬取m3u8文件并合并输出

fsyysf

已于 2024-09-17 01:28:23 修改

阅读量313

点赞数 1

文章标签： python 开发语言

于 2024-09-17 01:09:31 首次发布

本文链接：https://blog.csdn.net/fsyysf/article/details/142309550

版权

最近遇到有些网页的视频无法直接下载，研究后发现它们使用的是 m3u8 播放列表，视频被切分成 ts 格式的片段。参考了网上的 python 爬虫资料后最终下载成功，在此记录一下。脚本需要用到 ffmpeg，请自行准备好该软件。

import os
import subprocess
import sys
from urllib.parse import urljoin, urlparse

import requests

# HTTP headers sent with every request (playlist and segments).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
    'Referer': 'https://www.baidu.com/'
}

# Path of the ffmpeg executable; change it to match the local installation.
ffmpeg_path = r"C:\ffmpeg\ffmpeg.exe"
# Text file listing the segments, passed to ffmpeg with "-f concat".
input_file = 'ts.txt'
# File name of the merged video.
output = 'output.mp4'
# Directory the .ts segments are downloaded into.
ts_dir = './tslib'
# Seconds to wait on each HTTP request before giving up.
request_timeout = 30


def is_m3u8_url(url):
    """Return True when the path component of *url* ends in ``.m3u8``.

    Only the path is inspected, so a query string or fragment after the
    file name (``index.m3u8?token=...``) does not cause a rejection.
    """
    return urlparse(url).path.lower().endswith('.m3u8')


def parse_segment_urls(playlist_text, playlist_url):
    """Return the absolute URL of every entry listed in a playlist.

    Args:
        playlist_text: Body of the m3u8 playlist.
        playlist_url: URL the playlist was fetched from; relative entries
            are resolved against it.

    Returns:
        A list of URLs in playlist order. Blank lines and lines starting
        with ``#`` are skipped.
    """
    segment_urls = []
    # splitlines() + strip() also copes with CRLF line endings, which would
    # otherwise leave a stray '\r' on every entry.
    for raw_line in playlist_text.splitlines():
        line = raw_line.strip()
        if line and not line.startswith('#'):
            # urljoin handles relative names, root-relative paths and
            # entries that are already absolute URLs.
            segment_urls.append(urljoin(playlist_url, line))
    return segment_urls


def segment_file_name(ts_url):
    """Return the local file name for a segment URL, without any query string."""
    return urlparse(ts_url).path.rsplit('/', 1)[-1]


def download_segment(ts_url, ts_path):
    """Download *ts_url* to *ts_path*, raising on HTTP errors.

    The data is written to a temporary ``.part`` file and renamed afterwards,
    so an interrupted run never leaves a truncated segment that a later run
    would mistake for a finished download.
    """
    response = requests.get(url=ts_url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    part_path = ts_path + '.part'
    with open(part_path, 'wb') as f:
        f.write(response.content)
    os.replace(part_path, ts_path)


def merge_segments():
    """Run ffmpeg over the segment list and return True when it succeeds."""
    # Equivalent shell command, printed for reference:
    # ffmpeg -f concat -safe 0 -i ts.txt -c copy out.mp4
    cmd = ffmpeg_path + " -f concat -safe 0 -i " + input_file + " -c copy " + output
    print(cmd)
    if os.path.exists(output):
        os.remove(output)
    args = [ffmpeg_path, '-f', 'concat', '-safe', '0', '-i', input_file, '-c', 'copy', output]
    try:
        # subprocess.run blocks until ffmpeg exits, so the segments are not
        # deleted while ffmpeg is still reading them.
        completed = subprocess.run(args)
    except OSError as err:
        print(f'无法运行ffmpeg: {err}')
        return False
    return completed.returncode == 0


def main():
    """Download every segment of the m3u8 URL given in argv[1] and merge them."""
    if len(sys.argv) <= 1:
        print('请提供url参数。')
        sys.exit()

    url = sys.argv[1]
    if not is_m3u8_url(url):
        print('url有误，非m3u8:', url.split('.')[-1])
        sys.exit()

    response = requests.get(url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    m3u8_file = response.text

    # Base URL that relative segment names are resolved against.
    ts_preurl = urljoin(url, '.')
    print (f'pre:{ts_preurl}')

    ts_url_list = parse_segment_urls(m3u8_file, url)
    if not ts_url_list:
        print('未找到ts片段')
        sys.exit(1)
    total = len(ts_url_list)

    os.makedirs(ts_dir, exist_ok=True)
    ts_paths = []
    # The with-block closes (and flushes) the list file before ffmpeg reads it.
    with open(input_file, 'w', encoding='utf-8') as list_file:
        for i, ts_url in enumerate(ts_url_list, start=1):
            ts_name = segment_file_name(ts_url)
            ts_path = f'{ts_dir}/{ts_name}'
            # Segments already on disk are reused rather than downloaded again.
            if not os.path.exists(ts_path):
                download_segment(ts_url, ts_path)
            list_file.write(f"file '{ts_path}'\n")
            ts_paths.append(ts_path)
            print(f'视频下载成功 {i}/{total}：{ts_name}')

    if not merge_segments():
        # Keep the downloaded segments so the merge can be retried.
        print('合并ts文件失败')
        sys.exit(1)
    print('合并ts文件成功')

    # Delete the downloaded segments; comment this loop out to keep them.
    for ts_path in ts_paths:
        if os.path.exists(ts_path):
            os.remove(ts_path)
    print('删除ts文件成功')


if __name__ == '__main__':
    main()

使用前请将脚本中的 ffmpeg_path 修改为本机 ffmpeg 的实际路径。

m3u8 的 url 通过命令行参数传入，例如：python m3u8.py http://***/***.m3u8

合并完成后会删除下载的 ts 文件；如果不想删除，可以注释掉相应的删除代码。