之前是通过抓流获取某些网站播放视频的 m3u8 地址,再把其中的视频分片拉下来合并,总觉得不方便。于是尝试直接从网页内容中解析出 m3u8 地址;后来发现 m3u8 还会"套娃"(一个 m3u8 里指向另一个 m3u8),需要对 m3u8 进行二次解析,最后成功拉下了视频。
#m3u8.py
import os
import re
import subprocess
import sys
from urllib.parse import urljoin, urlparse

import requests
def get_m3u8_url(url):
    """Fetch a web page and return the first .m3u8 link found in it.

    The page text, with escaped slashes turned back into plain slashes, is
    also written to ``web.txt`` in the current directory.

    Args:
        url: Address of the web page to scan.

    Returns:
        The first .m3u8 URL matched in the page text.

    Raises:
        ValueError: If the response status is not 200, or if the page
            contains no .m3u8 link.
    """
    response = requests.get(url)
    if response.status_code != 200:
        # Fail here with a clear message instead of handing None to the caller.
        raise ValueError(f'HTTP {response.status_code} when fetching {url}')

    # Links embedded in JSON have "/" escaped as "\/"; undo that before matching.
    content = response.text.replace('\\/', '/')

    # Keep a copy of the page; "with" guarantees the file is flushed and closed.
    with open('web.txt', 'w', encoding='utf-8') as web_file:
        web_file.write(content)

    # The dot before "m3u8" is escaped so only a literal ".m3u8" suffix matches.
    pattern = r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+\.m3u8)'
    m3u8_links = re.findall(pattern, content)
    if not m3u8_links:
        raise ValueError(f'no .m3u8 link found in {url}')
    return m3u8_links[0]
def get_m3u8_url2(url):
    """Resolve a playlist that points at another playlist.

    Downloads the playlist at *url* and looks for the first non-comment line
    whose path ends in ``.m3u8``.

    Args:
        url: Address of the m3u8 playlist to inspect.

    Returns:
        The absolute URL of the first nested playlist entry, or *url*
        unchanged when the playlist contains no such entry.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'Referer': 'https://www.baidu.com/'
    }
    playlist = requests.get(url, headers=headers).text
    # splitlines() plus strip() also copes with CRLF line endings.
    for raw_line in playlist.splitlines():
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        # Check the path only, so "x.m3u8?token=..." is still recognised.
        if urlparse(line).path.endswith('.m3u8'):
            # urljoin handles relative paths, root-relative paths and full URLs.
            return urljoin(url, line)
    return url
def main():
    """Download every segment of an m3u8 playlist and merge them with ffmpeg.

    The first command-line argument is either a .html/.htm page (scanned for
    an m3u8 link) or a .m3u8 URL. Segments are saved under ``./tslib``, the
    ffmpeg concat list is written to ``ts.txt`` and the merged video to
    ``output.mp4``. Segments already present in ``./tslib`` are not
    downloaded again.

    Raises:
        SystemExit: With status 1 when the URL argument is missing or has an
            unsupported extension.
        requests.HTTPError: When a segment download returns an error status.
        subprocess.CalledProcessError: When ffmpeg exits with a non-zero code.
    """
    if len(sys.argv) <= 1:
        print('请提供url参数。')
        sys.exit(1)

    url = sys.argv[1]
    url_type = url.split('.')[-1]
    if url_type in ('html', 'htm'):
        url = get_m3u8_url(url)
    elif url_type != 'm3u8':
        print('url有误,非m3u8:', url_type)
        sys.exit(1)
    # Resolve a nested playlist for direct .m3u8 arguments as well; the helper
    # returns the URL unchanged when there is nothing nested.
    url = get_m3u8_url2(url)
    print(url)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
        'Referer': 'https://www.baidu.com/'
    }
    m3u8_file = requests.get(url, headers=headers).text

    # Printed for information only; the segment URLs are built with urljoin.
    ts_preurl = os.path.dirname(url) + '/'
    print(f'pre:{ts_preurl}')

    # Every non-empty, non-comment playlist line is a segment URI. strip()
    # removes the "\r" left by CRLF playlists, and urljoin resolves relative,
    # root-relative and absolute URIs against the playlist URL.
    stripped_lines = (raw_line.strip() for raw_line in m3u8_file.splitlines())
    ts_url_list = [
        urljoin(url, line)
        for line in stripped_lines
        if line and not line.startswith('#')
    ]

    input_file = 'ts.txt'
    os.makedirs('./tslib', exist_ok=True)
    total = len(ts_url_list)
    # The concat list must be fully written and closed before ffmpeg reads it.
    with open(input_file, 'w', encoding='utf-8') as list_file:
        for index, ts_url in enumerate(ts_url_list, start=1):
            # Take the name from the URL path so a query string cannot end up
            # in the file name.
            ts_name = urlparse(ts_url).path.rsplit('/', 1)[-1]
            ts_path = f'./tslib/{ts_name}'
            if not os.path.exists(ts_path):
                response = requests.get(url=ts_url, headers=headers)
                # Do not save an error page as a segment: the file would then
                # be skipped on every later run.
                response.raise_for_status()
                with open(ts_path, 'wb') as ts_out:
                    ts_out.write(response.content)
            # One entry per segment, in playlist order, for the concat demuxer.
            list_file.write(f"file './tslib/{ts_name}'\n")
            print(f'视频下载成功 {index}/{total}:{ts_name}')

    # Equivalent to: ffmpeg -f concat -safe 0 -i ts.txt -c copy output.mp4
    output = 'output.mp4'
    ffmpeg_path = r"C:\ffmpeg\ffmpeg.exe"  # adjust to the local ffmpeg binary
    cmd = [ffmpeg_path, '-f', 'concat', '-safe', '0',
           '-i', input_file, '-c', 'copy', output]
    print(' '.join(cmd))
    if os.path.exists(output):
        os.remove(output)
    # Wait for ffmpeg and fail on a non-zero exit code, so the success message
    # is only printed after a merge that actually succeeded.
    subprocess.run(cmd, check=True)
    print('合并ts文件成功')


if __name__ == '__main__':
    main()
运行命令形如:
python m3u8.py http://.....html
代码中的 ffmpeg_path 请按本机 ffmpeg 的实际路径自行设置;运行目录下需要有名为 tslib 的子文件夹,用于放置临时 ts 文件;视频合并成功后,这些 ts 文件可自行删除。