多线程下载一个m3u8并转MP4
入门一个月照猫画虎写的,欢迎大佬指导
在python里面几乎完美运行,可是在用了pyinstaller打包为exe之后却偶尔因为报错卡壳
过程中的操作:
一、手动获取一个m3u8链接,并读出基础部分(以后会写自动读取网站上的link)
二、拼接ts地址
三、分线程
四、下载链接文件
五、拼接文件为MP4,清理缓存
#目标是多线程下载一个m3u8并转MP4
#day5.24 成功了。还有个问题:服务器强制关闭了连接,我卡在了多线程里面出不来。如何防止被检测、如何安全退出进程呢?其实没关系
#day5.25 关闭防火墙,不然有的会连不上。还有一个问题:出现异常之后漏下了几个ts,并且漏报了一个异常
#day5.26 基本完善
#防火墙防火墙防火墙别忘关
#python m3u8tomp4.py
import threading
import requests
import re
import os
from os import path
import shutil
import urllib3
import socket
import time
# Silence urllib3's InsecureRequestWarning; down_ts() issues its requests
# with verify=False, which is what triggers that warning.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Default timeout, in seconds, for socket objects created from here on.
# NOTE(review): this may not bound the requests.get() calls in down_ts(),
# since requests manages its own timeout setting - confirm.
socket.setdefaulttimeout(15)
def get_ts_url(m3u8_path, base_url):
    """Build the download URL of every ``.ts`` segment listed in a playlist.

    Args:
        m3u8_path: Path of a local ``.m3u8`` playlist file.
        base_url: Prefix that is concatenated verbatim in front of every
            segment entry (so it normally has to end with ``/``).

    Returns:
        A list of segment URLs in the order they appear in the playlist.
    """
    urls = []
    # Playlists are UTF-8 text; undecodable bytes are replaced instead of
    # aborting the whole run on a single odd character.
    with open(m3u8_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            # Strip all surrounding whitespace so that a last line without a
            # trailing newline, or a line with trailing blanks, still counts.
            entry = line.strip()
            # Lines starting with '#' are playlist tags/comments, never media.
            if not entry or entry.startswith('#'):
                continue
            if entry.endswith('.ts'):
                urls.append(base_url + entry)
    return urls
def down_ts(start, end, ts_urls, d_path, timeout=15, delay=2):
    """Download the segments ``ts_urls[start:end]`` sequentially into *d_path*.

    Every URL that cannot be fetched and stored completely is appended to the
    module-level list ``tsurlleft`` so that the caller can retry it later; a
    failure never stops the remaining URLs of the slice from being processed.

    Args:
        start: Index of the first URL to download (inclusive).
        end: Index one past the last URL to download (exclusive).
        ts_urls: Full list of segment URLs.
        d_path: Directory the segments are written to; each file is named
            after the last ``/``-separated component of its URL.
        timeout: Timeout in seconds passed to ``requests.get``. Without it a
            stalled connection would block the worker thread forever.
        delay: Seconds to pause after each successful download.

    Returns:
        None.
    """
    global tsurlleft
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    for ts_url in ts_urls[start:end]:
        ts_name = ts_url.split('/')[-1]
        ts_path = d_path + "/{0}".format(ts_name)
        try:
            # stream=True fetches the body lazily through iter_content();
            # verify=False skips TLS certificate verification.
            with requests.get(ts_url, headers=header, stream=True,
                              verify=False, timeout=timeout) as response:
                # Reject HTTP error pages instead of saving them as video data.
                response.raise_for_status()
                with open(ts_path, "wb") as file:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            file.write(chunk)
        except Exception as e:
            # Deliberately broad: whatever goes wrong (connect, HTTP status,
            # mid-stream reset, disk write), the URL must be recorded for a
            # retry and the loop must go on. `continue` (not `return`) keeps
            # the rest of the slice alive.
            tsurlleft.append(ts_url)
            # Format the exception itself, not e.args: a tuple with zero or
            # several items would make the %-formatting raise TypeError here.
            print("异常请求:%s" % (e,))
            # Drop a partially written segment so it can never be merged.
            try:
                if os.path.exists(ts_path):
                    os.remove(ts_path)
            except OSError:
                pass  # best effort; a successful retry overwrites the file
            continue
        time.sleep(delay)
def file_walker(path):
    """Recursively list every file found below *path*.

    Args:
        path: Directory to walk.

    Returns:
        A list of strings, one per file, each built as
        ``<containing directory> + '/' + <file name>``, in the order
        ``os.walk`` yields them.
    """
    return [
        str(folder + '/' + name)
        for folder, _subdirs, names in os.walk(path)
        for name in names
    ]
def _segment_sort_key(file_path):
    """Return a natural-sort key for a file name, so 'seg2' sorts before 'seg10'."""
    parts = re.split(r'(\d+)', os.path.basename(file_path))
    # re.split with a capturing group alternates text / digit runs, so every
    # odd index is a digit run; converting by position keeps the element
    # types comparable between any two keys.
    return [int(part) if index % 2 else part for index, part in enumerate(parts)]


def combine(d_path, combine_path, file_name):
    """Concatenate all downloaded segments into one ``.mp4`` and drop the cache.

    Args:
        d_path: Directory holding the downloaded segments; it is deleted
            once the merge has finished.
        combine_path: Directory the merged file is written to.
        file_name: Name of the merged file, without the ``.mp4`` suffix.

    Returns:
        None.
    """
    # The order in which os.walk reports files is not the playback order, so
    # sort by name with numbers compared numerically.
    # NOTE(review): assumes segment names sort naturally into playlist order
    # (e.g. numbered segments) - confirm for playlists with arbitrary names.
    segments = sorted(file_walker(d_path), key=_segment_sort_key)
    file_path = os.path.join(combine_path, file_name + '.mp4')
    with open(file_path, 'wb') as fw:
        for segment in segments:
            # Stream each segment and close it right away, rather than
            # loading it whole and leaving the handle to the garbage collector.
            with open(segment, 'rb') as fr:
                shutil.copyfileobj(fr, fw)
    print('合成完毕')
    shutil.rmtree(d_path)
    print('缓存清理完毕')
def thre(num_thread):
    """Download all segments with up to *num_thread* worker threads and wait for them.

    Reads the module-level names ``file_size`` (number of segments),
    ``ts_urls`` (segment URLs) and ``d_path`` (download directory). The URL
    list is cut into equally sized contiguous slices, one per thread running
    ``down_ts``; the last slice additionally takes the remainder.

    Args:
        num_thread: Requested number of worker threads. It is clamped to the
            range ``1..file_size`` so that no slice is empty and the slice
            length can never be zero.

    Returns:
        None, after every worker started here has finished.
    """
    if file_size == 0:
        return  # nothing to download
    num_thread = max(1, min(num_thread, file_size))
    part = file_size // num_thread
    workers = []
    for i in range(num_thread):
        start = part * i
        # The last slice absorbs whatever does not divide evenly.
        end = file_size if i == num_thread - 1 else start + part
        t = threading.Thread(
            target=down_ts,
            kwargs={'start': start, 'end': end, 'ts_urls': ts_urls, 'd_path': d_path},
            daemon=True,
        )
        t.start()
        workers.append(t)
    # Wait only for the threads created above; joining everything returned by
    # threading.enumerate() would also block on unrelated threads.
    for t in workers:
        t.join()
if __name__ == '__main__':
    # Script entry point: ask for a playlist and a base URL, download every
    # segment in parallel, retry the failures, then merge into one mp4.
    #
    # The variables below are intentionally module-level: thre() reads
    # file_size, ts_urls and d_path as globals, and down_ts() appends failed
    # URLs to the global tsurlleft. Do not move them into a function.
    print('''
本软件实现多线程下载m3u8并自动转化为mp4
事先准备好一个保存至dililidown目录下的完整m3u8和一个基础链接
创建这样的文件夹:E:/vsc and pytraining/pytrain/dililidown/all
E:/vsc and pytraining/pytrain/dililidown/tss/del
''')
    # Strip pasted input so stray blanks do not end up in paths or URLs.
    m3u8_name = input('请输入m3u8文件不带后缀的文件名:').strip()
    m3u8_path = 'E:/vsc and pytraining/pytrain/dililidown/' + m3u8_name + '.m3u8'
    base_url = input('''
请输入如下的基础链接:
https://sina.com-h-sina.com/20180813/8954_f12018ec/1000k/hls/
''').strip()
    d_path = 'E:/vsc and pytraining/pytrain/dililidown/tss/del'
    combine_path = 'E:/vsc and pytraining/pytrain/dililidown/all'
    # Always start from an empty segment cache.
    if os.path.exists(d_path):
        shutil.rmtree(d_path)
    os.makedirs(d_path)
    # The merged file is opened for writing later; make sure its folder exists.
    os.makedirs(combine_path, exist_ok=True)
    ts_urls = get_ts_url(m3u8_path, base_url)
    file_size = len(ts_urls)
    tsurlleft = []
    if file_size == 0:
        print('没有在m3u8文件中找到任何ts地址,请检查文件')
    else:
        # Pick the largest thread count (at most 100, and never more than the
        # number of segments) that leaves a remainder below 10, because the
        # remainder is downloaded by the last thread on top of its own share.
        num_thread = 1
        for number in range(min(100, file_size), 0, -1):
            if file_size % number < 10:
                num_thread = number
                break
        thre(num_thread)
        # Up to five retry rounds for the URLs that failed. They run
        # sequentially in this thread: thre() slices by the global file_size,
        # which still describes the full playlist rather than the retry list.
        for failtimes in range(5):
            if not tsurlleft:
                break
            print('第{}次补偿'.format(failtimes + 1))
            print(tsurlleft)
            ts_urls = tsurlleft
            tsurlleft = []
            down_ts(0, len(ts_urls), ts_urls, d_path)
            time.sleep(3)
        if tsurlleft:
            print('多次尝试未成功,请手动')
        else:
            # Merge whenever nothing is left over - including the case where
            # the very last retry round is the one that succeeded.
            time.sleep(4)
            combine(d_path, combine_path, m3u8_name)
    print('运行完毕')
    time.sleep(4)
    input('任意键退出')