网址-m3u8-ts-多线程-mp4
我上一个帖子的优化
#python dililidownload.py
import threading
import requests
import re
import os
from os import path
import shutil
import urllib3
import socket
import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
socket.setdefaulttimeout(10)
def down_m3u8(netlink):
    """Download the HLS playlist referenced by an episode page.

    Fetches ``netlink``, extracts every ``var vid = '...'`` value from the
    returned HTML and, for each value, tries the ``1000k`` rendition of the
    playlist and then the ``800k`` rendition.  The first playlist that can
    be downloaded is written to ``m3u8_path`` and the function returns.

    Reads the module global ``m3u8_name``.  Sets the module globals
    ``m3u8_path`` (where the playlist is saved) and ``base_url`` (the URL
    prefix of the chosen rendition, used to build segment URLs).

    If no candidate can be downloaded -- including the case where the page
    contains no ``vid`` value at all -- the user is prompted and the
    program exits.

    Args:
        netlink: URL of the episode page.

    Raises:
        SystemExit: when no playlist could be obtained.
    """
    global m3u8_name
    global m3u8_path
    global base_url
    m3u8_path = 'E:/vsc and pytraining/pytrain/dililidown/' + m3u8_name + '.m3u8'
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    # requests applies no timeout unless one is passed explicitly; 10 s
    # matches the socket default configured at module level.
    html = requests.get(netlink, headers=header, timeout=10).text
    vlinks = re.findall(r'var vid = \'(.*?)\';', html)
    for vlink in vlinks:
        # The page embeds the link with ':' and '/' percent-encoded.
        m3u8_link = vlink.replace(r'%3A', r':').replace(r'%2F', r'/')
        for quality in ('1000k', '800k'):
            m3u8_url = m3u8_link.replace(r'index.m3u8', quality + '/hls/index.m3u8')
            print(m3u8_url)
            try:
                response = requests.get(m3u8_url, headers=header, timeout=10)
                # requests does not raise on 4xx/5xx by itself; without
                # this an error page would be saved as the playlist and
                # the lower-quality fallback would never be tried.
                response.raise_for_status()
            except requests.RequestException as e:
                # Wrap in a 1-tuple: formatting with e.args directly fails
                # when args holds zero or several items.
                print("异常请求:%s" % (e,))
                continue
            with open(m3u8_path, "wb") as m3u8file:
                m3u8file.write(response.content)
            base_url = m3u8_link.replace(r'index.m3u8', quality + '/hls/')
            time.sleep(3)
            return
        print('m3u8地址暂无法获取,请检查输入链接!')
    input('搜寻完毕,获取失败,按任意键结束。。。')
    # Equivalent to exit(), but does not depend on the site module.
    raise SystemExit
def get_ts_url(m3u8_path, base_url):
    """Return the URL of every ``.ts`` segment listed in a local playlist.

    Every line of the file whose text (without its line ending) ends in
    ``.ts`` is treated as a segment name and prefixed with ``base_url``.
    A final segment line that lacks a trailing newline is included too.

    Args:
        m3u8_path: path of the playlist file on disk.
        base_url: prefix prepended to each segment name; pass ``''`` to get
            the bare names.

    Returns:
        list of ``base_url + name`` strings in playlist order.
    """
    urls = []
    # HLS playlists are UTF-8 (RFC 8216), so do not rely on the locale default.
    with open(m3u8_path, 'r', encoding='utf-8') as mfile:
        for line in mfile:
            name = line.rstrip('\r\n')
            if name.endswith('.ts'):
                urls.append(base_url + name)
    return urls
def down_ts(start, end, ts_urls, d_path):
    """Download ``ts_urls[start:end]`` into ``d_path``, one segment at a time.

    Each segment is stored under the last path component of its URL.  A
    segment that cannot be fetched (connection error, HTTP error status) or
    written is skipped, and any partially written file is removed so that a
    later directory scan does not mistake it for a finished download.

    Args:
        start: index of the first URL to download.
        end: index one past the last URL to download.
        ts_urls: list of segment URLs.
        d_path: directory the segment files are written to.
    """
    # Built once; it is the same for every request.
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400'
    }
    for ts_url in ts_urls[start:end]:
        ts_name = ts_url.split('/')[-1]
        ts_path = d_path + "/{0}".format(ts_name)
        try:
            # stream=True defers the body until iter_content(); verify=False
            # skips TLS certificate verification.  requests applies no
            # timeout unless one is passed explicitly.
            response = requests.get(ts_url, headers=header, stream=True,
                                    verify=False, timeout=10)
        except (requests.RequestException, OSError):
            continue
        downloaded = False
        try:
            # Do not save an HTTP error page as if it were a segment.
            response.raise_for_status()
            with open(ts_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        file.write(chunk)
            downloaded = True
        except (requests.RequestException, OSError):
            # Drop the incomplete file; it may not exist if the failure
            # happened before the file was opened, hence the inner guard.
            try:
                os.remove(ts_path)
            except OSError:
                pass
        finally:
            response.close()
        if downloaded:
            # Short pause after each successful segment (kept from the original).
            time.sleep(2)
def file_walker(path):
    """Return the paths of all files below ``path`` in a deterministic order.

    Walks the tree top-down.  Within each directory the files are listed in
    sorted name order and sub-directories are visited in sorted name order,
    because the listing order of ``os.walk`` is otherwise arbitrary.

    Args:
        path: root directory to scan.

    Returns:
        list of ``root + '/' + filename`` strings; empty if there are no files.
    """
    file_list = []
    for root, dirs, files in os.walk(path):
        # Sorting dirs in place controls the order os.walk descends in.
        dirs.sort()
        file_list.extend(root + '/' + fn for fn in sorted(files))
    return file_list
def combine(d_path, combine_path, file_name):
    """Concatenate the downloaded segments into ``<file_name>.mp4`` and clean up.

    The segment files found under ``d_path`` are written in the order their
    names appear in the playlist at the module-global ``m3u8_path``; files
    whose names are not in the playlist are appended afterwards.  After the
    merge, ``d_path`` and the playlist file are deleted.

    Args:
        d_path: directory holding the downloaded segment files.
        combine_path: directory the merged file is written to.
        file_name: name of the merged file, without extension.
    """
    file_list = file_walker(d_path)
    # Directory listing order is not guaranteed to match playback order, so
    # rank each file by the position of its name in the playlist.
    order = {}
    for idx, name in enumerate(get_ts_url(m3u8_path, '')):
        order.setdefault(name.split('/')[-1], idx)
    unknown_rank = len(order)
    # list.sort is stable, so unlisted files keep their relative order.
    file_list.sort(key=lambda p: order.get(os.path.basename(p), unknown_rank))

    # os.path.join instead of a hard-coded backslash, which is only a
    # separator on Windows.
    file_path = os.path.join(combine_path, file_name + '.mp4')
    with open(file_path, 'wb') as fw:
        for segment in file_list:
            # Close each segment promptly so the rmtree below is not
            # blocked by open handles.
            with open(segment, 'rb') as fr:
                fw.write(fr.read())
    print('合成完毕')
    print('清理缓存中请等待')
    shutil.rmtree(d_path)
    os.remove(m3u8_path)
    time.sleep(5)
    print('缓存清理完毕')
def thre(file_size, num_thread, ts_urls):
    """Download ``ts_urls`` with worker threads and wait for them to finish.

    The index range ``[0, file_size)`` is split into contiguous slices whose
    sizes differ by at most one, and each slice is handed to ``down_ts`` in
    its own daemon thread.  No more threads are started than there are
    items.  The download directory is taken from the module-level ``d_path``.

    Args:
        file_size: number of URLs in ``ts_urls`` to download.
        num_thread: requested number of worker threads.
        ts_urls: list of segment URLs.
    """
    if file_size <= 0:
        return  # nothing to download
    # Clamp so that every thread has work and a zero count cannot divide by zero.
    workers = max(1, min(num_thread, file_size))
    base, extra = divmod(file_size, workers)
    threads = []
    start = 0
    for i in range(workers):
        # The first `extra` workers take one additional item each.
        end = start + base + (1 if i < extra else 0)
        t = threading.Thread(
            target=down_ts,
            kwargs={'start': start, 'end': end, 'ts_urls': ts_urls, 'd_path': d_path},
            daemon=True,
        )
        t.start()
        threads.append(t)
        start = end
    # Join only the threads started here, not every thread in the process.
    for t in threads:
        t.join()
def checkfull(d_path):
    """Download whatever segments are still missing, or merge if none are.

    Compares the segment names in the playlist (module-global ``m3u8_path``)
    with the file names found under ``d_path`` and stores the missing names
    in the module-global ``final``.

    * If segments are missing, they are downloaded through ``thre`` using
      the module-global ``base_url`` as the URL prefix, and the function
      returns.
    * If nothing is missing, the segments are merged through ``combine``
      (using the module globals ``combine_path`` and ``m3u8_name``) and the
      program exits.

    Args:
        d_path: directory holding the downloaded segment files.

    Raises:
        SystemExit: after a successful merge.
    """
    global final
    # A set makes the membership test below O(1) per name.
    downloaded = set()
    for root, dirs, files in os.walk(d_path):
        downloaded.update(files)
    tsnames = get_ts_url(m3u8_path, '')
    final = [name for name in tsnames if name not in downloaded]
    if final:
        left_size = len(final)
        # Pick the largest thread count (at most 100) that leaves a
        # remainder below 8.  Starting at left_size avoids choosing 100
        # threads when only a handful of segments remain.
        for numb in range(min(100, left_size), 0, -1):
            if left_size % numb < 8:
                num_thr = numb
                break
        ts_urlsfinal = [base_url + name for name in final]
        print('开始本次补偿')
        print('剩下{}个链接未下载'.format(left_size))
        thre(left_size, num_thr, ts_urlsfinal)
    else:
        print('准备合成')
        time.sleep(3)
        combine(d_path, combine_path, m3u8_name)
        print('运行完毕')
        time.sleep(3)
        # Equivalent to exit(), but does not depend on the site module.
        raise SystemExit
if __name__ == '__main__':
    # Interactive entry point: ask for an episode URL and a name, download
    # the playlist, then run up to nine check/download passes.
    print('''
本软件实现多线程下载m3u8并自动转化为mp4
事先准备好一个网站的某集链接
如果中途卡住可以直接叉叉,不用担心缓存问题
会生成这样的文件夹:E:/vsc and pytraining/pytrain/dililidown/all
E:/vsc and pytraining/pytrain/dililidown/tss/del
''')
    netlink = input('请输入一集链接:')
    m3u8_name = input('给你要下载的东西取个名字:')
    d_path = 'E:/vsc and pytraining/pytrain/dililidown/tss/del/' + m3u8_name
    combine_path = 'E:/vsc and pytraining/pytrain/dililidown/all'
    # exist_ok avoids the separate existence check for both directories.
    os.makedirs(d_path, exist_ok=True)  # segment cache directory
    os.makedirs(combine_path, exist_ok=True)  # output directory
    down_m3u8(netlink)  # download the playlist and determine base_url

    # Defined up front so it can be read even if checkfull fails before
    # assigning it.
    final = None
    for failtimes in range(9):
        print('第{}次检查文件'.format(failtimes + 1))
        try:
            # checkfull exits the program (SystemExit) once everything is
            # downloaded and merged; SystemExit and KeyboardInterrupt are
            # deliberately not caught here.
            checkfull(d_path)
        except Exception as err:
            print('本次检查出现异常:{}'.format(err))
            if not final:
                # No missing-segment list to retry against: stop instead
                # of repeating the same failure.
                break
    # Reaching this point means the merge did not complete.
    if final:
        print('多次尝试仍然有遗漏,请重开或手动')
    input('任意键退出')
# Example input: dilili episode 9 http://m.dlili.tv/vplay/NDc3Mi0xLTg=.html, episode 10 http://m.dlili.tv/vplay/NDc3Mi0xLTk=.html