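# Multithreaded version: arguments are handed to the worker threads in dict (kwargs) form.
# Trying the second approach; also replacing the for loop with a regular expression to see whether it is faster.
# Supports both m3u8 playlist layouts.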
import datetime
import os
import random
import re
import sys
import threading
from queue import Empty, Queue
from urllib.parse import urljoin, urlsplit

import requests
from fake_useragent import UserAgent

# Measured download time: 0:00:47
class xiazai():
    """Resolve the media-playlist URL referenced by an m3u8 playlist.

    The class reads two module-level globals that the script entry point
    sets before instantiating it:

    * ``xiazaidizhi`` - the m3u8 URL entered by the user.
    * ``wenjian`` - name of the output folder, created under the current
      working directory to hold the downloaded ``.ts`` segments.
    """

    # Used only when fake_useragent cannot supply a random User-Agent.
    _FALLBACK_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
    )

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Create the output folder, pick a User-Agent and resolve the playlist.

        ``savefile()`` is called eagerly, as before; its result is now kept in
        ``self.url_m3u8_hls`` instead of being discarded.
        """
        self.url = xiazaidizhi
        # Folder used to store the .ts files.
        file_dir = os.path.join(os.getcwd(), wenjian)
        os.makedirs(file_dir, exist_ok=True)
        self.headers = {'User-Agent': self._random_user_agent()}
        self.url_m3u8_hls = self.savefile()

    @classmethod
    def _random_user_agent(cls):
        """Return a random User-Agent, falling back to the built-in list."""
        try:
            return UserAgent(use_cache_server=False, verify_ssl=False).random
        except Exception:
            # Best effort: any failure of the third-party helper must not
            # prevent the download, so use one of the bundled strings.
            return random.choice(cls._FALLBACK_USER_AGENTS)

    @staticmethod
    def _resolve_playlist_url(playlist_url, playlist_text):
        """Return the media-playlist URL that *playlist_text* points to.

        Args:
            playlist_url: URL the playlist text was downloaded from.
            playlist_text: body of that playlist.

        Returns:
            *playlist_url* itself when the text already lists media segments
            (it contains ``#EXTINF`` tags); otherwise the last URI line of the
            playlist resolved against *playlist_url*.

        Raises:
            ValueError: the playlist contains no URI line at all.
        """
        # splitlines() + strip() copes with "\r\n" endings and trailing
        # blank lines, which the old "last line" indexing did not.
        lines = [raw.strip() for raw in playlist_text.splitlines()]
        if any(line.startswith('#EXTINF') for line in lines):
            return playlist_url
        uris = [line for line in lines if line and not line.startswith('#')]
        if not uris:
            raise ValueError('no playlist URI found in %s' % playlist_url)
        # urljoin handles relative paths, root-relative paths and full URLs.
        return urljoin(playlist_url, uris[-1])

    def savefile(self):
        """Download ``self.url`` and return the media-playlist URL it names.

        Also prints the resolved URL and sets ``self.base_url`` to the part of
        ``self.url`` that precedes the ``*.m3u8`` file name.

        Raises:
            requests.RequestException: the request failed, timed out or the
                server answered with an HTTP error status.
            ValueError: the downloaded playlist contains no URI line.
        """
        response = requests.get(self.url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        self.base_url = re.split(r"[a-zA-Z0-9-_\.]+\.m3u8", self.url)[0]
        url_m3u8_hls = self._resolve_playlist_url(self.url, response.text)
        print(url_m3u8_hls)
        return url_m3u8_hls
class xiazai1():
    """Download the ``.ts`` segments of an m3u8 playlist and merge them.

    Every method that touches the disk uses the module-level global
    ``wenjian`` (output folder name, relative to the current working
    directory), which the script entry point sets before use.
    """

    # Used only when fake_useragent cannot supply a random User-Agent.
    _FALLBACK_USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
    )

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Pick the User-Agent header sent with every request."""
        self.headers = {'User-Agent': self._random_user_agent()}

    @classmethod
    def _random_user_agent(cls):
        """Return a random User-Agent, falling back to the built-in list."""
        try:
            return UserAgent(use_cache_server=False, verify_ssl=False).random
        except Exception:
            # Best effort: any failure of the third-party helper must not
            # prevent the download, so use one of the bundled strings.
            return random.choice(cls._FALLBACK_USER_AGENTS)

    @staticmethod
    def _ts_filename(url):
        """Return the local file name to use for the segment at *url*.

        This is the last path component of the URL without its query string.
        When that component has no ``.ts`` in it, the first ``name.ts`` token
        of the query string is used instead, if there is one.
        """
        parts = urlsplit(url)
        name = parts.path.rsplit('/', 1)[-1]
        if '.ts' in name:
            return name
        match = re.search(r'[\w-]+\.ts', parts.query)
        return match.group(0) if match else name

    @staticmethod
    def _segment_urls(playlist_url, playlist_text):
        """Return the absolute URL of every ``.ts`` entry, in playlist order.

        Blank lines and ``#`` tag/comment lines are skipped; every other line
        containing ``.ts`` is resolved against *playlist_url*, so relative
        paths, root-relative paths and full URLs are all handled.
        """
        urls = []
        for raw in playlist_text.splitlines():
            line = raw.strip()
            if not line or line.startswith('#') or '.ts' not in line:
                continue
            urls.append(urljoin(playlist_url, line))
        return urls

    def duqu(self):
        """Fetch the media playlist and queue all of its segment URLs.

        Side effect: (re)writes ``<wenjian>/s.txt``, the list file for
        ffmpeg's concat demuxer, with one ``file <name>`` line per segment.

        Returns:
            queue.Queue: absolute segment URLs in playlist order.

        Raises:
            SystemExit: the playlist could not be downloaded.
        """
        url_m3u8_hls = xiazai().savefile()
        try:
            response = requests.get(url_m3u8_hls, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            print('连接失败')
            sys.exit(1)

        # Unbounded: the queue is filled completely before any worker starts,
        # so a size limit would make put() block forever on long playlists.
        ts_queue = Queue()
        concatfile = wenjian + '/' + "s" + '.txt'
        # 'w' so that a re-run does not append duplicate entries.
        with open(concatfile, 'w') as listing:
            for url in self._segment_urls(url_m3u8_hls, response.text):
                ts_queue.put(url)
                listing.write("file %s\n" % self._ts_filename(url))
        return ts_queue

    def shijian(self, dm_time):
        """Print the video duration given its length *dm_time* in seconds."""
        shichang_time = str(datetime.timedelta(seconds=dm_time))
        print('视频时长:%s' % shichang_time)

    def xiazai1(self, ts_queue):
        """Worker loop: download queued segment URLs until the queue is empty.

        Each segment is saved as ``<wenjian>/<file name>``. A URL that cannot
        be downloaded or saved is appended to ``<wenjian>/shibai.txt`` and
        reported on stdout; the worker then continues with the next URL.

        The segments can afterwards be merged with:
        ``ffmpeg -f concat -safe 0 -i s.txt -c copy output.mp4``
        """
        while True:
            # get_nowait(): a worker that loses the race for the last item
            # leaves the loop instead of blocking in get() forever.
            try:
                url = ts_queue.get_nowait()
            except Empty:
                return
            try:
                response = requests.get(url, headers=self.headers,
                                        timeout=self.REQUEST_TIMEOUT)
                # Do not store an HTTP error page as if it were a segment.
                response.raise_for_status()
                with open(wenjian + '/' + self._ts_filename(url), 'wb') as fp:
                    fp.write(response.content)
            except (requests.RequestException, OSError):
                with open(wenjian + '/' + 'shibai.txt', 'a') as fp:
                    fp.write(url + '\n')
                print(url + '下载失败')

    def hebing(self, name):
        """Merge the downloaded segments into ``<name>.mp4`` with ffmpeg.

        Writes ``2.bat`` to the current working directory and launches it with
        ``os.startfile`` (available on Windows only). The batch file runs
        ffmpeg inside the segment folder, where ``s.txt`` was written.
        """
        # The segments are saved relative to the working directory, so the
        # folder is resolved the same way here.
        segment_dir = os.path.abspath(wenjian)
        command = ('start /d "%s" ffmpeg -f concat -safe 0 -i s.txt -c copy "%s.mp4"'
                   % (segment_dir, name))
        with open('2.bat', 'w') as f:
            f.write(command)
        os.startfile("2.bat")


if __name__ == '__main__':
    # Example playlist: http://iqiyi.cdn9-okzy.com/20201019/16908_b0f2428f/index.m3u8
    # Both names are module-level globals read by the classes above.
    xiazaidizhi = input("请输入m3u8链接:").strip()
    wenjian = input("请输入保存文件名:").strip()
    start = datetime.datetime.now().replace(microsecond=0)
    s = xiazai1().duqu()
    threads = []
    for i in range(15):
        t = threading.Thread(target=xiazai1().xiazai1, name='th-' + str(i),
                             kwargs={'ts_queue': s})
        threads.append(t)
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    end = datetime.datetime.now().replace(microsecond=0)
    print('下载耗时:' + str(end - start))
    # Measured download time: 0:01:23