python多线程下载ts_python下载m3u8 ,简化版(多线程),适合不需要key的【一】...

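# Thread arguments are passed as a dict (kwargs). This is a second approach that
# also tries replacing the for loop with a regular expression to see if it is faster.
# Handles both layouts of m3u8 playlist entries.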

import datetime
import os
import random
import re
import subprocess
import sys
import threading
from queue import Empty, Queue
from urllib.parse import urljoin, urlparse

import requests
from fake_useragent import UserAgent

# Download time: 0:00:47

class xiazai():
    """Resolve the media-playlist URL referenced by a master m3u8 playlist.

    When ``url`` / ``folder`` are omitted the constructor reads the module
    globals ``xiazaidizhi`` (playlist URL) and ``wenjian`` (save folder name),
    which the script's ``__main__`` section assigns from user input.
    """

    # Fallback pool used when fake_useragent cannot supply a User-Agent.
    USER_AGENTS = [
        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
    ]

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self, url=None, folder=None):
        """Store the playlist URL, create the save folder and pick request headers.

        Args:
            url: Master playlist URL; defaults to the global ``xiazaidizhi``.
            folder: Folder (relative to the current working directory) that
                receives the .ts files; defaults to the global ``wenjian``.

        The playlist is no longer fetched here: the result used to be thrown
        away, so every ``xiazai().savefile()`` call downloaded it twice.
        """
        self.url = xiazaidizhi if url is None else url
        folder = wenjian if folder is None else folder

        # Folder used to store the .ts files.
        file_dir = os.path.join(os.getcwd(), folder)
        os.makedirs(file_dir, exist_ok=True)

        self.headers = {'User-Agent': self._pick_user_agent()}

    def _pick_user_agent(self):
        """Return a random User-Agent, falling back to the built-in pool."""
        try:
            # NOTE(review): these keyword arguments may not be accepted by every
            # fake_useragent release - confirm against the installed version.
            return UserAgent(use_cache_server=False, verify_ssl=False).random
        except Exception:
            # Any failure of the optional helper must not stop the download.
            return random.choice(self.USER_AGENTS)

    @staticmethod
    def _resolve_playlist_url(playlist_url, playlist_text):
        """Return the absolute URL of the playlist that lists the segments.

        The last URI line (non-blank, not starting with ``#``) of the playlist
        text is resolved against ``playlist_url``, so absolute URLs,
        root-relative paths and relative paths are all handled. If the text
        has no URI line, or already contains ``#EXTINF`` segment entries,
        ``playlist_url`` itself is returned.
        """
        # splitlines() + strip() copes with both LF and CRLF playlists.
        entries = [line.strip() for line in playlist_text.splitlines()]
        entries = [line for line in entries if line and not line.startswith('#')]
        if not entries or '#EXTINF' in playlist_text:
            return playlist_url
        return urljoin(playlist_url, entries[-1])

    def savefile(self):
        """Download the master playlist and return the media-playlist URL.

        Returns:
            The absolute URL of the m3u8 file that lists the .ts segments.

        Raises:
            requests.RequestException: if the playlist cannot be downloaded
                or the server answers with an HTTP error status.
        """
        r = requests.get(self.url, headers=self.headers,
                         timeout=self.REQUEST_TIMEOUT)
        # Do not try to parse an HTTP error page as a playlist.
        r.raise_for_status()

        # Directory part of the master playlist URL.
        self.base_url = urljoin(self.url, '.')

        url_m3u8_hls = self._resolve_playlist_url(self.url, r.text)
        print(url_m3u8_hls)
        return url_m3u8_hls

class xiazai1():
    """Download every .ts segment of an m3u8 media playlist.

    The save folder is read from the module global ``wenjian``, which the
    script's ``__main__`` section assigns from user input.
    """

    # Fallback pool used when fake_useragent cannot supply a User-Agent.
    USER_AGENTS = [
        'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
        'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6',
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36',
        'Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36',
    ]

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Pick the User-Agent header sent with every request."""
        try:
            # NOTE(review): these keyword arguments may not be accepted by every
            # fake_useragent release - confirm against the installed version.
            t = UserAgent(use_cache_server=False, verify_ssl=False).random
        except Exception:
            # Any failure of the optional helper must not stop the download.
            t = random.choice(self.USER_AGENTS)
        self.headers = {'User-Agent': t}

    @staticmethod
    def _segment_urls(playlist_url, playlist_text):
        """Return the absolute URLs of the .ts entries, in playlist order.

        Tag/comment lines (starting with ``#``) and blank lines are skipped;
        every remaining line containing ``.ts`` is resolved against
        ``playlist_url`` (absolute, root-relative and relative entries).
        """
        urls = []
        for raw in playlist_text.splitlines():
            line = raw.strip()  # also removes the '\r' of CRLF playlists
            if not line or line.startswith('#') or '.ts' not in line:
                continue
            urls.append(urljoin(playlist_url, line))
        return urls

    @staticmethod
    def _ts_filename(url):
        """Return the last path component of ``url`` (query string removed).

        Raises:
            ValueError: if the URL path does not end in a file name.
        """
        name = urlparse(url).path.rsplit('/', 1)[-1].strip()
        if not name:
            raise ValueError('no file name in segment URL: %s' % url)
        return name

    def duqu(self):
        """Fetch the media playlist and queue its segment URLs.

        Also writes ``<wenjian>/s.txt``, the file list consumed by
        ``ffmpeg -f concat``. The list is rewritten on every call so that
        re-running the script does not duplicate entries.

        Returns:
            An unbounded ``Queue`` holding one absolute URL per segment.

        Exits the process (after printing a message) when a playlist cannot
        be downloaded.
        """
        try:
            url_m3u8_hls = xiazai().savefile()
            response = requests.get(url_m3u8_hls, headers=self.headers,
                                    timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException:
            # Only network/HTTP failures are reported as a connection failure;
            # programming errors are allowed to surface with a traceback.
            print('连接失败')
            sys.exit()

        # Keep lines that reference a .ts file. Some playlists use another
        # extension (e.g. png); those are not picked up by this filter.
        segments = self._segment_urls(url_m3u8_hls, response.text)

        # Unbounded: nothing consumes the queue yet, so a size limit would make
        # put() block forever on a very long playlist.
        ts_queue = Queue()
        for segment in segments:
            ts_queue.put(segment)

        # xiazai() normally creates the folder; make sure it exists anyway.
        os.makedirs(wenjian, exist_ok=True)
        concatfile = wenjian + '/' + "s" + '.txt'
        with open(concatfile, 'w', encoding='utf-8') as fp:
            fp.writelines("file %s\n" % self._ts_filename(segment)
                          for segment in segments)

        return ts_queue

    def shijian(self, dm_time):
        """Print the video duration given its length in seconds."""
        shichang_time = str(datetime.timedelta(seconds=dm_time))
        print('视频时长:%s' % shichang_time)

    def xiazai1(self, ts_queue):
        """Worker loop: download queued segment URLs until the queue is empty.

        Each segment is saved in the ``wenjian`` folder under the file name
        taken from its URL. A URL that fails is appended to
        ``<wenjian>/shibai.txt`` and the worker moves on to the next one.
        """
        while True:
            try:
                # get_nowait() instead of empty()+get(): another worker may take
                # the last item between the two calls, leaving this thread
                # blocked in get() forever.
                url = ts_queue.get_nowait()
            except Empty:
                return

            try:
                r = requests.get(url, headers=self.headers,
                                 timeout=self.REQUEST_TIMEOUT)
                # Do not store an HTTP error page as a video segment.
                r.raise_for_status()
                filename = self._ts_filename(url)
                with open(wenjian + '/' + filename, 'wb') as fp:
                    fp.write(r.content)
            except Exception:
                # Best effort: record the failure and keep the worker alive.
                with open(wenjian + '/' + 'shibai.txt', 'a') as fp:
                    fp.write(url + '\n')
                print(url + '下载失败')

    def hebing(self, name):
        """Merge the downloaded segments into ``<wenjian>/<name>.mp4``.

        Runs ``ffmpeg -f concat -safe 0 -i s.txt -c copy <name>.mp4`` inside
        the download folder and waits for it to finish.

        Raises:
            FileNotFoundError: if the ffmpeg executable cannot be found.
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        # Downloads are written relative to the current working directory, so
        # the merge has to run there as well.
        folder = os.path.join(os.getcwd(), wenjian)
        # Argument list instead of a shell string: the user-supplied name
        # cannot be interpreted by a shell.
        command = ['ffmpeg', '-f', 'concat', '-safe', '0', '-i', 's.txt',
                   '-c', 'copy', '%s.mp4' % name]
        subprocess.run(command, cwd=folder, check=True)


if __name__ == '__main__':
    # Example: http://iqiyi.cdn9-okzy.com/20201019/16908_b0f2428f/index.m3u8
    # These two names must stay module globals: the classes read them.
    xiazaidizhi = input("请输入m3u8链接:").strip()
    wenjian = input("请输入保存文件名:").strip()

    start = datetime.datetime.now().replace(microsecond=0)

    # One downloader is shared by all workers; it only holds read-only headers.
    downloader = xiazai1()
    s = downloader.duqu()

    threads = [
        threading.Thread(target=downloader.xiazai1, name='th-' + str(i),
                         kwargs={'ts_queue': s})
        for i in range(15)
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    end = datetime.datetime.now().replace(microsecond=0)
    print('下载耗时:' + str(end - start))
    # Download time: 0:01:23

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值