### 欢迎访问我的博客 blog.ayla1688.cool, 原文地址:http://blog.ayla1688.cool/archives/314.html
### 推荐
* **浏览器:** Chrome
* **Python 版本:** Python 3
### 找.m3u8文件
* 在视频播放页面按 `F12` 打开浏览器的开发者工具;
* 在网络请求中找到 `.m3u8` 格式的文件,文件内容如下:
```
#EXTM3U
#EXT-X-VERSION:3
#EXT-X-TARGETDURATION:7
#EXT-X-MEDIA-SEQUENCE:0
#EXTINF:6.266667,
6d9935d97da000000.ts
#EXTINF:2.866667,
6d9935d97da000001.ts
#EXTINF:4.566667,
6d9935d97da000002.ts
#EXTINF:2.366667,
6d9935d97da000003.ts
#EXTINF:4.966667,
6d9935d97da000004.ts
#EXTINF:3.333333,
6d9935d97da000005.ts
#EXTINF:5.233333,
6d9935d97da000006.ts
#EXTINF:2.533333,
....
```
将 `.m3u8` 格式文件下载,重命名为 `jiangziya`(不带文件后缀),并放到运行脚本时当前目录下的 `ts` 文件夹中(脚本读取的路径是 `./ts/jiangziya`)。
### 下载.ts文件并合并
```python
# https://youku.cdn7-okzy.com/20201004/20968_67a726a5/1000k/hls/6d9935d97da000138.ts
import requests
import threading
import os
import datetime
import time
# Number of segments downloaded so far. threadDownload() declares it
# ``global`` and increments it after each segment is written, then uses it
# to print the overall progress ratio.
count = 0
def get_ts_urls(
    playlist_path=None,
    base_url="https://youku.cdn7-okzy.com/20201004/20968_67a726a5/1000k/hls/",
):
    """Build the download URL of every ``.ts`` segment listed in the playlist.

    Args:
        playlist_path: Path of the downloaded ``.m3u8`` playlist. When omitted,
            ``<current working directory>/ts/jiangziya`` is read.
        base_url: Prefix that is prepended to each segment file name.

    Returns:
        A list of segment URLs, in the order they appear in the playlist.
    """
    if playlist_path is None:
        playlist_path = os.getcwd() + "/ts/jiangziya"
    urls = []
    with open(playlist_path, "r") as file:
        for line in file:
            # Strip surrounding whitespace before testing the suffix so the
            # final segment is kept even when the playlist does not end with
            # a newline.
            name = line.strip()
            # Lines starting with "#" are playlist tags/comments, never
            # segment names.
            if name.endswith(".ts") and not name.startswith("#"):
                urls.append(base_url + name)
    return urls
# Serializes updates of the shared ``count`` progress counter, which is
# incremented from several worker threads at once.
_count_lock = threading.Lock()


def threadDownload(start, end, urls):
    """Download ``urls[start:end]`` into ``<cwd>/ts/`` and print the progress.

    Meant to be used as the target of a worker thread (threads, not
    processes). Each segment is saved under the last path component of its
    URL. A segment whose request fails is reported and skipped so the rest of
    the slice is still downloaded.

    Args:
        start: Index of the first URL this worker handles.
        end: Index one past the last URL this worker handles.
        urls: The complete URL list; its length is the progress denominator.
    """
    global count
    headers = {
        'Origin': 'http://www.86cg.tv',
        'Referer': 'http://www.86cg.tv/online/85620-1-1.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    }
    download_path = os.getcwd() + "/ts/"
    for url in urls[start:end]:
        try:
            # NOTE(review): verify=False disables TLS certificate checking;
            # kept from the original, enable verification if the host allows.
            # No stream=True: the body is fetched inside this try block, so
            # read errors are caught here too. The timeout keeps a stalled
            # connection from blocking the worker forever.
            response = requests.get(url, verify=False, headers=headers, timeout=30)
            # Do not save an HTTP error page as if it were video data.
            response.raise_for_status()
        except requests.RequestException as e:
            # Format the exception itself: "%s" % e.args raises TypeError
            # unless args holds exactly one item.
            print("请求异常:%s" % (e,))
            # exit() inside a thread would only end this worker silently and
            # abandon the remaining URLs of its slice; skip this one instead.
            continue
        ts_path = download_path + url.split("/")[-1]
        with open(ts_path, "wb") as file:
            file.write(response.content)
        # ``count += 1`` is a read-modify-write; guard it so concurrent
        # workers cannot lose updates.
        with _count_lock:
            count += 1
            done = count
        print("下载进度:%.2f" % (done / len(urls)))
def download(urls, num_thread=100, part=50):
    """Download all segment URLs using worker threads and wait for them.

    Worker ``i`` handles ``urls[part * i : part * (i + 1)]``; the last worker
    additionally takes everything that is left over. Workers whose slice would
    be empty are not started.

    Args:
        urls: Segment URLs to download.
        num_thread: Maximum number of worker threads (default 100).
        part: Number of URLs handed to each worker (default 50).
    """
    total = len(urls)
    workers = []
    for i in range(num_thread):
        start = part * i
        if start >= total:
            # Every remaining slice would be empty; no need for more threads.
            break
        end = total if i == num_thread - 1 else start + part
        t = threading.Thread(
            target=threadDownload,
            kwargs={'start': start, 'end': end, 'urls': urls},
            # Thread.setDaemon() is deprecated; set the flag at construction.
            daemon=True,
        )
        t.start()
        workers.append(t)
    # Join only the threads started here, not every thread alive in the
    # interpreter.
    for t in workers:
        t.join()
def combine(files):
    """Concatenate the given segment files into ``<cwd>/ts/jiangziya.ts``.

    Segments are appended in the order given. If the output file itself is
    present in ``files`` (for example a merged file left in the directory by
    an earlier run) it is ignored, so the output is never read while it is
    being written.

    Args:
        files: Paths of the ``.ts`` segment files to merge.
    """
    root_path = os.getcwd() + "/ts/"
    file_name = "jiangziya.ts"
    target = os.path.normcase(os.path.abspath(root_path + file_name))
    # Filter before opening the output: opening it with "wb" truncates it.
    sources = [
        path for path in files
        if os.path.normcase(os.path.abspath(path)) != target
    ]
    with open(root_path + file_name, "wb") as file:
        for path in sources:
            # Close every segment right after copying instead of leaking the
            # handle.
            with open(path, "rb") as segment:
                file.write(segment.read())
    print("合并完成")
def fileWalker():
    """Collect the paths of all ``.ts`` files below ``<cwd>/ts``.

    File names are sorted lexicographically within each directory, so
    zero-padded segment names come back in playback order. Any file ending in
    ``.ts`` is returned, including a merged output left by an earlier run.

    Returns:
        A list of file paths.
    """
    root_path = os.getcwd() + "/ts"
    file_list = []
    for dir_path, _dir_names, file_names in os.walk(root_path):
        for fn in sorted(file_names):
            if fn.endswith(".ts"):
                # os.walk descends into subdirectories, so join with the
                # directory the file was actually found in, not the root.
                file_list.append(dir_path + "/" + fn)
    return file_list
if __name__ == "__main__":
    # Script entry point: download every segment, merge them, report the
    # elapsed wall-clock time in whole seconds.
    print("开始下载......")
    started_at = int(time.time())
    download(get_ts_urls())
    print("下载完成,开始合并ts文件")
    combine(fileWalker())
    finished_at = int(time.time())
    elapsed = finished_at - started_at
    print("下载完成,一共用时:%d" % elapsed)
```