龙岭迷窟视频的爬取：大多数视频网站不会直接给出 MP4 链接，视频通常是由许多 ts 分片文件合并而成的。
1. 按 F12 打开开发者工具，找到 m3u8 文件的地址并下载，由下面代码中的 get_m3u8_save 函数保存到本地。
2. 从 m3u8 文件中提取各个 ts 文件名，并与 ts 的基础 URL 拼接成完整地址，见下面的 get_ts_url 函数。
3. 下载 ts 文件，见下面代码中的 download 函数。本人下载 ts 的截图如下:
4. 合并成 MP4 格式：copy /b D:\PycharmProjects\baidu\video\*.ts D:\PycharmProjects\baidu\video\new.mp4 。其中 /b 表示按二进制方式合并；/b 后面的第一个路径是 ts 文件的源地址（* 表示所有文件），第二个路径是合并后新文件的名称（mp4 格式）。
详细代码如下:
import requests
from threading import *
import time
import random
# https://www.88ysw.com/ 88影视网
# Python的urllib3软件包的证书认证及警告的禁用
import urllib3
# Capacity of the semaphore below (intended cap on concurrent download threads).
nMaxThread = 5
# Bounded semaphore sized by nMaxThread; it is acquired before a download
# thread is started and released by download() when that call finishes.
connectlock = BoundedSemaphore(nMaxThread)
# Silence urllib3 warnings (download() sends its requests with verify=False).
urllib3.disable_warnings()
# HTTP headers sent with every request; the User-Agent is a desktop Firefox string.
header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'
}
def get_m3u8_save(self, url=None):
    """Download an m3u8 playlist and save it to ``m3u8_path/m3u8_path.txt``.

    This is a module-level function, so the leading ``self`` parameter is a
    leftover. It is kept so existing two-argument calls keep working, and the
    function can now also be called with the URL alone:
    ``get_m3u8_save(url)``.

    Args:
        self: Ignored when ``url`` is given; otherwise taken to be the
            playlist URL.
        url: URL of the ``.m3u8`` playlist.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    import os

    # Single-argument call: the only positional value is the playlist URL.
    if url is None:
        url = self

    m3u8 = requests.get(url=url, headers=header, timeout=30)
    # Do not save an HTML error page as if it were the playlist.
    m3u8.raise_for_status()

    os.makedirs("m3u8_path", exist_ok=True)
    with open("m3u8_path/m3u8_path.txt", "w") as f:
        f.write(m3u8.text)
def get_ts_url(ts_base_url):
    """Build the list of ts segment URLs from the saved m3u8 playlist.

    Reads ``m3u8_path/m3u8_path.txt`` and collects every entry that ends in
    ``.ts``. Relative entries are prefixed with ``ts_base_url``; entries that
    are already absolute ``http(s)`` URLs are used unchanged.

    Args:
        ts_base_url: Prefix (normally ending in ``/``) placed in front of
            each relative segment name.

    Returns:
        list[str]: Segment URLs in playlist order (empty if none are found).
    """
    urls = []
    with open("m3u8_path/m3u8_path.txt", "r") as file:
        for line in file:
            name = line.strip()
            # Lines starting with '#' are playlist tags/comments, not segments.
            if not name or name.startswith("#"):
                continue
            # Test the stripped text so a final line without a trailing
            # newline is not silently dropped.
            if not name.endswith(".ts"):
                continue
            if name.startswith(("http://", "https://")):
                urls.append(name)
            else:
                urls.append(ts_base_url + name)
    print(urls)
    return urls
def download(ts_urls):
    """Download every ts segment in ``ts_urls`` into the ``./video`` directory.

    Each file is named after the last path component of its URL. The caller
    is expected to have acquired ``connectlock`` once before calling; it is
    released exactly once when this function returns or raises.

    Args:
        ts_urls: Iterable of absolute segment URLs.

    Raises:
        Exception: Any request or file error is reported with the failure
            message and then re-raised unchanged.
    """
    import os

    try:
        os.makedirs("./video", exist_ok=True)
        for ts_url in ts_urls:
            file_name = ts_url.split("/")[-1]
            # The context manager closes the streamed response and returns
            # its connection to the pool.
            with requests.get(ts_url, headers=header, stream=True,
                              verify=False, timeout=30) as res:
                # Do not write an HTTP error page to disk as a .ts file.
                res.raise_for_status()
                with open("./video/{0}".format(file_name), 'wb') as f:  # save the stream as a ts file
                    for chunk in res.iter_content(chunk_size=2048):
                        if chunk:
                            f.write(chunk)
            print(file_name + "下载结束")
    except Exception:
        # Report the failure only when something actually went wrong.
        print("下载失败--------")
        raise
    finally:
        connectlock.release()
if __name__ == '__main__':
    # Playlist URL; only needed when refreshing the locally saved playlist.
    url = "https://yuledy.helanzuida.com/20200422/2931_1a8a5c71/1000k/hls/index.m3u8"
    # Segment names in the playlist are relative to this directory URL.
    ts_base_url = "https://yuledy.helanzuida.com/20200422/2931_1a8a5c71/1000k/hls/"
    # Uncomment to (re)download the playlist before extracting segment URLs.
    # get_m3u8_save(url)
    urls = get_ts_url(ts_base_url)

    # Deal the segments round-robin into at most nMaxThread batches so the
    # configured number of workers is actually used.
    batches = [urls[i::nMaxThread] for i in range(nMaxThread)]
    workers = []
    for batch in batches:
        if not batch:
            continue
        # download() releases the semaphore once when it finishes, so acquire
        # once per worker before starting it.
        connectlock.acquire()
        t = Thread(target=download, args=(batch,))
        t.start()
        workers.append(t)

    # Wait for every worker so the script ends only after all downloads finish.
    for t in workers:
        t.join()