记录一次异步下载视频数据的过程。
主要步骤:
1、拿到页面源代码,找到iframe
2、从iframe页面源代码中找到m3u8文件地址
3、下载第一层m3u8文件 ===>下载第二层m3u8文件
4、下载ts文件
5、解密
6、合并文件
以下为具体代码:
import asyncio
import os
import re
from urllib.parse import urljoin

import aiofiles
import aiohttp
import requests
from bs4 import BeautifulSoup
from Crypto.Cipher import AES  # pycryptodome
def get_iframe_url(url):
    """Return the absolute URL of the player page embedded in *url*.

    The page at *url* is downloaded and its first ``<iframe>`` is located.
    The iframe's ``src`` is typically site-relative, for example
    ``/js/player/?url=https://video.buycar5.cn/20200901/e4NhpyM5/index.m3u8&id=29911&num=1&count=22&vt=1``,
    so it is resolved against *url* to produce the real player address, for
    example ``http://www.wbdy.tv/js/player/?url=...``.

    Args:
        url: Address of the play page.

    Returns:
        The absolute URL of the iframe's player page.

    Raises:
        ValueError: If the page contains no ``<iframe>`` with a ``src``.
    """
    resp = requests.get(url)
    main_page = BeautifulSoup(resp.text, "html.parser")
    iframe = main_page.find("iframe")
    src = iframe.get("src") if iframe is not None else None
    if not src:
        raise ValueError(f"no iframe with a src attribute found on {url}")
    # urljoin copes with site-relative, page-relative and already-absolute
    # src values; cutting the page URL at "/play/" only fit one URL layout.
    return urljoin(url, src)
def get_first_m3u8_url(play_url):
    """Download the player page and return the m3u8 address found in it.

    The address is the value of the first ``url: "...",`` entry in the page
    text, e.g. ``https://video.buycar5.cn/20200810/3UbmZCEa/index.m3u8``.
    """
    page_text = requests.get(play_url).text
    found = re.search(r'url: "(?P<url>.*?)",', page_text, re.S)
    return found.group("url")
def down_load_m3u8(url, name):
    """Download *url* and store the raw response body in the file *name*."""
    payload = requests.get(url).content
    with open(name, mode="wb") as out_file:
        out_file.write(payload)
async def download_ts(url, name, session):
    """Download one segment from *url* into ``video/<name>`` via *session*.

    A completion message is printed once the file has been written.
    """
    async with session.get(url) as response:
        async with aiofiles.open(f"video/{name}", mode="wb") as out_file:
            # Read the whole response body, then write it to the segment file.
            body = await response.content.read()
            await out_file.write(body)
    print(f"{name}下载完毕")
async def aio_download():
    """Download every segment listed in ``second.txt`` concurrently.

    Each non-blank line of ``second.txt`` that does not start with ``#`` is
    treated as a segment URL. The text after ``/hls/`` in that URL is used
    as the local file name, and one ``download_ts`` task is created per
    segment, all sharing a single ``aiohttp`` session.

    Blank lines are skipped, the ``video`` output directory is created when
    missing, and an empty playlist is a no-op instead of an error.
    """
    # download_ts writes into video/<name>; make sure the directory exists.
    os.makedirs("video", exist_ok=True)
    tasks = []
    async with aiohttp.ClientSession() as session:  # one session for all tasks
        async with aiofiles.open("second.txt", mode="r", encoding="utf-8") as f:
            async for line in f:
                line = line.strip()
                # Skip m3u8 tags/comments and blank lines (a blank line
                # would otherwise fail on the "/hls/" split below).
                if not line or line.startswith("#"):
                    continue
                name = line.split("/hls/")[1]
                tasks.append(
                    asyncio.create_task(download_ts(line, name, session))
                )
        # Wait inside the session block so the session stays open until all
        # downloads finish. asyncio.wait() rejects an empty collection.
        if tasks:
            await asyncio.wait(tasks)
def get_key(url):
    """Fetch the decryption key from *url* and return the raw response bytes."""
    key_response = requests.get(url)
    key_bytes = key_response.content
    return key_bytes
async def dec_ts(name, key):
    """Decrypt one downloaded segment with AES in CBC mode.

    *name* is the segment's playlist line; the part after ``/hls/`` is the
    local file name. ``video/<file>`` is read, decrypted with *key* and an
    IV of sixteen ASCII ``0`` characters, and the result is written to
    ``video/temp_<file>``. A completion message is printed afterwards.
    """
    file_name = name.split("/hls/")[1]
    cipher = AES.new(key=key, IV=b"0000000000000000", mode=AES.MODE_CBC)
    async with aiofiles.open(f"video/{file_name}", mode="rb") as encrypted:
        async with aiofiles.open(f"video/temp_{file_name}", mode="wb") as decrypted:
            ciphertext = await encrypted.read()  # whole encrypted segment
            plaintext = cipher.decrypt(ciphertext)
            await decrypted.write(plaintext)
    print(f"{file_name}处理完毕")
async def aio_dec(key):
    """Decrypt every segment listed in ``second.txt`` concurrently.

    One ``dec_ts`` task is created for each non-blank line of ``second.txt``
    that does not start with ``#``; *key* is passed to every task.

    Blank lines are skipped and an empty playlist is a no-op instead of an
    error.
    """
    tasks = []
    async with aiofiles.open("second.txt", mode="r", encoding="utf-8") as f:
        async for line in f:
            line = line.strip()
            # Skip m3u8 tags/comments and blank lines (dec_ts would fail on
            # a line without "/hls/").
            if not line or line.startswith("#"):
                continue
            tasks.append(asyncio.create_task(dec_ts(line, key)))
    # asyncio.wait() rejects an empty collection, so guard against it.
    if tasks:
        await asyncio.wait(tasks)
def merge_ts(playlist="second.txt", video_dir="video", output="xxx.mp4"):
    """Concatenate the decrypted segments into one file, in playlist order.

    The previous implementation changed into a placeholder directory and ran
    the Windows-only ``copy /b *.ts``. That merges files in wildcard order
    and also picks up the still-encrypted originals. This version joins
    exactly the ``temp_<name>`` files, in the order the playlist lists them,
    using plain file I/O so it works on any platform.

    Args:
        playlist: Path of the second-level m3u8 file.
        video_dir: Directory holding the ``temp_<name>`` segment files.
        output: File name of the merged video, created inside *video_dir*.

    Raises:
        FileNotFoundError: If the playlist or a listed segment is missing.
    """
    segment_paths = []
    with open(playlist, mode="r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            # Same naming rule as the download/decrypt steps: the text after
            # "/hls/" is the local file name.
            name = line.split("/hls/")[1]
            segment_paths.append(os.path.join(video_dir, f"temp_{name}"))

    with open(os.path.join(video_dir, output), mode="wb") as merged:
        for path in segment_paths:
            with open(path, mode="rb") as segment:
                merged.write(segment.read())
    print("完事")
def main(url):
    """Run the whole download pipeline for the play page at *url*.

    Steps:
        1. Find the player page embedded in the play page's iframe.
        2. Extract the first-level m3u8 URL from the player page.
        3. Download the first-level m3u8 (``first.txt``) and the
           second-level m3u8 it points to (``second.txt``).
        4. Download all segments asynchronously.
        5. Fetch the AES key named in ``second.txt`` and decrypt the segments.
        6. Merge the decrypted segments.

    Raises:
        RuntimeError: If ``first.txt`` lists no second-level playlist or
            ``second.txt`` carries no ``#EXT-X-KEY`` URI.
    """
    # 1. Main page -> iframe -> player page URL.
    play_url = get_iframe_url(url)

    # 2. Player page -> first-level m3u8 URL, e.g.
    #    https://video.buycar5.cn/20200810/3UbmZCEa/index.m3u8
    first_m3u8_url = get_first_m3u8_url(play_url)

    # 3.1 Download the first-level m3u8 file.
    down_load_m3u8(first_m3u8_url, "first.txt")

    # 3.2 Download the second-level m3u8 file. Entries look like
    #     /20200810/3UbmZCEa/2000kb/hls/index.m3u8 and must be resolved
    #     against the first-level URL. urljoin does this for any date or
    #     path, where the old code split on the hard-coded "/20200810/".
    second_m3u8_url = None
    with open("first.txt", mode="r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            line = line.split("?skipl=1")[0]
            second_m3u8_url = urljoin(first_m3u8_url, line)
            down_load_m3u8(second_m3u8_url, "second.txt")
    if second_m3u8_url is None:
        raise RuntimeError("first.txt lists no second-level m3u8 playlist")

    # 4. Download every segment listed in second.txt (async coroutines).
    asyncio.run(aio_download())

    # 5.1 Find the key URL, e.g.
    #     https://ts1.yuyuangewh.com:9999/20200810/3UbmZCEa/2000kb/hls/key.key
    #     Only the quoted URI value is taken, so a trailing newline or later
    #     attributes (such as IV=...) cannot leak into the URL. A relative
    #     URI is resolved against the playlist URL.
    key_url = ""
    with open("second.txt", mode="r", encoding="utf-8") as f:
        for line in f:
            if line.startswith("#EXT-X-KEY"):
                uri = re.search(r'URI="([^"]*)"', line)
                if uri:
                    key_url = urljoin(second_m3u8_url, uri.group(1))
    if not key_url:
        raise RuntimeError("no #EXT-X-KEY URI found in second.txt")
    key = get_key(key_url)

    # 5.2 Decrypt all segments.
    asyncio.run(aio_dec(key))

    # 6. Merge the decrypted segments into one file.
    merge_ts()
if __name__ == "__main__":
    # Script entry point: run the pipeline for one example play page.
    start_url = "http://www.wbdy.tv/play/29398_1_1.html"
    main(start_url)