How to Download Douluo Continent (斗罗大陆)


For learning purposes only.

Press F12 and grab the m3u8 file link from the browser's Network panel.
(Sometimes this file can't be found; I'm not sure why.)
In that case I recommend using this URL instead; the Python code will extract the vurl parameter from it.
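To make the vurl extraction concrete, here is what the script does to the copied link. The wrapper URL below is made up; only the &vurl=...ver=4 shape matches what the code expects:

import re
import urllib.parse

# Hypothetical wrapper URL copied from the F12 Network panel (host and params are invented)
raw = 'https://example.com/play?app=demo&vurl=https%3A%2F%2Fexample.com%2Fvideo%2Findex.m3u8%3Fver%3D4'
decoded = urllib.parse.unquote(raw)
match = re.findall('.*&vurl=(.*ver=4)', decoded)  # same pattern the script below uses
m3u8_url = match[0] if match else decoded
print(m3u8_url)  # -> https://example.com/video/index.m3u8?ver=4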
Now let Python do the crawling:

import requests
import re
import os
import urllib.parse
import time

start_time = time.time()
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
}
m3u8_url = input("m3u8_url:")
# URL-decode the pasted link
m3u8_url = urllib.parse.unquote(m3u8_url)
# Extract the real playlist URL from the vurl parameter if present
r_url = '.*&vurl=(.*ver=4)'
url_list = re.findall(r_url, m3u8_url)
if len(url_list) != 0:
    m3u8_url = url_list[0]
# Download the playlist and pull out every ts segment path
m3u8 = requests.get(m3u8_url, headers=headers).text
r_m3u8 = ',\n(.*?)#'
# re.S lets '.' match newlines so the whole playlist is scanned
ts_all = re.findall(r_m3u8, m3u8, re.S)
print('%d ts files in total' % len(ts_all))
# Base URL is everything up to the last '/' of the m3u8 URL
r_ts = '(.*)/.*'
ts_1 = re.findall(r_ts, m3u8_url)[0]
num = 0
if not os.path.exists("./ts"):
    os.mkdir("./ts")
# Download each segment one by one
for ts_2 in ts_all:
    ts_url = ts_1 + '/' + ts_2
    ts = requests.get(ts_url, headers=headers).content
    with open('./ts/%d.ts' % num, 'wb') as fp:
        fp.write(ts)
    print('%d.ts saved' % num)
    num += 1
end_time = time.time()
print('Download finished, total time:', end_time - start_time)
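The ',\n(.*?)#' regex relies on every #EXTINF line ending with a comma and on another tag following each segment, which is probably why the playlist sometimes fails to parse. A more forgiving variant (my own sketch, not part of the original script) simply keeps every non-comment line:

def parse_segments(m3u8_text):
    # Every non-empty line that is not a '#' tag is a segment path
    return [line.strip() for line in m3u8_text.splitlines()
            if line.strip() and not line.strip().startswith('#')]

# ts_all = parse_segments(m3u8)  # drop-in replacement for the re.findall call above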
  

Merge the ts files with a bat script

cd ts
set /p n=Start number:
set /p end=End number:
REM seed the output with the first segment
copy %n%.ts out.ts
REM remember the range for the output filename, e.g. out_0-99.mp4
set num=%n%-%end%
:home
set /a n+=1
REM append the next segment onto out.ts
echo Y | copy /b out.ts+%n%.ts temp.ts && move /y temp.ts out.ts
if not %n%==%end% goto home
move /y out.ts ../out_%num%.mp4
pause
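If you would rather stay in Python, the same binary concatenation can be done there. This is a sketch of my own, equivalent to the copy /b loop above; it assumes the segments were saved as 0.ts, 1.ts, ... in ./ts:

import os

# Concatenate ./ts/0.ts, ./ts/1.ts, ... into one file, in numeric order
ts_files = sorted((f for f in os.listdir('./ts') if f.endswith('.ts')),
                  key=lambda name: int(name.split('.')[0]))
with open('out.mp4', 'wb') as out:
    for name in ts_files:
        with open(os.path.join('./ts', name), 'rb') as fp:
            out.write(fp.read())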

The default quality is 720p; higher quality may require the cookies of a paid member account.
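If you do have a member account, the session cookie copied from a logged-in browser can be sent along with the request headers. The cookie value below is a placeholder, not a real one:

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    # Placeholder: paste the Cookie header copied from the browser's F12 Network panel
    'Cookie': 'PASTE_YOUR_COOKIE_HERE',
}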
Update
I added a coroutine version, but it isn't very stable; some ts downloads hang.
I'll optimize it later.
The problem is probably related to the cached tasks; cancelling them when the code finishes should do it:

for task in asyncio.Task.all_tasks():
    task.cancel()
loop.close()

Cancelling the tasks doesn't fully fix it either; it still hangs sometimes.
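One hedge against the hangs (my own guess at the cause, not confirmed) is to give each aiohttp request an explicit timeout, so a stalled segment raises an error and can be retried instead of blocking forever. A sketch with an arbitrary 60-second limit:

import asyncio
import aiohttp

async def get_with_timeout(num, url, headers):
    # Same job as get() below, but a stalled request raises instead of hanging
    timeout = aiohttp.ClientTimeout(total=60)  # 60 s total per request is an arbitrary choice
    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.get(url, headers=headers) as response:
                data = await response.read()
        with open('./ts/%d.ts' % num, 'wb') as fp:
            fp.write(data)
        print('%d.ts save finished' % num)
    except (asyncio.TimeoutError, aiohttp.ClientError) as exc:
        print('%d.ts failed: %r' % (num, exc))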
Full coroutine code (deprecated, the latest version is below)

import requests
import re
import os
import urllib.parse
import asyncio
import aiohttp
import time

start_time = time.time()
error = []

async def get(num, url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    }
    print('%d.ts save start' % num)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                ts = await response.read()
                with open('./ts/%d.ts' % num, 'wb') as fp:
                    fp.write(ts)
                print('%d.ts save finished' % num)
    except Exception:
        print('%d.ts save error' % num)
        error.append([num, url])

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
}
m3u8_url = input("m3u8_url:")
# URL-decode the pasted link
m3u8_url = urllib.parse.unquote(m3u8_url)
# Extract the real playlist URL from the vurl parameter if present
r_url = '.*&vurl=(.*ver=4)'
url_list = re.findall(r_url, m3u8_url)
if len(url_list) != 0:
    m3u8_url = url_list[0]
m3u8 = requests.get(m3u8_url, headers=headers).text
r_m3u8 = ',\n(.*?)#'
# re.S lets '.' match newlines so the whole playlist is scanned
ts_all = re.findall(r_m3u8, m3u8, re.S)
print('%d ts files in total' % len(ts_all))
r_ts = '(.*)/.*'
ts_1 = re.findall(r_ts, m3u8_url)[0]
num = 0
tasks = []
if not os.path.exists("./ts"):
    os.mkdir("./ts")
# Schedule one download task per ts segment
for ts_2 in ts_all:
    ts_url = ts_1 + '/' + ts_2
    c = get(num, ts_url)
    task = asyncio.ensure_future(c)
    tasks.append(task)
    num += 1
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
# Cancel any leftover tasks to avoid problems with cached tasks
# (asyncio.Task.all_tasks() was removed in Python 3.9; use asyncio.all_tasks() there)
for task in asyncio.Task.all_tasks():
    task.cancel()
loop.close()
# Failures are rare, but retry them synchronously just in case
if len(error) == 0:
    print('All downloads succeeded')
else:
    print('%d failures\nretrying the failed segments' % len(error))
    for url in error:
        num = url[0]
        ts = requests.get(url[1], headers=headers).content
        with open('./ts/%d.ts' % num, 'wb') as fp:
            fp.write(ts)
        print('%d.ts saved' % num)
end_time = time.time()
print('Download finished, total time:', end_time - start_time)
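On Python 3.7+ the same scheduling can be written with asyncio.run and asyncio.gather, which avoids the deprecated Task.all_tasks() call. This is my own sketch, not part of the original post; fetch, main and jobs are names I made up:

import asyncio
import aiohttp

async def fetch(session, num, url):
    # Download one segment and write it to ./ts/<num>.ts
    async with session.get(url) as response:
        data = await response.read()
    with open('./ts/%d.ts' % num, 'wb') as fp:
        fp.write(data)

async def main(jobs):
    # jobs: list of (num, ts_url) pairs built the same way as above
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(fetch(session, num, url) for num, url in jobs))

# asyncio.run(main(jobs))  # replaces ensure_future + run_until_complete + loop.close()

Sharing one ClientSession across all segments also reuses connections instead of opening a new session per request.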
    

Coroutines are noticeably faster (left: plain download, right: coroutine download).
There may still be issues; I'll look into them later.
I came up with a compromise:
cap how many tasks are started each round and call the coroutine function in batches, so nothing waits long enough to time out.
It has been stable so far and no longer hangs.
Latest coroutine code

import requests
import re
import os
import urllib.parse
import asyncio
import aiohttp
import time

start_time = time.time()
error = []

async def get(num, url):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
    }
    print('%d.ts save start' % num)
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                ts = await response.read()
                with open('./ts/%d.ts' % num, 'wb') as fp:
                    fp.write(ts)
                print('%d.ts save finished' % num)
    except Exception:
        print('%d.ts save error' % num)
        error.append([num, url])

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36',
}
m3u8_url = input("m3u8_url:")
# URL-decode the pasted link
m3u8_url = urllib.parse.unquote(m3u8_url)
# Extract the real playlist URL from the vurl parameter if present
r_url = '.*&vurl=(.*ver=4)'
url_list = re.findall(r_url, m3u8_url)
if len(url_list) != 0:
    m3u8_url = url_list[0]
m3u8 = requests.get(m3u8_url, headers=headers).text
r_m3u8 = ',\n(.*?)#'
# re.S lets '.' match newlines so the whole playlist is scanned
ts_all = re.findall(r_m3u8, m3u8, re.S)
print('%d ts files in total' % len(ts_all))
r_ts = '(.*)/.*'
ts_1 = re.findall(r_ts, m3u8_url)[0]
num = 0
if not os.path.exists("./ts"):
    os.mkdir("./ts")
# Limit how many tasks start per batch, to avoid waiting so long that requests time out
max_link = 20
t = len(ts_all) // max_link
if len(ts_all) % max_link != 0:
    t += 1
loop = asyncio.get_event_loop()
for i in range(1, t + 1):
    print('Batch %d' % i)
    tasks = []  # only wait on the tasks of the current batch
    while num < max_link * i and num < len(ts_all):
        ts_url = ts_1 + '/' + ts_all[num]
        c = get(num, ts_url)
        task = asyncio.ensure_future(c)
        tasks.append(task)
        num += 1
    loop.run_until_complete(asyncio.wait(tasks))
    # Cancel anything still pending before the next batch
    # (asyncio.Task.all_tasks() was removed in Python 3.9; use asyncio.all_tasks() there)
    for task in asyncio.Task.all_tasks():
        task.cancel()
loop.close()
# Failures are rare, but retry them synchronously just in case
if len(error) == 0:
    print('All downloads succeeded')
else:
    print('%d failures\nretrying the failed segments' % len(error))
    for url in error:
        num = url[0]
        ts = requests.get(url[1], headers=headers).content
        with open('./ts/%d.ts' % num, 'wb') as fp:
            fp.write(ts)
        print('%d.ts saved' % num)
end_time = time.time()
print('Download finished, total time:', end_time - start_time)
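An alternative to fixed-size batches is to cap concurrency with asyncio.Semaphore, so a new download starts as soon as a slot frees up instead of waiting for the slowest segment in the batch. A minimal sketch of that idea; get, ts_1 and ts_all are the names used above, the wrapper itself is mine:

import asyncio

sem = asyncio.Semaphore(20)  # at most 20 downloads in flight, same spirit as max_link

async def get_limited(num, url):
    # Reuses the get() coroutine defined above, but only 20 run at once
    async with sem:
        await get(num, url)

# tasks = [asyncio.ensure_future(get_limited(n, ts_1 + '/' + seg)) for n, seg in enumerate(ts_all)]
# loop.run_until_complete(asyncio.wait(tasks))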
    