用 chrome + excel + VBA + XMLHTTP 爬视频网站 video 标签中的 blob:http m3u8 视频资源,ffmpeg 拼接资源

目录

一、准备

二、实干

三、等待

四、拼接

五、观影及xls工程下载


刚刚看了一个视频网站的资源,用 chrome 分析 Network 部分,发现其使用的也是 m3u8 格式

(Html源码图)

看了几位大神的文章,有说到直接用 : ffmpeg -i http://www.xxx.com/xxx.m3u8 name.mp4  下载,试了下,的确很爽,可是今天另一集用 ffmpeg 爬不了了,因为切片资源格式异常问题一直报错,最终卡死了,下了 20% 不到就不动了。

无奈之下,看来得自己写一个下载 m3u8 指向的 ts 文件资源的小程序了。

一、准备

    1.1 chrome 

            推荐 61 及以后版本。

    1.2 excel

            推荐 2003 及以后版本,若是 2003 以后版本,记得打开宏权限。

    1.3 ffmpeg 

            这个没什么好说的,自己找资源下载吧。我用的是格式工厂里面分离出来的:

           D:\Program Files (x86)\FormatFactory\ffmpeg.exe

           当然,你懒得分离的话,直接把它加到系统变量 path 里面也行。

(图)

二、实干

    2.1 解析取得资源文件及链接

用 chrome 分析 Network 部分,会发现有两个 m3u8 文件,为了一步到位,我们一般选择大的那个,小的其实只是指向大的这个的链接。在资源列表文件上点右键,选择打开在新标签页,这样就可以将其下载到我们的电脑上:

(图)

找到下载的 m3u8 文件,复制到你准备的工作目录,例如我的:H:\Media\m3u8download\1-08\

同时,复制这个 m3u8 文件的链接,留作下一步用:

用记事本打开下载的那个 m3u8 文件,你会发现里面实际上是个列表,但是缺少主域名路径,这就需要用到上面复制的链接地址:

去除相同部分,得到我们下面代码中需要用到的根链接: https://www.yxlmbbs.com:65

    2.2 下载所用源码(启动过程 runDownload):

在 excel 中打开 VBA 编辑程序,添加模块,填入以下代码,然后在 runDownload 内按 F5 运行即可。

Option Explicit
Public Declare Sub GetLocalTime Lib "kernel32" (lpSystemTime As SYSTEMTIME)

Public Type SYSTEMTIME
        wYear As Integer
        wMonth As Integer
        wDayOfWeek As Integer
        wDay As Integer
        wHour As Integer
        wMinute As Integer
        wSecond As Integer
        wMilliseconds As Integer
End Type
'----------------获取系统完整时间例子
Public JsysData As SYSTEMTIME
Private Declare Function QueryPerformanceCounter Lib "kernel32" (lpPerformanceCount As Currency) As Long
Private Declare Function QueryPerformanceFrequency Lib "kernel32" (lpFrequency As Currency) As Long
     
Public Declare Function timeGetTime Lib "winmm.dll" () As Long
Public Declare Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)

'--------所用API定义
Public Declare Function AllocConsole Lib "kernel32" () As Long
Public Declare Function FreeConsole Lib "kernel32" () As Long
Public Declare Function CloseHandle Lib "kernel32" (ByVal hObject As Long) As Long
Public Declare Function GetStdHandle Lib "kernel32" (ByVal nStdHandle As Long) As Long
Public Declare Function LstrLen Lib "kernel32" Alias "lstrlenA" (ByVal lpString As String) As Long
'Public Declare Function ReadConsole Lib "kernel32" Alias "ReadConsoleA" (ByVal hConsoleInput As Long, ByVal lpBuffer As String, ByVal nNumberOfCharsToRead As Long, lpNumberOfCharsRead As Long, lpReserved As Any) As Long
'Public Declare Function SetConsoleMode Lib "kernel32" (ByVal hConsoleOutput As Long, ByVal dwMode As Long) As Long
Public Declare Function SetConsoleTextAttribute Lib "kernel32" (ByVal hConsoleOutput As Long, ByVal wAttributes As Long) As Long
Public Declare Function SetConsoleTitle Lib "kernel32" Alias "SetConsoleTitleA" (ByVal lpConsoleTitle As String) As Long
Public Declare Function WriteConsole Lib "kernel32" Alias "WriteConsoleA" (ByVal hConsoleOutput As Long, ByVal lpBuffer As Any, ByVal nNumberOfCharsToWrite As Long, lpNumberOfCharsWritten As Long, lpReserved As Any) As Long

'-------------常量定义
'控制台输入输出句柄获取用常量
Public Const STD_INPUT_HANDLE = -10&
Public Const STD_OUTPUT_HANDLE = -11&
Public Const STD_ERROR_HANDLE = -12&
'部分前背景颜色代码,详见: 控制台色卡.png
Public Const FOREGROUND_BLUE = 9
Public Const FOREGROUND_GREEN = 10
Public Const FOREGROUND_RED = 12
Public Const FOREGROUND_INTENSITY = &H8
Public Const BACKGROUND_BLUE = &H10
Public Const BACKGROUND_GREEN = &H20
Public Const BACKGROUND_RED = &H40
Public Const BACKGROUND_INTENSITY = &H80
'设置输入模式常量 SetConsoleMode (input)
Public Const ENABLE_LINE_INPUT = &H2
Public Const ENABLE_ECHO_INPUT = &H4
Public Const ENABLE_MOUSE_INPUT = &H10
Public Const ENABLE_PROCESSED_INPUT = &H1
Public Const ENABLE_WINDOW_INPUT = &H8
'设置输出模式常量 SetConsoleMode (output)
Public Const ENABLE_PROCESSED_OUTPUT = &H1
Public Const ENABLE_WRAP_AT_EOL_OUTPUT = &H2
'-----------所需全局变量
Public hConsoleIn As Long ' 控制台输入句柄
Public hConsoleOut As Long ' 控制台输出句柄
'Public hConsoleErr As Long ' 控制台错误句柄

Dim ResponseText, ResponseBody

Sub runDownload()
    Dim i, urlx, fp, fn, s, t, u() As String, d
    ResponseText = 1
    ResponseBody = 2

    Call Initialize

    Call setTitle("下载 m3u8 切片资源")
    Call setCONColor(FOREGROUND_GREEN, 0)
    Call COut("----------- 开始 --------------" & vbCrLf)

    fp = FreeFile
    Call COut("----------- 加载 m3u8 列表文件 --------------" & vbCrLf)
    Open "H:\Media\m3u8download\1-08\index.m3u8" For Input As #fp
        Do While Not EOF(fp)
            Line Input #fp, t
            s = s & t
            DoEvents
        Loop
    Close #fp
    Call COut("----------- 完成! --------------" & vbCrLf)
    u = Split(s, vbLf)
    d = UBound(u)
    fp = FreeFile
    Call COut("----------- 解析/下载切片文件开始 --------------" & vbCrLf)
    Open "H:\Media\m3u8download\1-08\lst1.txt" For Output As #fp
    For i = 0 To d
        Call COut(i & "/" & d & "行:")
        s = Trim(Replace(u(i), vbCr, ""))
        If Mid(s, 1, 1) = "/" Then '此行判断根据具体文件格式而定
            '-----根域名链接地址 https://www.yxlmbbs.com:65
            urlx = "https://www.yxlmbbs.com:65" & s 'Worksheets("s1").Cells(i, 1).Value
            '-------指定下载文件保存位置
            fn = "H:\Media\m3u8download\1-08\ts\" & Mid(urlx, InStrRev(urlx, "/") + 1)
            Call GetDataToFile(urlx, ResponseBody, fn, True, "utf-8", "utf-8")
            '--------将下载的文件名生成用于 ffmpeg 拼接的列表文件 lst1.txt
            Print #fp, "file '" & fn & "'"
        Else
            Call COut("非资源行跳过!" & vbCrLf)
        End If
        DoEvents
    Next
    Close #fp
    Call COut("----------- 完成! --------------" & vbCrLf)
    Delay 3000
    Call Terminate
End Sub

Public Sub GetDataToFile(Url, DataStic, fname, varAsyncX, CodePageX, saveCodePage) 'As Variant
    
  'On Error GoTo ERR:
  Dim XMLHTTP 'As Object
  Dim DataS, GetData  ' As Variant
  Dim DataB, fn 'As Integer
    
  Set XMLHTTP = CreateObject("Msxml2.XMLHTTP") '"Microsoft.XMLHTTP")
    
  XMLHTTP.Open "get", Url, varAsyncX, "", "" ' True
  XMLHTTP.send
  Call COut("开始下载 " & Url & ":")
    If varAsyncX Then
        Do Until XMLHTTP.ReadyState = 4
            Delay 200
            Call COut(".")
            'DoEvents
        Loop
    End If
  '--------------------------------------函数返回
    Select Case DataStic
      Case ResponseText
        '--------------------------------直接返回字符串
        DataS = XMLHTTP.ResponseText
        GetData = DataS
      Case ResponseBody
        '--------------------------------直接返回二进制
        DataB = XMLHTTP.ResponseBody
        GetData = DataB
      Case ResponseBody + ResponseText
        '------------------------------二进制转字符串[直接返回字串出现乱码时尝试]
        DataS = BytesToStr(XMLHTTP.ResponseBody, CodePageX)
        GetData = DataS
      Case Else
        '--------------------------------无效的返回
        GetData = ""
    End Select
    If Len(GetData) > 1 Then
        'fn = FreeFile
        'Open fname for Output As #fn
        'Print #fn, GetData
        'Close #fn
        saveFile GetData, fname, saveCodePage
        '-----------
        'MsgBox "链接:" & Url & " ," & vbCrLf & "获取内容已保存至:" & fname, vbInformation, "--OK"
    Else
        'MsgBox "链接:" & Url & " 内容失败!", vbCritical, "Err"
    End If
    Call COut("完成,并保存至:" & fname & vbCrLf)
  '--------------------------------------释放空间
  Set XMLHTTP = Nothing
End Sub

Public Function BytesToStr(strBody, CodeBase)
Dim objStream
Set objStream = CreateObject("Adodb.Stream")
With objStream
    .Type = 1
    .Mode = 3
    .Open
    .Write strBody
    .Position = 0
    .Type = 2
    .Charset = CodeBase
    BytesToStr = .ReadText
    .Close
End With
Set objStream = Nothing
End Function

Public Function saveFile(data, recfilen, CodePage)
    Dim fxt, txt, Astream
    Set Astream = CreateObject("Adodb.Stream") 'asp Server.CreateObject("Adodb.Stream")
    fxt = Mid(recfilen, InStrRev(recfilen, ".") + 1)
    txt = False
    If fxt = "asp" Or fxt = "xml" Or fxt = "aspx" Or fxt = "php" Or fxt = "txt" Or fxt = "jsp" Or fxt = "htm" Or fxt = "html" Or fxt = "js" Then
        txt = True
    End If
    If txt Then
        Astream.Type = 2 '1 bin,2 txt
    Else
        Astream.Type = 1 '1 bin,2 txt
    End If
    Astream.Mode = 3 '     adModeRead =1
                    '  adModeReadWrite =3
                    '  adModeRecursive =4194304
                    '  adModeShareDenyNone =16
                    '  adModeShareDenyRead =4
                    '  adModeShareDenyWrite =8
                    '  adModeShareExclusive =12
                    '  adModeUnknown =0
                    '  adModeWrite =2
    Astream.Open
    'Astream.CharSet = "GB2312"
    'Astream.LoadFromFile(recfilen) '装载文件
    'Assp=Astream.size
    Astream.Position = 0 '装载文件时设置为Assp
    'Astream.Writetext tmpstr00,1
    If txt Then
        'data=BytesToStr(data)
        Astream.Charset = CodePage '  "GB2312"
        Astream.Writetext data, 1
        'Astream.CharSet = "GB2312"
    Else
        Astream.Write data
    End If
    'msgbox recfilen
    Astream.SaveToFile recfilen, 2
    ' "F:\temp\a.jpg",2
    Astream.Close
    Set Astream = Nothing
End Function

'
'    '延时函数
'
Public Sub Delay(DelayNum As Long) '毫秒
     Dim Ctr1, Ctr2, Freq As Currency
     Dim Start As Long ', Stime2 As Single
     If QueryPerformanceFrequency(Freq) Then
        QueryPerformanceCounter Ctr1
        Do
            Sleep 1
            DoEvents
            QueryPerformanceCounter Ctr2
        Loop While (Ctr2 - Ctr1) / Freq * 1000 < DelayNum
     Else
'        MsgBox "不支持高精度计数器!"
         '设定开始时间
         Start = timeGetTime
         Do While timeGetTime < Start + DelayNum
            Sleep 1
            DoEvents
         Loop
     End If
End Sub
    
'---------定义函数
Public Sub Initialize() '---初始化获取句柄
    Call AllocConsole
    '获得控制窗口的句柄
    'hConsoleIn = GetStdHandle(STD_INPUT_HANDLE)
    hConsoleOut = GetStdHandle(STD_OUTPUT_HANDLE)
    'hConsoleErr = GetStdHandle(STD_ERROR_HANDLE)
End Sub

Public Sub Terminate() '-----销毁句柄
    Call FreeConsole ' Destroy the console
    'Call CloseHandle(hConsoleIn)
    Call CloseHandle(hConsoleOut)
End Sub

Public Sub COut(szOut As String)   '------文字输出到控制台函数
    WriteConsole hConsoleOut, szOut, LstrLen(szOut), vbNull, vbNull
End Sub

Public Sub setTitle(s As String)
    SetConsoleTitle s '设置窗口标题 '获得控制窗口的句柄
End Sub

Public Sub setCONColor(ByVal f As Long, ByVal b As Long) '---设置文字和背景显示颜色
    If b >= 0 And b <= 15 Then
        If f >= 0 And f <= 15 Then
            b = b * &H10
            SetConsoleTextAttribute hConsoleOut, f Or b
        Else
            'MsgBox "输入的文字颜色代码错误!颜色代码详见:控制台色卡.png", vbCritical, "错误"
        End If
    Else
        'MsgBox "输入的背景颜色代码错误!颜色代码详见:控制台色卡.png", vbCritical, "错误"
    End If
End Sub

根据您的网速和视频资源总大小,下载时间不定。为了便于观看进度,上面使用了控制台窗口输出工作进程。

最后,有人会问,为什么不直接用 VB6 来跑上面的进程呢?

因为,win7 x64 用不了 VB6 ,我又懒得开虚拟机,而这个程序主要是下载文件,并不复杂,所以,偷个懒,能在 excel 里面运行的程序我现在都是尽量在 excel 的 VBA 环境来运行了,实属无奈。同时,现在调试一些 VBS 程序或 ASP 程序我也是用 excel + VBA 。

三、等待

    执行上面的代码,然后等待......

四、拼接

    源码执行下载时,顺便生成了 ffmpeg 拼接用的列表文件,如: lst1.txt ,视频输出文件名为暂定为 Out.mp4

    启动 cmd.exe 切换到前面的工作目录 H:\Media\m3u8download\1-08\ , 即生成的 lst1.txt 所在目录,执行 ffmpeg 拼接命令:

    ffmpeg -y -f concat -safe 0 -i lst1.txt -s 1080x806 -vcodec h264 -vf fps=25,format=yuv420p -b 1500000 Out.mp4

 20220809修改:由于对 ffmpeg 不熟悉,发现以上拼接命令严重问题,即:对原视频进行了重编码,这肯定不好,于是找到直接拼接的参数,不再重编码,快多了:

ffmpeg -y -f concat -safe 0 -i lst1.txt -c copy out.mp4

    然后就等待 ffmpeg 慢慢拼接吧!

五、观影及xls工程下载

    播放视频,查看拼接是否正确。

    若您懒得自己做上面的代码,也可以直接下载我做好的 xls 文件:excelVBA下载m3u8资源.zip-VB文档类资源-CSDN下载,将上面说到的工作目录和根链接地址填入,点击“下载 M3U8”按钮即可,打开这个 xls 文件时记得选择允许运行宏即可:

此记!

‘-------------------------------------------

20220809        修正拼接参数 bug 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

jessezappy

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值