多线程下载视频，并运用Fmmpeg合成-CSDN博客

本文链接：https://blog.csdn.net/weixin_42575020/article/details/105539646

上一篇博客里我们提到了关于视频加密的破解，现在就先讲一下思路，然后直接上代码，哪里写的不合适，或者你有更厉害的方法，可以一起讨论一下。

快速下载这一块主要是运用多线程或者协程来实现，但是我们知道这样下载的视频会是乱序的，如果直接合并操作，视频会出现错乱的情况，然后我们的做法就是将下载的视频流保存到一个文件里，利用文件自动的排序，合理化视频流的顺序。然后用Fmmpeg模块将文件中的视频流频接到一起，合成一个完整的视频，并删除掉保存的视频流文件。

这里要注意的是，.m3u8文件会有几种形式，有加密的带有URL，不加密的（不需要key），带有偏移的（iv），还有不带的，所以在请求视频流之前要根据情况采取不同的处理操作。（我下面的代码里可能没有涵盖到所有的情况，需要大家根据自己的情况做一下调整）下面直接上完整代码。

# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import absolute_import
 
import datetime
import os, sys
import re
import threading
#from eventlet.green import threading
import time
from urllib.parse import urlparse
#import requests
import eventlet

#python版本不同导入队列的方式不同
if sys.version_info.major < 3:
    import Queue
    import ConfigParser as configparser
else:   
    import queue as Queue
    import configparser
 
requests = eventlet.import_patched('requests')
 
# 预下载，获取m3u8文件，读出ts链接，并写入文档
def down(headers, url, base_url):
    m3u8_dirname = os.path.dirname (url)
    m3u8_urlp = urlparse (url)
    # 当ts文件链接不完整时，需拼凑
    resp = requests.get(url, headers=headers)
    m3u8_text = resp.text
    # print(m3u8_text)
    # 按行拆分m3u8文档
    ts_queue = eventlet.queue.Queue()
    #ts_queue = eventlet.queue.LifoQueue ()
    #ts_queue = Queue(10000)
    lines = m3u8_text.split('\n')
    s = len(lines)
    # 找到文档中含有ts字段的行
    #concatfile = 'cache/' + "s" + '.txt'
    concatfile = 'cache/' + "decode" + '.m3u8'
    if os.path.exists(concatfile):
        os.remove(concatfile)
    s_count = 1
    for i,line in enumerate(lines):
        #if len(line) >=3 and line[-3:] == '.ts':
        if '.ts' in line:
            if 'http' in line:
                # print("ts>>", line)
                http_line = line
                pass
            else:
                path = os.path.dirname (line)
                if len(path) == 0:
                    http_line = m3u8_dirname + '/' + line
                else:
                    http_line = m3u8_urlp.scheme + '://' + m3u8_urlp.netloc + '' + line
                    #line = base_url + line
            #filename = re.search('([a-zA-Z0-9-_]+.ts)', line).group(1).strip()
            #filename = os.path.basename (line)
            filename = str(s_count).zfill(10)+'.ts'
            if not os.path.exists('cache/' +filename):
                #print ("  Add ", filename)
                #ts_queue.put(line)
                ts_queue.put ((filename, http_line, 0))
            else:
                #print ("  Had ", filename)
                pass
            # print('ts>>',line)
 
            # 一定要先写文件，因为线程的下载是无序的，文件无法按照
            # 123456。。。去顺序排序，而文件中的命名也无法保证是按顺序的
            # 这会导致下载的ts文件无序，合并时，就会顺序错误，导致视频有问题。
            #open(concatfile, 'a+').write("file %s\n" % filename)
            open(concatfile, 'a+').write("%s\n" % filename)
            #print("\r", '文件写入中', i, "/", s, end="", flush=True)
            s_count += 1
            print("\r", '写入中', s_count, "/", s, http_line, end="", flush=True)
        else:
            # 若发现了 加密 key，则把 key 本地化
            key_re = re.search ("(URI=\".*\.key\")", line)
            if key_re != None:
                key_url = key_re.group(1).strip()
                key_url = key_url[5:-1]
                path = os.path.dirname (key_url)
                if len(path) == 0:
                    http_key = m3u8_dirname + '/' + key_url
                else:
                    http_key = m3u8_urlp.scheme + '://' + m3u8_urlp.netloc + '' + key_url
 
                key_line = line[:key_re.start()+5] + "key.key" + line[key_re.end()-1:]
                print (line, key_url, http_key, key_line,"\n")
 
                key_r = requests.get(http_key, stream=True, headers=headers, timeout=(15, 60), verify=True)
                with open('cache/key.key', 'wb') as fp:
                    for chunk in key_r.iter_content(5242):
                        if chunk:
                            fp.write(chunk)
                open(concatfile, 'a+').write(key_line+"\n")
            else:                
                open(concatfile, 'a+').write(line+"\n")
    return ts_queue, concatfile
 
# 线程模式，执行线程下载
def run(ts_queue, headers, pool):
    while True:
        try:
            #url, sleepTime = ts_queue.get (True, 0.5)
            filename, url, sleepTime = ts_queue.get (True, 0.5)
        except Queue.Empty :
            break
 
        if sleepTime > 0:
            eventlet.sleep (sleepTime)
        #filename = re.search('([a-zA-Z0-9-_]+.ts)', url).group(1).strip()
        #filename = os.path.basename (url)
        requests.packages.urllib3.disable_warnings()
 
        try:
            r = requests.get(url, stream=True, headers=headers, timeout=(15, 60), verify=False)
            r.raise_for_status ()
            with open('cache/' + filename, 'wb') as fp:
                for chunk in r.iter_content(5242):
                    if chunk:
                        fp.write(chunk)
            print("\r", '任务文件 ', filename, ' 下载成功', pool.running(), ts_queue.qsize(), end="         ", flush=True)
        except Exception as exc:
            print( '任务文件 ', filename, ' 下载失败, 代码:', exc)
            ts_queue.put((filename, url, 5))
            #eventlet.sleep (2)
    #return True
 
# 视频合并方法，使用ffmpeg
def merge(concatfile, name):
    try:
        #path = 'cache/' + name + '.mp4'
        path = 'e:/rm/80s/' + name + '.mp4'
        # command = 'ffmpeg -y -f concat -i %s -crf 18 -ar 48000 -vcodec libx264 -c:a aac -r 25 -g 25 -keyint_min 25 -strict -2 %s' % (concatfile, path)
        command  = "ffmpeg -allowed_extensions ALL -protocol_whitelist \"file,http,crypto,tcp\" "
        #command += ' -y -f concat -i %s -bsf:a aac_adtstoasc -c copy %s' % (concatfile, path)
        command += ' -y -i %s -bsf:a aac_adtstoasc -c copy %s' % (concatfile, path)
        print (command)
        os.system(command)
        print('视频合并完成')
    except:
        print('合并失败')
 
def remove():
    dir = 'cache/'
    """
    #for line in open('cache/s.txt'):
    for line in open('cache/decode.m3u8'):
        #line = re.search('file (.*?ts)', line).group(1).strip()
        line = re.search('(.*?ts)', line).group(1).strip()
        # print(line)
        os.remove(dir + line)
    print("ts文件全部删除")
    try:
        os.remove('cache/s.txt')
        print('文件删除成功')
    except:
        print('文件删除失败')
    """
    command = "del " + dir + "*/Q"
    os.system(command)
 
# headers 和 base_url 必须根据实际网站 ！手动 ！ 设置
if __name__ == '__main__':
    # 测试用链接：https://yiyi.55zuiday.com/ppvod/70B5A6E3A150A99882E28EC793CAF519.m3u8 
    # 链接电影：地球最后的夜晚
    # https://youku.com-ok-sohu.com/20191110/20128_fd24c5a9/1000k/hls/61033a1fdc2000000.ts
    #base_url = 'https://yiyi.55zuiday.com/'
    base_url = 'http://cn5.download05.com/hls/20190721/e7339c09fff1c6c817c2b3cfad9b1d39/1563674290/film_00000.ts'
    headers = {
        #'referer': 'https://yiyi.55zuiday.com/share/wVuAcJFy1tMy4t0x',
        'referer': 'http://www.douying99.com/play/47309_m3u8_0.html',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
    }
 
    requests.adapters.DEFAULT_RETRIES = 5
 
    name = input('请输入视频名称：')
    headers['referer'] = input('请输入网页链接：').strip()
    url = input('请输入视频m3u8链接：').strip()
 
    start = datetime.datetime.now().replace(microsecond=0)
    print("目录文件开始写入")
    s, concatfile = down(headers, url, base_url)
    print('\n')
    print("目录文件写入结束")
    # 获取队列元素数量
    t_num = s.qsize()
    # 根据数量来开线程数，每五个元素一个线程
    # 最大开到50个
    print("下载任务开始")
    """
    if num > 5:
        t_num = num // 5
    else:
        t_num = 1
    """
    if t_num > 60:
        t_num = 60
    # print(s,concatfile)
 
    pool = eventlet.GreenPool(t_num)
    run_args={'ts_queue': s, 'headers': headers, 'pool': pool}
    for i in range(t_num):
        pool.spawn_n (run, **run_args)
    pool.waitall()
    """
    threads = []
    for i in range(t_num):
        t = threading.Thread(target=run, name='th-' + str(i), kwargs={'ts_queue': s, 'headers': headers})
        t.setDaemon(True)
        threads.append(t)
    for t in threads:
        time.sleep(0.4)
        t.start()
    for t in threads:
        t.join()
    """
    print('\n')
    print("下载任务结束")
    end = datetime.datetime.now().replace(microsecond=0)
    print('写文件及下载耗时：' + str(end - start))
    merge(concatfile, name)
    remove()
    over = datetime.datetime.now().replace(microsecond=0)
    print('合并及删除文件耗时：' + str(over - end))
    print("所有任务结束 ", name)
    print('任务总时长：', over - start)