假日无事,手撸几行代码,把B站缓存的音视频数据合成为MP4视频,主要原理如下:
1、查找m4s文件,二进制编辑,去除前9个0,并本地保存
2、调用ffmpeg进行音视频合成,需要注意中文路径问题(下载中文版的ffmpeg,放到系统path中)
代码如下:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Merge cached Bilibili video/audio ``.m4s`` pairs into ``.mp4`` files.

Workflow (see ``main``):

1. Find every ``<workdir>/download/*/`` folder holding a ``.videoInfo`` file.
2. Keep the folders that contain exactly two ``.m4s`` files.
3. Strip the 9 leading padding bytes from each ``.m4s`` file *in place* and
   rename it to ``.mp4``.
4. Call the ``ffmpeg`` executable to mux each pair (stream copy) into
   ``<workdir>/video/<group title>/<title>.mp4``.

NOTE: step 3 rewrites the cache files themselves.
"""
import sys
import os
import glob
import re
import time
import json
import io
import argparse
import concurrent.futures
import subprocess

try:
    # Nothing in this script calls the ``ffmpeg`` Python package (only the
    # ffmpeg executable is used), so a missing package must not abort the run.
    import ffmpeg
except ImportError:
    ffmpeg = None

# Default working directory, also used by ffmpeg2mp4 when none is given.
DEFAULT_WORKDIR = 'D:\\bilibili\\'
# Name of the ffmpeg executable; it must be reachable through PATH.
FFMPEG_EXE = 'ffmpeg.exe'
# Number of padding bytes in front of the real media data.
_PAD_LEN = 9
# Characters that are replaced by "_" when a title is used as a file name.
_INVALID_TITLE_CHARS = re.compile(r"[\/\\\:\*\?\"\<\>\|]")


def parse_args():
    """Parse the command line and return the resulting namespace."""
    parser = argparse.ArgumentParser(description='video and audio m4s to mp4')
    parser.add_argument('--workdir', type=str, default=DEFAULT_WORKDIR,
                        help='the dir to work ')
    parser.add_argument('--num_threads', type=int, default=8,
                        help='the num of threads to combine video and audio file to mp4')
    args = parser.parse_args()
    return args


def remove9zero(fn):
    """Drop the first 9 bytes of file *fn*, rewriting the file in place.

    A file that already has ``ftyp`` at byte offset 4 (i.e. it already starts
    with an MP4 ``ftyp`` box) is left untouched, so calling this twice on the
    same file does not cut into real media data.

    Returns True when the file was rewritten, False when it was left as is.
    """
    with open(fn, 'rb+') as f:
        head = f.read(8)
        if head[4:8] == b'ftyp':
            return False
        f.seek(_PAD_LEN)
        data = f.read()
        f.seek(0)
        f.write(data)
        f.truncate()
    return True


def renamefile(oldfn, newfn):
    """Rename *oldfn* to *newfn*, printing the outcome instead of raising.

    Returns True on success and False when the rename failed.
    """
    try:
        os.rename(oldfn, newfn)
    except FileNotFoundError:
        print("file not found!")
        return False
    except Exception as e:
        print("error:", str(e))
        return False
    print("rename complete!")
    return True


def getfileInfo(folder):
    """Read ``<folder>/.videoInfo`` (UTF-8 JSON) and return the titles.

    Returns a dict with ``'title'`` (from the JSON key ``title``) and
    ``'folder_title'`` (from the JSON key ``groupTitle``).

    Raises OSError if the file cannot be read, ValueError if it is not valid
    JSON and KeyError if one of the two keys is missing.
    """
    videoinfo = os.path.join(folder, '.videoInfo')
    with open(videoinfo, 'r', encoding='UTF-8') as f:
        data = json.load(f)
    return {'title': data['title'], 'folder_title': data['groupTitle']}


def validateTitle(title):
    """Return ``str(title)`` with ``/ \\ : * ? " < > |`` replaced by ``_``."""
    return _INVALID_TITLE_CHARS.sub("_", str(title))


def ffmpeg2mp4(f1, f2, fn, folder, workdir=None):
    """Mux the two input files into ``<workdir>/video/<folder>/<fn>.mp4``.

    Args:
        f1, f2: paths of the two input streams (video and audio, any order).
        fn: output file name without extension.
        folder: name of the output sub-folder below ``<workdir>/video``.
        workdir: working directory; defaults to ``DEFAULT_WORKDIR``.

    Returns the exit code of the ffmpeg process.
    Raises OSError (e.g. FileNotFoundError) if ffmpeg cannot be started.
    """
    if workdir is None:
        workdir = DEFAULT_WORKDIR
    out_dir = os.path.join(workdir, 'video', folder)
    os.makedirs(out_dir, exist_ok=True)
    # An argument list (not one command string) keeps paths containing spaces
    # or non-ASCII characters intact.
    cmd = [FFMPEG_EXE, '-i', f1, '-i', f2, '-y', '-c', 'copy',
           os.path.join(out_dir, fn + '.mp4')]
    print(' '.join(cmd))
    # stdin is detached so parallel ffmpeg processes do not compete for the
    # console input.
    return subprocess.run(cmd, stdin=subprocess.DEVNULL).returncode


def combinefile(argslist, args):
    """Run ffmpeg2mp4 for every job in *argslist* using a thread pool.

    Args:
        argslist: iterable of dicts with the keys ``f1``, ``f2``, ``fn`` and
            ``folder``; spaces are removed from ``fn`` and ``folder``.
        args: namespace providing ``workdir`` and ``num_threads``.

    Failures of individual jobs are printed; the remaining jobs still run.
    """
    jobs = [
        (item['f1'], item['f2'],
         item['fn'].replace(' ', ''), item['folder'].replace(' ', ''))
        for item in argslist
    ]
    if not jobs:
        return
    workers = max(1, int(getattr(args, 'num_threads', 1) or 1))
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        pending = {
            executor.submit(ffmpeg2mp4, f1, f2, fn, folder, args.workdir): fn
            for f1, f2, fn, folder in jobs
        }
        for future in concurrent.futures.as_completed(pending):
            name = pending[future]
            try:
                code = future.result()
            except OSError as exc:
                print(f'{name}: could not run {FFMPEG_EXE}: {exc}')
                continue
            if code != 0:
                print(f'{name}: ffmpeg exited with code {code}')


def main():
    """Entry point: locate cached pairs, prepare them and merge them."""
    args = parse_args()

    # Collect the folders that contain a .videoInfo file.
    info_pattern = os.path.join(glob.escape(args.workdir),
                                'download', '*', '.videoInfo')
    info_folders = sorted({os.path.dirname(p) for p in glob.glob(info_pattern)})

    # Keep only the folders holding exactly two .m4s files.
    pairs = {}
    for item in info_folders:
        m4s_files = sorted(glob.glob(os.path.join(glob.escape(item), '*.m4s')))
        if len(m4s_files) == 2:
            pairs[item] = m4s_files
        else:
            print(f'{item}: expected 2 .m4s files, found {len(m4s_files)}; skipped')
    print('total folder num is ' + str(len(pairs)))

    # For each folder: read the titles, strip the padding, rename to .mp4.
    argslist = []
    for folder, files in pairs.items():
        # Read the metadata first so a broken .videoInfo does not leave
        # half-processed cache files behind.
        try:
            finfo = getfileInfo(folder)
        except (OSError, ValueError, KeyError) as exc:
            print(f'{folder}: cannot read .videoInfo ({exc!r}); skipped')
            continue

        renamed = []
        for file in files:
            remove9zero(file)
            target = os.path.splitext(file)[0] + '.mp4'
            if renamefile(file, target):
                renamed.append(target)
        if len(renamed) != 2:
            print(f'{folder}: rename failed; skipped')
            continue

        argslist.append({
            'f1': renamed[0],
            'f2': renamed[1],
            'fn': validateTitle(finfo['title']).replace(' ', ''),
            'folder': validateTitle(finfo['folder_title']).replace(' ', ''),
        })

    # Create one output folder per distinct group title.
    out_folders = {item['folder'] for item in argslist}
    print('folder num is ' + str(len(out_folders)))
    for folder_name in out_folders:
        # makedirs also creates <workdir>/video itself when it is missing.
        os.makedirs(os.path.join(args.workdir, 'video', folder_name),
                    exist_ok=True)

    # Merge video and audio using the thread pool.
    combinefile(argslist, args)


if __name__ == '__main__':
    main()