目录
ffmpeg 提取字幕(尚未测试成功;`-map 0:s:0` 只能提取视频内嵌的第一条字幕流,如果字幕是直接压在画面上的硬字幕,视频里没有字幕流,命令会报错)
ffmpeg -i input_video.mp4 -map 0:s:0 output_subtitle.srt
import subprocess
from moviepy.editor import VideoFileClip
# Extract the first embedded subtitle stream of video_o.mp4 into subtitles.srt.
# '-map 0:s:0' selects input 0, subtitle stream 0; ffmpeg exits with an error
# when the container has no subtitle stream (e.g. subtitles burned into the
# picture), in which case an OCR-based approach is needed instead.
# An alternative that was tried: ['-map', '0', '-c:s', 'srt'].
# The video does not need to be opened with moviepy for this: ffmpeg reads the
# file itself, and an unused VideoFileClip would only leave a reader open.
result = subprocess.run(
    ['ffmpeg', '-i', 'video_o.mp4', '-map', '0:s:0', 'subtitles.srt']
)
# Surface failures instead of silently continuing without a subtitle file.
if result.returncode != 0:
    print(f'ffmpeg failed with exit code {result.returncode}; '
          'subtitles.srt was not extracted')
OpenCV 加 OCR(逐帧识别画面中的文字并生成 SRT)
import cv2
import pytesseract
# Video to scan for on-screen text.
video_path = 'path/to/video.mp4'
# Destination of the generated subtitle file.
output_file = 'path/to/subtitles.srt'


def format_srt_time(milliseconds):
    """Return *milliseconds* as an SRT timestamp of the form ``HH:MM:SS,mmm``.

    Fractional milliseconds are truncated and negative values are clamped
    to zero.
    """
    total_ms = max(0, int(milliseconds))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    seconds, millis = divmod(remainder, 1000)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}'


def merge_frame_texts(frames, last_duration_ms=1000):
    """Collapse per-frame OCR results into subtitle cues.

    Args:
        frames: iterable of ``(timestamp_ms, text)`` pairs in playback order.
        last_duration_ms: duration given to the final cue, because no later
            frame exists to mark where it ends.

    Returns:
        A list of ``(start_ms, end_ms, text)`` tuples. Consecutive frames
        with the same text become one cue, and frames without text produce
        no cue.
    """
    cues = []
    current_text = None
    start = None
    last_timestamp = None
    for timestamp, raw_text in frames:
        # A blank line terminates a cue in SRT, so drop empty lines and
        # surrounding whitespace from the OCR output.
        text = '\n'.join(
            line.strip() for line in raw_text.splitlines() if line.strip()
        )
        if text != current_text:
            if current_text:
                # The previous cue ends where the text changes.
                cues.append((start, timestamp, current_text))
            current_text = text
            start = timestamp
        last_timestamp = timestamp
    if current_text:
        cues.append((start, last_timestamp + last_duration_ms, current_text))
    return cues


def write_srt(cues, path):
    """Write ``(start_ms, end_ms, text)`` cues to *path* as a UTF-8 SRT file."""
    # Explicit UTF-8 so non-ASCII subtitles do not depend on the locale.
    with open(path, 'w', encoding='utf-8') as f:
        for index, (start, end, text) in enumerate(cues, start=1):
            f.write(f'{index}\n')
            f.write(f'{format_srt_time(start)} --> {format_srt_time(end)}\n')
            f.write(f'{text}\n\n')


def read_frame_texts(path):
    """Yield ``(timestamp_ms, text)`` for every frame of the video at *path*.

    Each frame is converted to grayscale and passed to Tesseract.

    Raises:
        OSError: if the video cannot be opened.
    """
    video = cv2.VideoCapture(path)
    try:
        if not video.isOpened():
            raise OSError(f'cannot open video: {path}')
        while True:
            ret, frame = video.read()
            if not ret:
                break
            # Convert the frame to grayscale before OCR.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray)
            # Position reported by the capture, in milliseconds.
            timestamp = video.get(cv2.CAP_PROP_POS_MSEC)
            yield timestamp, text
    finally:
        # Release the capture even when OCR or decoding fails.
        video.release()


def main():
    """OCR every frame of ``video_path`` and save the cues to ``output_file``."""
    cues = merge_frame_texts(read_frame_texts(video_path))
    write_srt(cues, output_file)


if __name__ == '__main__':
    main()
https://blog.51cto.com/u_16213402/7617702