生活中很多时候需要提取视频中的文字,借助AI工具辅助编写python程序提取视频文字可以极大提高工作效率。
以下是利用ChatGPT编写的提取视频文字的代码,供大家参考使用。该算法的基本思想是先将视频中的音频提取出来保存为wav文件,然后再从音频文件中识别出文字。
import subprocess
import os
import speech_recognition as sr
import imageio_ffmpeg as ffmpeg
def extract_audio_with_ffmpeg(video_path, audio_output="替换为保存音频文件的路径/temp_audio.wav"):
    """Extract the audio track of a video into a WAV file via imageio-ffmpeg.

    Args:
        video_path: Path of the input video file.
        audio_output: Path of the WAV file to write. The default value is a
            placeholder that is meant to be replaced with a real path.

    Returns:
        ``audio_output`` on success, or ``None`` when ffmpeg could not be
        launched or exited with a non-zero status.
    """
    # Path of the ffmpeg executable as located by imageio-ffmpeg.
    ffmpeg_path = ffmpeg.get_ffmpeg_exe()
    command = [
        ffmpeg_path,
        # Overwrite an existing output file instead of stopping to ask for
        # confirmation (e.g. a leftover file from an earlier failed run).
        "-y",
        "-i", video_path,
        "-vn",                    # drop the video stream
        "-acodec", "pcm_s16le",   # 16-bit little-endian PCM (plain WAV)
        "-ar", "44100",           # sample rate in Hz
        "-ac", "2",               # number of audio channels
        audio_output,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: ffmpeg ran but reported failure.
        # OSError: the executable itself could not be started.
        print(f"提取音频时出错: {e}")
        return None
    print(f"音频成功提取到 {audio_output}")
    return audio_output
def audio_to_text(audio_path, language="zh-CN"):
    """Transcribe an audio file to text with the SpeechRecognition package.

    Args:
        audio_path: Path of the audio file to transcribe.
        language: Language tag passed to the recognizer; defaults to
            ``"zh-CN"``.

    Returns:
        The recognized text. If the recognizer raises ``UnknownValueError``
        or ``RequestError``, a human-readable error message string is
        returned instead of raising.
    """
    recognizer = sr.Recognizer()
    try:
        # Read the entire file into memory before recognition.
        with sr.AudioFile(audio_path) as audio_source:
            recorded = recognizer.record(audio_source)
        # Recognition goes through the Google Web Speech API.
        return recognizer.recognize_google(recorded, language=language)
    except sr.UnknownValueError:
        return "无法识别音频"
    except sr.RequestError as err:
        return f"API请求失败: {err}"
def save_text_to_file(text, file_path):
    """Write ``text`` to ``file_path`` as UTF-8, replacing any existing file.

    Args:
        text: The string to store.
        file_path: Destination path of the text file.
    """
    with open(file_path, mode="w", encoding="utf-8") as out:
        out.write(text)
    # Report the destination once the file has been written and closed.
    print(f"文字已保存到 {file_path}")
def main(video_file, text_output="recognized_text.txt"):
    """Extract the speech of a video as text, save it and print it.

    Args:
        video_file: Path of the video to process.
        text_output: Path of the text file that receives the recognized
            text; defaults to ``"recognized_text.txt"``.

    Returns:
        ``None``. Returns early, without producing any output file, when
        audio extraction fails.
    """
    # Step 1: extract the audio track.
    audio_file = extract_audio_with_ffmpeg(video_file)
    if not audio_file:
        return
    try:
        # Step 2: speech recognition.
        result = audio_to_text(audio_file)
        # Persist the recognized text.
        save_text_to_file(result, text_output)
    finally:
        # Always remove the temporary audio file, even when recognition or
        # saving raised, so a failed run does not leave it behind.
        if os.path.exists(audio_file):
            os.remove(audio_file)
    print("识别结果:")
    print(result)
if __name__ == "__main__":
    import sys

    # Use the first command-line argument as the video path when given;
    # otherwise fall back to the default file name (replace as needed).
    video_file = sys.argv[1] if len(sys.argv) > 1 else "video.mp4"
    main(video_file)