#使用python和google cloud实现音频转文本,stt(speech to text)
#安装ffmpeg
win10:去官网下载并解压ffmpeg,然后将其所在目录加入PATH环境变量
#将mp3文件通过ffmpeg转化为wav格式
ffmpeg -i C:\Users\Administratorl\Downloads\input.mp3 C:\Users\Administratorl\Downloads\input.wav
#截取wav文件的前若干秒音频(-ss 为起始秒数,-t 为截取时长,单位为秒)
ffmpeg -i C:\Users\Administratorl\Downloads\input.wav -ss 0 -t 2100 -c copy C:\Users\Administratorl\Downloads\input1.wav
#使用python代码和google api将音频转文字,实测1分钟以内均可以
import speech_recognition as sr
# Transcribe a WAV file to text with the SpeechRecognition package and print
# the result. Relies on `sr` (speech_recognition) being imported above.

# Location of the audio file to transcribe.
audio_file = r'C:\Users\Administratorl\Downloads\input.wav'
# Create the recognizer object used to read and recognize the audio.
r = sr.Recognizer()
# Read the audio file into an in-memory audio object.
with sr.AudioFile(audio_file) as source:
    audio = r.record(source)
# Recognize the audio (English, US) and print the transcript.
try:
    print(r.recognize_google(audio, language='en-US'))
except sr.UnknownValueError as e:
    # Raising a bare string is a TypeError in Python 3, so raise a real
    # exception of the same type and keep the original as its cause.
    raise sr.UnknownValueError(
        'Google Speech Recognition could not understand audio'
    ) from e
except sr.RequestError as e:
    # Same fix as above: preserve the exception type and chain the cause so
    # the underlying request failure stays visible in the traceback.
    raise sr.RequestError(
        'Could not request results from Google Speech Recognition Service'
    ) from e
#音频过大直接登录google cloud
#google cloud
#speech studio 音频转文字,400MB以下可直接转
#cloud storage 上传音频,超过400mb需要先上传
#speech studio_transcriptions 找到转化好的文本文件