目录
dragonfly
阿里达摩院FunASR:一款高效的端到端语音识别工具包
不错的功能介绍
音频转特征向量
GitHub - librosa/librosa: Python library for audio and music analysis
librosa安装
2024.04.27 在 Win11 系统上测试通过
pip install librosa
import os
import numpy as np
from transformers import Wav2Vec2Processor, Wav2Vec2Model
import torch
import librosa
def load_example_input(audio_path, processor=None):
    """Load an audio file and convert it into a wav2vec2 input tensor.

    The file is read with ``librosa.load`` at a 16 kHz sampling rate, run
    through the processor, and the processor's ``input_values`` are returned
    as a float32 tensor with a leading axis added.

    Args:
        audio_path: Location of the audio file; forwarded unchanged to
            ``librosa.load``.
        processor: Callable whose result exposes ``input_values``. When
            ``None``, the pretrained ``facebook/wav2vec2-base-960h``
            ``Wav2Vec2Processor`` is loaded on every call, so pass one in
            explicitly when converting many files.

    Returns:
        A ``torch.float32`` tensor holding the reshaped ``input_values``.
        For a single clip this is expected to be ``(1, num_samples)`` --
        assumes the processor yields one 1-D sequence; confirm for other
        processors.
    """
    if processor is None:
        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")

    # The original wrapped the path in a single-argument os.path.join, which
    # does not alter it; the path is passed straight through instead.
    speech_array, sampling_rate = librosa.load(audio_path, sr=16000)

    input_values = processor(speech_array, sampling_rate=sampling_rate).input_values
    audio_feature = np.squeeze(input_values)
    # Put the squeezed sequence on the last axis and let numpy infer the
    # leading one.
    audio_feature = np.reshape(audio_feature, (-1, audio_feature.shape[0]))

    # torch.tensor with an explicit dtype replaces the legacy
    # torch.FloatTensor constructor.
    return torch.tensor(audio_feature, dtype=torch.float32)
# Example invocation: run the loader on the sample recording.
# The returned tensor is not stored or printed here.
audio_path = r'demo/wav/man.wav'
load_example_input(audio_path)
语音识别
pip install SpeechRecognition
pip install pyaudio
import librosa
import speech_recognition as sr
# Record audio: capture from the microphone while the source is open.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("请开始说话...")
    audio = r.listen(source)

# Convert the captured audio to text; only the recognition call is guarded,
# and the result is printed on success.
try:
    text = r.recognize_google(audio)
except sr.UnknownValueError:
    print("无法识别音频")
except sr.RequestError as e:
    print(f"请求出错:{e}")
else:
    print("识别结果:", text)