-
安装vosk
pip install vosk
-
下载示例代码
git clone https://github.com/alphacep/vosk-api.git
git clone https://gitee.com/dimld/vosk-api.git # gitee资源(国内镜像,二选一即可)
-
下载模型
去官网下载预编译的模型文件,或者使用指令下载。主要用的是 vosk-model-cn-0.15.zip,另外两个没用上:
wget -c https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
wget -c https://alphacephei.com/vosk/models/vosk-model-cn-0.15.zip
wget -c https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip
-
修改测试代码
vi /opt/vosk/vosk-api/python/example/test_simple.py
注释掉如下代码:
# rec.SetPartialWords(True)
否则会报如下错误:
AttributeError: 'KaldiRecognizer' object has no attribute 'SetPartialWords'
-
配置模型
解压 vosk-model-cn-0.15.zip 文件,并将解压后的文件夹名称修改为 model(随便命名也行),然后在 test_simple.py 文件内写入该地址:model = Model("model")。最好写绝对路径,避免开机启动时找不到文件。
-
测试语音识别
python3 test_simple.py test2.wav
-
使用Flask搭建简单识别接口
"""Minimal Flask service exposing Vosk speech recognition over HTTP.

POST form fields to ``/``:

* ``typ``  -- ``"url"`` to download the audio first, ``"path"`` to read a
  file that already exists on the server.
* ``path`` -- the URL or the local file path, depending on ``typ``.

The JSON response always has the shape ``{"code": int, "data": {...}}``.
"""
import random
import time
import datetime
import json
import os
import sys
import tempfile
import wave

import requests
from flask import Flask, request
from vosk import Model, KaldiRecognizer, SetLogLevel

# Resolve against the absolute script path: a bare dirname(__file__) is ''
# when the script is started with a relative path, which would turn
# '' + os.sep + 'model' into the filesystem root '/model'.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Speech recognition model, loaded once at start-up and shared by requests.
voskmodel = Model(os.path.join(BASE_DIR, "model"))

# Web service object.
app = Flask(__name__)


@app.route('/', methods=['POST'])
def postRequest():
    """Handle a recognition request and return ``{"code", "data"}``.

    Returns code 404 when ``typ``/``path`` are missing or unknown, 400 when
    the audio cannot be fetched, opened or is not mono 16-bit PCM WAV, and
    200 with the recognition result otherwise.
    """
    data = {'data': {'text': 'Not Found!'}, 'code': 404}

    path = request.form.get('path')
    typ = request.form.get('typ')  # 'url' or 'path'
    if not path or typ not in ('url', 'path'):
        return data

    # NOTE(review): `path` is client-controlled. 'url' lets a caller make the
    # server fetch arbitrary URLs and 'path' lets it open arbitrary local
    # files. Acceptable only while the service is bound to 127.0.0.1;
    # add an allow-list before exposing it on a network.
    try:
        if typ == 'url':
            res = _recognize_url(path)
        else:
            res = getwav(voskmodel, path)
    except (ValueError, wave.Error, EOFError, OSError,
            requests.RequestException) as err:
        # Report the failure to the caller instead of killing the worker.
        return {'data': {'text': str(err)}, 'code': 400}

    data['data'] = res
    data['code'] = 200
    return data


def _recognize_url(url):
    """Download *url* to a private temp file, recognize it, then clean up."""
    # A per-request temp file avoids concurrent requests overwriting one
    # shared 'voice.wav'.
    fd, tmp_path = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        downlaod(url, tmp_path)
        return getwav(voskmodel, tmp_path)
    finally:
        os.remove(tmp_path)


def downlaod(url, file_path):
    """Download *url* and write the response body to *file_path*.

    The (misspelled) name is kept so existing callers keep working.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-2xx HTTP status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0"
    }
    # Without a timeout a stalled remote host would block the worker forever.
    r = requests.get(url=url, headers=headers, timeout=30)
    # Do not save an HTML error page as if it were audio.
    r.raise_for_status()
    with open(file_path, "wb") as f:
        f.write(r.content)


def _merge_segments(segments):
    """Combine per-utterance Vosk result dicts into a single result dict."""
    texts = [seg['text'] for seg in segments if seg.get('text')]
    words = [word for seg in segments for word in seg.get('result', [])]
    merged = {'text': ' '.join(texts)}
    if words:
        merged['result'] = words
    return merged


def getwav(model, path):
    """Recognize the WAV file at *path* with *model*.

    Args:
        model: a loaded ``vosk.Model``.
        path: path of a mono, 16-bit, uncompressed PCM WAV file.

    Returns:
        dict with ``text`` (full transcript), ``result`` (per-word details,
        present when any word was recognized) and ``time`` (elapsed whole
        seconds, as a string).

    Raises:
        ValueError: if the file is not mono 16-bit PCM.
        wave.Error, EOFError, OSError: if the file cannot be read as WAV.
    """
    startTime = datetime.datetime.now()
    print('识别开始:' + startTime.strftime('%Y-%m-%d %H:%M:%S'))
    SetLogLevel(0)

    segments = []
    # The context manager closes the file even when recognition fails.
    with wave.open(path, "rb") as wf:
        if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
            # Raising (instead of exit(1)) keeps the web server alive.
            raise ValueError("Audio file must be WAV format mono PCM.")

        rec = KaldiRecognizer(model, wf.getframerate())
        rec.SetWords(True)

        while True:
            frames = wf.readframes(4000)
            if not frames:
                break
            if rec.AcceptWaveform(frames):
                # An utterance was finalized; keep it, because FinalResult()
                # only returns whatever follows the last finalized utterance.
                segments.append(json.loads(rec.Result()))
        segments.append(json.loads(rec.FinalResult()))

    res = _merge_segments(segments)
    print('识别结果:' + str(res['text']))

    endTime = datetime.datetime.now()
    print('识别结束:' + endTime.strftime('%Y-%m-%d %H:%M:%S'))
    useTime = str(int((endTime - startTime).total_seconds()))
    print('识别耗时:' + useTime + '秒')
    res['time'] = useTime
    return res


if __name__ == '__main__':
    # Start the service on the loopback interface, port 9090.
    app.run('127.0.0.1', 9090)
两种识别模式:
- 远程识别,通过拉取音频文件识别
{
'path': 'http://127.0.0.1:8000/files/2022/08/xpxpgbcl.wav',
'typ' : 'url'
}
- 本地识别,通过获取本地路径识别
{
'path': 'E:\……/xpxpgbcl.wav',
'typ' : 'path'
}