VOSK是一个离线开源语音识别工具。它可以识别16种语言,包括中文。
虽然说只能解析wav格式音频,但是我用MP3也能识别(不知道能不能保证精准度)
VOSKMODELPATH是Vosk模型目录的路径;
亲测Windows和Linux都有效;
/**
 * Transcribes the audio file at {@code path} to text with the offline Vosk
 * model found at {@code VOSKMODELPATH}.
 *
 * <p>Steps: the input is converted by {@code voiceToAudio}, re-sampled in place
 * by {@code reSamplingAndSave}, fed to a Vosk {@code Recognizer} in 4 KiB
 * chunks, and the {@code "text"} field of the final JSON result is returned
 * with all spaces removed. The converted file is deleted afterwards, whether
 * or not recognition succeeded.
 *
 * @param path location of the source audio; handed to {@code voiceToAudio}
 * @return the recognized text without spaces; an empty string when any step
 *         fails (the exception is printed, not rethrown) or when the result
 *         has no {@code "text"} field
 */
public static String getWord(String path) {
    String filePath = voiceToAudio(path);
    File f = new File(filePath);
    try {
        Assert.isTrue(StringUtils.hasLength(VOSKMODELPATH), "无效的VOS模块!");
        byte[] bytes = Files.readAllBytes(Paths.get(filePath));
        // Convert to 16 kHz.
        reSamplingAndSave(bytes, filePath);

        // Two bytes at offset 22 of the file header: used below as the channel count.
        short track;
        try (RandomAccessFile rdf = new RandomAccessFile(f, "r")) {
            track = toShort(read(rdf, 22, 2));
        }

        LibVosk.setLogLevel(LogLevel.WARNINGS);

        String result;
        // Resources are closed in reverse order: recognizer, audio streams, model.
        // The raw stream is its own resource so it is released even when
        // AudioSystem rejects the file; otherwise the delete below can fail on Windows.
        try (Model model = new Model(VOSKMODELPATH);
             InputStream raw = new BufferedInputStream(new FileInputStream(filePath));
             InputStream ais = AudioSystem.getAudioInputStream(raw);
             // The sample rate passed in is the audio sample rate times the channel count.
             Recognizer recognizer = new Recognizer(model, 16000 * track)) {
            byte[] b = new byte[4096];
            int nbytes;
            while ((nbytes = ais.read(b)) >= 0) {
                // The return value is deliberately ignored: only the final result is read.
                recognizer.acceptWaveForm(b, nbytes);
            }
            // Extracted text, as a JSON document.
            result = recognizer.getFinalResult();
        }

        if (StringUtils.hasLength(result)) {
            JSONObject jsonObject = JSON.parseObject(result);
            String text = jsonObject == null ? null : jsonObject.getString("text");
            return text == null ? "" : text.replace(" ", "");
        }
        return result;
    } catch (Exception e) {
        // Best-effort API: callers receive "" on failure instead of an exception.
        e.printStackTrace();
    } finally {
        // Remove the converted WAV file on every exit path, not only on success.
        if (f.isFile()) {
            f.delete();
        }
    }
    return "";
}