I recently needed to build a voice recording and recognition feature, so I studied the relevant components and implemented it.
Two components do most of the work:
AudioCapturer is the audio capturer. It records PCM (Pulse Code Modulation) audio data; with it you can implement recording that produces either an audio file or an audio stream.
The SpeechRecognizer component converts audio into text.
Both require the ohos.permission.MICROPHONE permission.
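Besides declaring the permission in module.json5, the app has to request it from the user at runtime. A minimal sketch, assuming it runs with access to a UIAbility context (the function name requestMicPermission is mine):

import { abilityAccessCtrl, common, Permissions } from '@kit.AbilityKit';
import { BusinessError, Callback } from '@kit.BasicServicesKit';

const PERMISSIONS: Array<Permissions> = ['ohos.permission.MICROPHONE'];

function requestMicPermission(context: common.UIAbilityContext, callback: Callback<boolean>) {
  let atManager = abilityAccessCtrl.createAtManager();
  atManager.requestPermissionsFromUser(context, PERMISSIONS).then((result) => {
    // An authResults entry of 0 means the permission was granted
    callback(result.authResults.every((status: number) => status === 0));
  }).catch((err: BusinessError) => {
    console.error(`requestPermissionsFromUser failed, code: ${err.code}, message: ${err.message}`);
    callback(false);
  });
}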
There are two approaches to speech recognition: record to a file first and then recognize the text, or recognize the audio in real time while recording.
Let's start with the first approach: record the speech, generate an audio file, then convert that file into text.
The implementation is as follows:
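All of the methods below are static members of one utility class; the class declaration and its fields are not shown in the snippets, so here is a minimal skeleton reconstructed from how the fields are used. The imports, field types, and the UUID-based sessionId are my assumptions:

import { audio } from '@kit.AudioKit';
import { speechRecognizer } from '@kit.CoreSpeechKit';
import { fileIo as fs } from '@kit.CoreFileKit';
import { BusinessError, Callback } from '@kit.BasicServicesKit';
import { util } from '@kit.ArkTS';

// Shape of the options object passed to fs.writeSync/fs.readSync below
class Options {
  offset?: number;
  length?: number;
}

export class SpeechRecognitionUtil {
  static audioCapturer: audio.AudioCapturer | undefined = undefined;
  static asrEngine: speechRecognizer.SpeechRecognitionEngine | undefined = undefined;
  static file: fs.File;
  // One id per recognition session; a random UUID is one way to generate it
  static sessionId: string = util.generateRandomUUID();
  // 10 = a recognition task is in flight, 11 = finished or failed (see stopRecognize)
  static stopRecognizeMark: number = 11;
  // ...methods shown below...
}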
Initialize the audio capture component: create the instance and register the data listener.
static initAudioCapturer(callback: Callback<boolean>) {
  let bufferSize: number = 0;
  let audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // sampling rate
    channels: audio.AudioChannel.CHANNEL_1, // channel count
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // sample format
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // encoding type
  }
  let audioCapturerInfo: audio.AudioCapturerInfo = {
    source: audio.SourceType.SOURCE_TYPE_MIC, // audio source type
    capturerFlags: 0 // audio capturer flags
  }
  let audioCapturerOptions: audio.AudioCapturerOptions = {
    streamInfo: audioStreamInfo,
    capturerInfo: audioCapturerInfo
  }
  let path = getContext().filesDir;
  let filePath = path + '/EMSSpeechRecognition.pcm';
  if (fs.accessSync(filePath)) {
    fs.unlinkSync(filePath);
  }
  SpeechRecognitionUtil.file = fs.openSync(filePath, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);
  // Each captured buffer is appended to the PCM file
  let readDataCallback = (buffer: ArrayBuffer) => {
    let options: Options = {
      offset: bufferSize,
      length: buffer.byteLength
    }
    fs.writeSync(SpeechRecognitionUtil.file.fd, buffer, options);
    bufferSize += buffer.byteLength;
  }
  // Create the AudioCapturer instance
  audio.createAudioCapturer(audioCapturerOptions, (err, capturer) => {
    if (err) {
      console.error(`AudioCapturer: invoke createAudioCapturer failed, code is ${err.code}, message is ${err.message}`);
      callback(false);
      return;
    }
    console.info(`AudioCapturer: create AudioCapturer success`);
    if (capturer !== undefined) {
      SpeechRecognitionUtil.audioCapturer = capturer;
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).on('readData', readDataCallback);
      callback(true);
    } else {
      callback(false);
    }
  });
}
Start a capture session
static startRecord(callback: Callback<boolean>) {
  if (SpeechRecognitionUtil.audioCapturer !== undefined) {
    let stateGroup = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
    // Capture can only start when the state is STATE_PREPARED, STATE_PAUSED or STATE_STOPPED
    if (stateGroup.indexOf((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf()) === -1) {
      console.error(`AudioCapturer: start failed`);
      callback(false);
      return;
    }
    if (SpeechRecognitionUtil.stopRecognizeMark === 10) {
      callback(false);
      ToastUtil.showToast('A recognition task is still in progress');
      return;
    }
    // Start capturing
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).start((err: BusinessError) => {
      if (err) {
        callback(false);
        console.error('AudioCapturer: Capturer start failed.');
      } else {
        callback(true);
        console.info('AudioCapturer: Capturer start success.');
      }
    });
  }
}
Stop capturing
static stopRecord(callback: Callback<Record<string, string>>) {
  if (SpeechRecognitionUtil.audioCapturer !== undefined) {
    // The capturer can only be stopped from STATE_RUNNING or STATE_PAUSED
    if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !== audio.AudioState.STATE_RUNNING &&
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !== audio.AudioState.STATE_PAUSED) {
      callback({
        code: '1',
        info: 'Recording has not started'
      });
      console.info('Capturer is not running or paused');
      return;
    }
    // Stop capturing
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).stop((err: BusinessError) => {
      if (err) {
        callback({
          code: '1',
          info: 'Failed to stop recording'
        });
        console.error('Capturer stop failed.');
      } else {
        fs.close(SpeechRecognitionUtil.file);
        SpeechRecognitionUtil.releaseRecord();
        console.info('Capturer stop success.');
        // Recording is done; hand the file over to recognition
        SpeechRecognitionUtil.beginRecognize((result: Record<string, string>) => {
          callback(result);
        });
      }
    });
  }
}
Destroy the instance and release the recording resources
static releaseRecord() {
  if (SpeechRecognitionUtil.audioCapturer !== undefined) {
    // release is only allowed when the state is neither STATE_RELEASED nor STATE_NEW
    if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_RELEASED ||
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_NEW) {
      console.info('Capturer already released');
      return;
    }
    // Release the resources
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).release((err: BusinessError) => {
      if (err) {
        console.error('Capturer release failed.');
      } else {
        console.info('Capturer release success.');
      }
    });
  }
}
That completes the audio capture; next comes recognition.
Create the recognition engine; the result is returned via callback
static createEngine(callback: Callback<boolean>) {
  // Engine creation parameters
  let extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "long" };
  let initParamsInfo: speechRecognizer.CreateEngineParams = {
    language: 'zh-CN',
    online: 1,
    extraParams: extraParam
  };
  // Call createEngine
  speechRecognizer.createEngine(initParamsInfo, (err: BusinessError, speechRecognitionEngine:
    speechRecognizer.SpeechRecognitionEngine) => {
    if (!err) {
      console.info('Succeeded in creating engine.');
      // Keep the engine instance
      SpeechRecognitionUtil.asrEngine = speechRecognitionEngine;
      callback(true);
    } else {
      // Error 1002200001: creation failed because the language or mode is unsupported,
      //   initialization timed out, or a resource is missing
      // Error 1002200006: the engine is busy, typically when several apps call it at once
      // Error 1002200008: the engine is being destroyed
      console.error(`Failed to create engine. Code: ${err.code}, message: ${err.message}.`);
      callback(false);
    }
  });
}
Query the supported languages, returned via callback
static queryRecognizeLanguages(callback: Callback<Record<string, Array<string>>>) {
  // Query parameters
  let languageQuery: speechRecognizer.LanguageQuery = {
    sessionId: SpeechRecognitionUtil.sessionId
  };
  if (SpeechRecognitionUtil.asrEngine === undefined) {
    return;
  }
  // Call listLanguages
  SpeechRecognitionUtil.asrEngine.listLanguages(languageQuery, (err: BusinessError, languages: Array<string>) => {
    if (!err) {
      callback({
        params: languages
      });
      // Receive the currently supported languages
      console.info(`Succeeded in listing languages, result: ${JSON.stringify(languages)}`);
    } else {
      callback({});
      console.error(`Failed to list languages. Code: ${err.code}, message: ${err.message}.`);
    }
  });
};
Start recognition
static startRecognize() {
  // Parameters for starting recognition
  let recognizerParams: speechRecognizer.StartParams = {
    sessionId: SpeechRecognitionUtil.sessionId,
    audioInfo: { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 },
    extraParams: { maxAudioDuration: 8 * 60 * 60 * 1000 }
  }
  if (SpeechRecognitionUtil.asrEngine === undefined) {
    return;
  }
  // Start listening
  SpeechRecognitionUtil.asrEngine.startListening(recognizerParams);
};
// Write the audio stream to the engine
static async writeAudio() {
  if (SpeechRecognitionUtil.asrEngine === undefined) {
    return;
  }
  let ctx = getContext();
  let filePath: string = `${ctx.filesDir}/EMSSpeechRecognition.pcm`;
  let file = fs.openSync(filePath, fs.OpenMode.READ_WRITE);
  try {
    let buf: ArrayBuffer = new ArrayBuffer(1280);
    let offset: number = 0;
    // Feed the file to the engine in 1280-byte chunks
    while (fs.readSync(file.fd, buf, { offset: offset }) === 1280) {
      let uint8Array: Uint8Array = new Uint8Array(buf);
      SpeechRecognitionUtil.asrEngine.writeAudio(SpeechRecognitionUtil.sessionId, uint8Array);
      // Pace the writes so the engine is not flooded
      await SpeechRecognitionUtil.countDownLatch(1);
      offset = offset + 1280;
    }
  } catch (err) {
    console.error(`Failed to read from file. Code: ${err.code}, message: ${err.message}.`);
  } finally {
    if (null != file) {
      fs.closeSync(file);
    }
  }
}
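writeAudio and stopRecognize depend on two small async helpers, countDownLatch and sleep, that are not shown in this post. A plausible sketch (the exact implementations are my assumption; the 40 ms pacing mirrors the pattern in Huawei's Core Speech Kit sample code):

// Assumed helpers: wait roughly 40 ms per count, and a promise-based sleep
static async countDownLatch(count: number) {
  while (count > 0) {
    await SpeechRecognitionUtil.sleep(40);
    count--;
  }
}

static sleep(ms: number): Promise<void> {
  return new Promise<void>(resolve => setTimeout(resolve, ms));
}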
Set the recognition result listener
static setRecognizeListener(callback: Callback<string>) {
  let returnResult: string = '';
  let lastTimeResult: string = '';
  // Build the listener object
  let setListener: speechRecognizer.RecognitionListener = {
    // Called when recognition starts successfully
    onStart(sessionId: string, eventMessage: string) {
      console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      // Kick off the shutdown watchdog (see stopRecognize below)
      SpeechRecognitionUtil.stopRecognize();
    },
    // Event callback
    onEvent(sessionId: string, eventCode: number, eventMessage: string) {
      console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
    },
    // Result callback, covering both intermediate and final results
    onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
      // Reset the shutdown mark: results are still arriving
      SpeechRecognitionUtil.stopRecognizeMark = 10;
      console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
      if (sessionId !== SpeechRecognitionUtil.sessionId) {
        return;
      }
      if (result.isFinal) {
        returnResult = returnResult + result.result;
        callback(returnResult);
      } else if (lastTimeResult !== result.result) {
        lastTimeResult = result.result;
        callback(returnResult + lastTimeResult);
      }
    },
    // Called when recognition completes
    onComplete(sessionId: string, eventMessage: string) {
      console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      SpeechRecognitionUtil.stopRecognizeMark = 11;
    },
    // Error callback; error codes are delivered here
    // e.g. 1002200006 means the engine is busy because a recognition is in progress
    // See the error-code reference for the full list
    onError(sessionId: string, errorCode: number, errorMessage: string) {
      console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
      SpeechRecognitionUtil.stopRecognizeMark = 11;
    },
  }
  if (SpeechRecognitionUtil.asrEngine === undefined) {
    return;
  }
  // Register the listener
  SpeechRecognitionUtil.asrEngine.setListener(setListener);
};
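stopRecord above hands off to a beginRecognize method that this post does not show. Judging from how the other methods fit together, it presumably chains engine creation, the listener, startListening and the file write; a sketch under that assumption:

// Assumed wiring of the recognition steps; not shown in the original post
static beginRecognize(callback: Callback<Record<string, string>>) {
  SpeechRecognitionUtil.createEngine((created: boolean) => {
    if (!created) {
      callback({ code: '1', info: 'Failed to create the recognition engine' });
      return;
    }
    SpeechRecognitionUtil.setRecognizeListener((text: string) => {
      callback({ code: '0', info: text });
    });
    SpeechRecognitionUtil.startRecognize();
    SpeechRecognitionUtil.writeAudio();
  });
}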
Stop recognition and release its resources
static async stopRecognize() {
  console.info('AudioCapturer: stopRecognize waiting');
  SpeechRecognitionUtil.stopRecognizeMark = 10;
  // Watchdog: onResult resets the mark to 10, so this loop keeps waiting as long
  // as results are still arriving; once no result comes in for 3 seconds (or
  // onComplete/onError sets the mark to 11), it exits
  while (SpeechRecognitionUtil.stopRecognizeMark === 10) {
    SpeechRecognitionUtil.stopRecognizeMark--;
    await SpeechRecognitionUtil.sleep(3000);
  }
  console.info('AudioCapturer: stopRecognize done');
  if (SpeechRecognitionUtil.asrEngine !== undefined) {
    // Cancel recognition
    SpeechRecognitionUtil.asrEngine.cancel(SpeechRecognitionUtil.sessionId);
    // Finish recognition
    SpeechRecognitionUtil.asrEngine.finish(SpeechRecognitionUtil.sessionId);
    // Release the engine resources
    SpeechRecognitionUtil.asrEngine.shutdown();
  }
}
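Putting the first approach together, a typical call sequence from a page looks roughly like this (the button wiring is an assumed example):

// Initialize once, e.g. in aboutToAppear
SpeechRecognitionUtil.initAudioCapturer((ok: boolean) => {
  console.info(`initAudioCapturer: ${ok}`);
});

// On "start recording"
SpeechRecognitionUtil.startRecord((ok: boolean) => {
  console.info(`startRecord: ${ok}`);
});

// On "stop recording": capture stops, the file is closed, and recognition runs
SpeechRecognitionUtil.stopRecord((result: Record<string, string>) => {
  if (result.code === '0') {
    console.info(`recognized text: ${result.info}`);
  } else {
    console.error(`recognition failed: ${result.info}`);
  }
});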
That covers the first approach.
The second approach is real-time recognition: recording produces an audio stream, and each buffer is fed straight into speechRecognizer, so recording and recognition happen side by side. This approach is actually simpler.
Here is the implementation:
Start real-time speech recognition
static async start(callback: Callback<Record<string, string>>) {
  if (!SpeechRecognitionUtil.checkPermission()) {
    ToastUtil.showLong('Please grant the permission in Settings');
    AppUtil.toAppSetting();
    callback({
      code: '1',
      info: 'Microphone permission not granted'
    });
    return;
  }
  // Speech recognizer setup: engine creation parameters
  let extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "long" };
  let initParamsInfo: speechRecognizer.CreateEngineParams = {
    language: 'zh-CN',
    online: 1,
    extraParams: extraParam
  };
  // Call createEngine and keep the engine instance
  SpeechRecognitionUtil.asrEngine = await speechRecognizer.createEngine(initParamsInfo);
  if (SpeechRecognitionUtil.asrEngine !== undefined) {
    let returnResult: string = '';
    let lastTimeResult: string = '';
    // Register the listener
    SpeechRecognitionUtil.asrEngine.setListener({
      // Called when recognition starts successfully
      onStart(sessionId: string, eventMessage: string) {
        console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      },
      // Event callback
      onEvent(sessionId: string, eventCode: number, eventMessage: string) {
        console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
      },
      // Result callback, covering both intermediate and final results
      onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
        console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
        if (sessionId !== SpeechRecognitionUtil.sessionId) {
          return;
        }
        if (result.isFinal) {
          returnResult = returnResult + result.result;
          callback({
            code: '0',
            info: returnResult
          });
        } else if (lastTimeResult !== result.result) {
          lastTimeResult = result.result;
          callback({
            code: '0',
            info: returnResult + lastTimeResult
          });
        }
      },
      // Called when recognition completes
      onComplete(sessionId: string, eventMessage: string) {
        console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      },
      // Error callback; error codes are delivered here
      // e.g. 1002200006 means the engine is busy because a recognition is in progress
      // See the error-code reference for the full list
      onError(sessionId: string, errorCode: number, errorMessage: string) {
        console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
      },
    });
    // Start listening
    SpeechRecognitionUtil.asrEngine.startListening({
      sessionId: SpeechRecognitionUtil.sessionId,
      audioInfo: { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 },
      extraParams: { maxAudioDuration: 8 * 60 * 60 * 1000 }
    });
  } else {
    callback({
      code: '1',
      info: 'Failed to create the speech recognition engine'
    });
  }
  let audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // sampling rate
    channels: audio.AudioChannel.CHANNEL_1, // channel count
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // sample format
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // encoding type
  }
  let audioCapturerInfo: audio.AudioCapturerInfo = {
    source: audio.SourceType.SOURCE_TYPE_MIC, // audio source type
    capturerFlags: 0 // audio capturer flags
  }
  let audioCapturerOptions: audio.AudioCapturerOptions = {
    streamInfo: audioStreamInfo,
    capturerInfo: audioCapturerInfo
  }
  SpeechRecognitionUtil.audioCapturer = await audio.createAudioCapturer(audioCapturerOptions);
  if (SpeechRecognitionUtil.audioCapturer !== undefined) {
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).on('readData', (buffer) => {
      // Feed each captured buffer straight into the recognition engine
      SpeechRecognitionUtil.asrEngine?.writeAudio(SpeechRecognitionUtil.sessionId, new Uint8Array(buffer));
    });
    let stateGroup = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
    // Capture can only start when the state is STATE_PREPARED, STATE_PAUSED or STATE_STOPPED
    if (stateGroup.indexOf((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf()) === -1) {
      console.error(`AudioCapturer: start failed`);
      callback({
        code: '1',
        info: 'Failed to start the audio capturer'
      });
      return;
    }
    // Start capturing
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).start();
  } else {
    callback({
      code: '1',
      info: 'Failed to create the audio capturer'
    });
  }
}
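The checkPermission helper called at the top of start is also not shown in this post. A minimal synchronous sketch using abilityAccessCtrl and bundleManager (my assumption of how it is implemented):

// Assumed implementation: check whether the microphone permission is granted
static checkPermission(): boolean {
  let atManager = abilityAccessCtrl.createAtManager();
  // Look up this app's access token
  let bundleInfo = bundleManager.getBundleInfoForSelfSync(
    bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION);
  let tokenId = bundleInfo.appInfo.accessTokenId;
  let status = atManager.checkAccessTokenSync(tokenId, 'ohos.permission.MICROPHONE');
  return status === abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED;
}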
Stop real-time speech recognition
static stop(callback: Callback<Record<string, string>>) {
  // Recording component
  if (SpeechRecognitionUtil.audioCapturer !== undefined) {
    // The capturer can only be stopped from STATE_RUNNING or STATE_PAUSED
    if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !== audio.AudioState.STATE_RUNNING &&
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !== audio.AudioState.STATE_PAUSED) {
      callback({
        code: '1',
        info: 'Recording has not started'
      });
      console.info('Capturer is not running or paused');
      return;
    }
    // Stop capturing, then release once the stop has completed
    (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).stop((err: BusinessError) => {
      if (err) {
        callback({
          code: '1',
          info: 'Failed to stop recording'
        });
        console.error('Capturer stop failed.');
        return;
      }
      console.info('Capturer stop success.');
      // release is only allowed when the state is neither STATE_RELEASED nor STATE_NEW
      if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_RELEASED ||
        (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_NEW) {
        console.info('Capturer already released');
        return;
      }
      // Release the resources
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).release((err2: BusinessError) => {
        if (err2) {
          console.error('Capturer release failed.');
        } else {
          console.info('Capturer release success.');
        }
      });
    });
  }
  // Recognition component
  if (SpeechRecognitionUtil.asrEngine !== undefined) {
    // Cancel recognition
    SpeechRecognitionUtil.asrEngine.cancel(SpeechRecognitionUtil.sessionId);
    // Finish recognition
    SpeechRecognitionUtil.asrEngine.finish(SpeechRecognitionUtil.sessionId);
    // Release the engine resources
    SpeechRecognitionUtil.asrEngine.shutdown();
  }
}
}
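Using the second approach then comes down to two calls; intermediate and final text keeps arriving through the callback passed to start (an assumed usage example):

// Begin recording with live recognition
SpeechRecognitionUtil.start((result: Record<string, string>) => {
  if (result.code === '0') {
    // Fires repeatedly with the latest accumulated text
    console.info(`live text: ${result.info}`);
  } else {
    console.error(`recognition error: ${result.info}`);
  }
});

// Later, e.g. on a button tap: stop recording and shut the engine down
SpeechRecognitionUtil.stop((result: Record<string, string>) => {
  console.error(`stop error: ${result.info}`);
});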
That's the gist of the implementation. If you run into problems, feel free to leave a comment so we can discuss and learn together.