Implementing Voice Recording and Speech Recognition on HarmonyOS Next

I recently needed to build a voice recording and recognition feature, so I studied the relevant components and implemented it. This post documents the result.

Two components do the heavy lifting:

AudioCapturer is the audio capturer. It records PCM (Pulse Code Modulation) audio data; with it you can produce either an audio file or an audio stream.

SpeechRecognizer transcribes audio into text.

Both approaches require the ohos.permission.MICROPHONE permission.
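For completeness, here is a minimal permission setup sketch (my addition, not from the original code; the reason string and ability name are placeholders). MICROPHONE is a user-granted permission, so besides the module.json5 declaration it must be requested at runtime; the checkPermission() helper used later in this article is assumed to wrap a check like this.

// module.json5: declare the permission (placeholder reason/usedScene values)
// "requestPermissions": [{
//   "name": "ohos.permission.MICROPHONE",
//   "reason": "$string:microphone_reason",
//   "usedScene": { "abilities": ["EntryAbility"], "when": "inuse" }
// }]

import { abilityAccessCtrl, common } from '@kit.AbilityKit';

// Ask the user for the microphone permission at runtime
async function requestMicrophonePermission(context: common.UIAbilityContext): Promise<boolean> {
  const atManager = abilityAccessCtrl.createAtManager();
  const result = await atManager.requestPermissionsFromUser(context, ['ohos.permission.MICROPHONE']);
  // An authResults entry of 0 means that permission was granted
  return result.authResults.every((status) => status === 0);
}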

There are two approaches: record the audio to a file and then recognize it, or recognize the audio stream in real time while recording.

Let's start with the first approach: record the audio, write it to a file, then convert the file to text.

The implementation is as follows:
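Everything below lives in a utility class, SpeechRecognitionUtil, whose shell the original post never shows. A minimal sketch of the imports and static fields the snippets rely on (the sessionId value is arbitrary; ToastUtil and AppUtil are the author's project utilities, not SDK APIs):

import { audio } from '@kit.AudioKit';
import { speechRecognizer } from '@kit.CoreSpeechKit';
import { fileIo as fs } from '@kit.CoreFileKit';
import { BusinessError, Callback } from '@kit.BasicServicesKit';

// Options shape passed to fs.writeSync/fs.readSync, as in the official samples
class Options {
  offset?: number;
  length?: number;
}

class SpeechRecognitionUtil {
  static audioCapturer: audio.AudioCapturer | undefined = undefined;
  static asrEngine: speechRecognizer.SpeechRecognitionEngine | undefined = undefined;
  static file: fs.File; // PCM file handle for the file-based approach
  static sessionId: string = '123456'; // any unique id shared by startListening/writeAudio
  // 10 = a recognition task is in flight, 11 = idle; see stopRecognize below
  static stopRecognizeMark: number = 11;
  // ...the methods below go here...
}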

Initialize the audio capture component: create the AudioCapturer instance and register the data-read listener.

static initAudioCapturer(callback: Callback<boolean>) {
    let bufferSize: number = 0;
    let audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // sampling rate
      channels: audio.AudioChannel.CHANNEL_1, // channel count
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // sample format
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // encoding type
    }
    let audioCapturerInfo: audio.AudioCapturerInfo = {
      source: audio.SourceType.SOURCE_TYPE_MIC, // audio source type
      capturerFlags: 0 // capturer flags
    }
    let audioCapturerOptions: audio.AudioCapturerOptions = {
      streamInfo: audioStreamInfo,
      capturerInfo: audioCapturerInfo
    }

    // Recreate the target PCM file on every initialization
    let path = getContext().filesDir;
    let filePath = path + '/EMSSpeechRecognition.pcm';
    if (fs.accessSync(filePath)) {
      fs.unlinkSync(filePath);
    }
    SpeechRecognitionUtil.file = fs.openSync(filePath, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);

    // Append every captured buffer to the PCM file
    let readDataCallback = (buffer: ArrayBuffer) => {
      let options: Options = {
        offset: bufferSize,
        length: buffer.byteLength
      }
      fs.writeSync(SpeechRecognitionUtil.file.fd, buffer, options);
      bufferSize += buffer.byteLength;
    }
    // Create the AudioCapturer instance
    audio.createAudioCapturer(audioCapturerOptions, (err, capturer) => {
      if (err) {
        console.error(`AudioCapturer: invoke createAudioCapturer failed, code is ${err.code}, message is ${err.message}`);
        callback(false);
        return;
      }
      console.info('AudioCapturer: create AudioCapturer success');
      if (capturer !== undefined) {
        SpeechRecognitionUtil.audioCapturer = capturer;
        (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).on('readData', readDataCallback);
        callback(true);
      } else {
        callback(false);
      }
    });
  }

Start a capture session:

static startRecord(callback: Callback<boolean>) {
    if (SpeechRecognitionUtil.audioCapturer !== undefined) {
      let stateGroup = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
      if (stateGroup.indexOf((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf()) === -1) {
        // Capture can only start from STATE_PREPARED, STATE_PAUSED, or STATE_STOPPED
        console.error('AudioCapturer: start failed');
        callback(false);
        return;
      }
      if (SpeechRecognitionUtil.stopRecognizeMark == 10) {
        callback(false);
        ToastUtil.showToast('A recognition task is still in progress');
        return;
      }
      // Start capturing
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).start((err: BusinessError) => {
        if (err) {
          callback(false);
          console.error('AudioCapturer: capturer start failed.');
        } else {
          callback(true);
          console.info('AudioCapturer: capturer start success.');
        }
      });
    }
  }

Stop capturing:

static stopRecord(callback: Callback<Record<string, string>>) {
    if (SpeechRecognitionUtil.audioCapturer !== undefined) {
      // The capturer can only be stopped from STATE_RUNNING or STATE_PAUSED
      if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !==
      audio.AudioState.STATE_RUNNING && (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !==
      audio.AudioState.STATE_PAUSED) {
        callback({
          code: '1',
          info: 'Recording has not been started'
        });
        console.info('Capturer is not running or paused');
        return;
      }

      // Stop capturing
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).stop((err: BusinessError) => {
        if (err) {
          callback({
            code: '1',
            info: 'Failed to stop recording'
          });
          console.error('Capturer stop failed.');
        } else {
          fs.closeSync(SpeechRecognitionUtil.file); // close the PCM file before reading it back
          SpeechRecognitionUtil.releaseRecord();
          console.info('Capturer stop success.');
          // Hand the recorded file over to the recognizer
          SpeechRecognitionUtil.beginRecognize((result: Record<string, string>) => {
            callback(result);
          });
        }
      });
    }
  }

Destroy the instance and release the recording resources:

static releaseRecord() {
    if (SpeechRecognitionUtil.audioCapturer !== undefined) {
      // release is only valid when the capturer is not in STATE_RELEASED or STATE_NEW
      if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() ===
      audio.AudioState.STATE_RELEASED ||
        (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_NEW) {
        console.info('Capturer already released');
        return;
      }

      // Release the resources
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).release((err: BusinessError) => {
        if (err) {
          console.error('Capturer release failed.');
        } else {
          console.info('Capturer release success.');
        }
      });
    }
  }

That completes the recording side; next comes recognition.

Create the recognition engine, returning the outcome via a callback:

static createEngine(callback: Callback<boolean>) {
    // Engine creation parameters
    let extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "long" };
    let initParamsInfo: speechRecognizer.CreateEngineParams = {
      language: 'zh-CN',
      online: 1,
      extraParams: extraParam
    };

    // Call createEngine
    speechRecognizer.createEngine(initParamsInfo, (err: BusinessError, speechRecognitionEngine:
      speechRecognizer.SpeechRecognitionEngine) => {
      if (!err) {
        console.info('Succeeded in creating engine.');
        // Keep the engine instance
        SpeechRecognitionUtil.asrEngine = speechRecognitionEngine;
        callback(true);
      } else {
        // Error 1002200001: creation failed because the language or mode is unsupported,
        // initialization timed out, or a resource is missing
        // Error 1002200006: the engine is busy, typically when several apps call it at once
        // Error 1002200008: the engine is being destroyed
        console.error(`Failed to create engine. Code: ${err.code}, message: ${err.message}.`);
        callback(false);
      }
    });
  }

Query the supported languages, returned via a callback:

static queryRecognizeLanguages(callback: Callback<Record<string, Array<string>>>) {
    // Query parameters
    let languageQuery: speechRecognizer.LanguageQuery = {
      sessionId: SpeechRecognitionUtil.sessionId
    };
    if (SpeechRecognitionUtil.asrEngine == undefined) {
      return;
    }
    // Call listLanguages
    SpeechRecognitionUtil.asrEngine.listLanguages(languageQuery, (err: BusinessError, languages: Array<string>) => {
      if (!err) {
        callback({
          params: languages
        });
        // The currently supported languages
        console.info(`Succeeded in listing languages, result: ${JSON.stringify(languages)}`);
      } else {
        callback({});
        console.error(`Failed to list languages. Code: ${err.code}, message: ${err.message}.`);
      }
    });
  }
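As a quick usage sketch, once the engine exists:

SpeechRecognitionUtil.queryRecognizeLanguages((res: Record<string, Array<string>>) => {
  // res.params is undefined when the query failed
  console.info(`Supported languages: ${JSON.stringify(res.params ?? [])}`);
});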

Start recognition:

static startRecognize() {
    // Parameters for starting recognition; the audio info must match the capture settings above
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: SpeechRecognitionUtil.sessionId,
      audioInfo: { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 },
      extraParams: { maxAudioDuration: 8 * 60 * 60 * 1000 }
    }
    if (SpeechRecognitionUtil.asrEngine == undefined) {
      return;
    }
    // Begin listening for audio
    SpeechRecognitionUtil.asrEngine.startListening(recognizerParams);
  }

  // Feed the recorded PCM file to the engine in 1280-byte chunks
  static async writeAudio() {
    if (SpeechRecognitionUtil.asrEngine == undefined) {
      return;
    }
    let ctx = getContext();
    let filePath: string = `${ctx.filesDir}/EMSSpeechRecognition.pcm`;
    let file = fs.openSync(filePath, fs.OpenMode.READ_WRITE);
    try {
      let buf: ArrayBuffer = new ArrayBuffer(1280);
      let offset: number = 0;
      // Stop once a read returns fewer than 1280 bytes (end of file)
      while (1280 == fs.readSync(file.fd, buf, { offset: offset })) {
        let uint8Array: Uint8Array = new Uint8Array(buf);
        SpeechRecognitionUtil.asrEngine.writeAudio(SpeechRecognitionUtil.sessionId, uint8Array);
        // Pace the writes so the engine is not flooded
        await SpeechRecognitionUtil.countDownLatch(1);
        offset = offset + 1280;
      }
    } catch (err) {
      let error = err as BusinessError;
      console.error(`Failed to read from file. Code: ${error.code}, message: ${error.message}.`);
    } finally {
      if (null != file) {
        fs.closeSync(file);
      }
    }
  }
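countDownLatch above (and the sleep used later in stopRecognize) are helpers the article never defines. Based on how they are used, a plausible definition is:

  static async countDownLatch(count: number): Promise<void> {
    // Wait roughly 40 ms per count so writeAudio feeds the engine at a steady pace
    while (count > 0) {
      await SpeechRecognitionUtil.sleep(40);
      count--;
    }
  }

  static sleep(ms: number): Promise<void> {
    return new Promise<void>((resolve) => setTimeout(resolve, ms));
  }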

Set the recognition result listener:

static setRecognizeListener(callback: Callback<string>) {
    let returnResult: string = '';
    let lastTimeResult: string = '';
    // Build the listener object
    let setListener: speechRecognizer.RecognitionListener = {
      // Called when recognition starts successfully
      onStart(sessionId: string, eventMessage: string) {
        console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
        // Arm the shutdown watchdog; it finishes the session once results stop arriving
        SpeechRecognitionUtil.stopRecognize();
      },
      // Event callback
      onEvent(sessionId: string, eventCode: number, eventMessage: string) {
        console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
      },
      // Result callback, fired for both intermediate and final results
      onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
        // Reset the shutdown mark: results are still arriving
        SpeechRecognitionUtil.stopRecognizeMark = 10;

        console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
        if (sessionId != SpeechRecognitionUtil.sessionId) {
          return;
        }
        if (result.isFinal) {
          returnResult = returnResult + result.result;
          callback(returnResult);
        } else if (lastTimeResult != result.result) {
          lastTimeResult = result.result;
          callback(returnResult + lastTimeResult);
        }
      },
      // Called when recognition completes
      onComplete(sessionId: string, eventMessage: string) {
        console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
        SpeechRecognitionUtil.stopRecognizeMark = 11;
      },
      // Error callback; error codes are delivered here
      // e.g. 1002200006: the engine is busy (already recognizing)
      // see the error-code reference for the full list
      onError(sessionId: string, errorCode: number, errorMessage: string) {
        console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
        SpeechRecognitionUtil.stopRecognizeMark = 11;
      },
    }
    if (SpeechRecognitionUtil.asrEngine == undefined) {
      return;
    }
    // Register the listener
    SpeechRecognitionUtil.asrEngine.setListener(setListener);
  }

Stop recognition and release the engine:

static async stopRecognize() {
    console.info('AudioCapturer: stopRecognize begin');
    // Poll until no new result has arrived for 3 seconds: onResult sets the mark
    // back to 10, so the loop keeps waiting while results are still coming in
    SpeechRecognitionUtil.stopRecognizeMark = 10;
    while (SpeechRecognitionUtil.stopRecognizeMark == 10) {
      SpeechRecognitionUtil.stopRecognizeMark--;
      await SpeechRecognitionUtil.sleep(3000);
    }
    console.info('AudioCapturer: stopRecognize end');
    if (SpeechRecognitionUtil.asrEngine !== undefined) {
      // Cancel recognition
      SpeechRecognitionUtil.asrEngine.cancel(SpeechRecognitionUtil.sessionId);
      // Finish recognition
      SpeechRecognitionUtil.asrEngine.finish(SpeechRecognitionUtil.sessionId);
      // Release the engine resources
      SpeechRecognitionUtil.asrEngine.shutdown();
    }
  }
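One piece is missing: stopRecord above calls SpeechRecognitionUtil.beginRecognize, which the article never shows. A plausible sketch that simply chains the methods above (my reconstruction, not the author's code):

  static beginRecognize(callback: Callback<Record<string, string>>) {
    SpeechRecognitionUtil.createEngine((created: boolean) => {
      if (!created) {
        callback({ code: '1', info: 'Failed to create the recognition engine' });
        return;
      }
      // Deliver every partial or final transcript to the caller
      SpeechRecognitionUtil.setRecognizeListener((text: string) => {
        callback({ code: '0', info: text });
      });
      SpeechRecognitionUtil.startRecognize();
      // Feed the recorded PCM file to the engine
      SpeechRecognitionUtil.writeAudio();
    });
  }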

That covers the first approach.
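Putting it together, a typical call sequence from a page looks something like this (a sketch; button handlers and UI wiring are up to you):

// Initialize once, e.g. in aboutToAppear
SpeechRecognitionUtil.initAudioCapturer((ok: boolean) => {
  console.info(`capturer ready: ${ok}`);
});

// On the "start" tap
SpeechRecognitionUtil.startRecord((started: boolean) => {
  console.info(`recording: ${started}`);
});

// On the "stop" tap: stops capturing, then recognizes the file
SpeechRecognitionUtil.stopRecord((result: Record<string, string>) => {
  console.info(`code: ${result.code}, text: ${result.info}`);
});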

The second approach is real-time speech recognition: recording produces an audio stream that is fed into the speechRecognizer as it is captured, so recording and recognition happen simultaneously. This approach is actually simpler.

Here is the implementation:

Start real-time recognition:

static async start(callback: Callback<Record<string, string>>) {
    if (!SpeechRecognitionUtil.checkPermission()) {
      ToastUtil.showLong('Please grant the microphone permission in Settings');
      AppUtil.toAppSetting();
      callback({
        code: '1',
        info: 'Microphone permission not granted'
      });
      return;
    }

    // Speech recognizer setup
    // Engine creation parameters
    let extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "long" };
    let initParamsInfo: speechRecognizer.CreateEngineParams = {
      language: 'zh-CN',
      online: 1,
      extraParams: extraParam
    };

    // Call createEngine and keep the engine instance
    SpeechRecognitionUtil.asrEngine = await speechRecognizer.createEngine(initParamsInfo);
    if (SpeechRecognitionUtil.asrEngine !== undefined) {
      let returnResult: string = '';
      let lastTimeResult: string = '';
      // Register the listener
      SpeechRecognitionUtil.asrEngine.setListener({
        // Called when recognition starts successfully
        onStart(sessionId: string, eventMessage: string) {
          console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
        },
        // Event callback
        onEvent(sessionId: string, eventCode: number, eventMessage: string) {
          console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
        },
        // Result callback, fired for both intermediate and final results
        onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
          console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
          if (sessionId != SpeechRecognitionUtil.sessionId) {
            return;
          }
          if (result.isFinal) {
            returnResult = returnResult + result.result;
            callback({
              code: '0',
              info: returnResult
            });
          } else if (lastTimeResult != result.result) {
            lastTimeResult = result.result;
            callback({
              code: '0',
              info: returnResult + lastTimeResult
            });
          }
        },
        // Called when recognition completes
        onComplete(sessionId: string, eventMessage: string) {
          console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
        },
        // Error callback; error codes are delivered here
        // e.g. 1002200006: the engine is busy (already recognizing)
        // see the error-code reference for the full list
        onError(sessionId: string, errorCode: number, errorMessage: string) {
          console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
        },
      });
      // Start recognition
      SpeechRecognitionUtil.asrEngine.startListening({
        sessionId: SpeechRecognitionUtil.sessionId,
        audioInfo: { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 },
        extraParams: { maxAudioDuration: 8 * 60 * 60 * 1000 }
      });
    } else {
      callback({
        code: '1',
        info: 'Failed to create the speech recognition engine'
      });
      return; // no point starting the capturer without an engine
    }

    let audioStreamInfo: audio.AudioStreamInfo = {
      samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // sampling rate
      channels: audio.AudioChannel.CHANNEL_1, // channel count
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, // sample format
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // encoding type
    }
    let audioCapturerInfo: audio.AudioCapturerInfo = {
      source: audio.SourceType.SOURCE_TYPE_MIC, // audio source type
      capturerFlags: 0 // capturer flags
    }
    let audioCapturerOptions: audio.AudioCapturerOptions = {
      streamInfo: audioStreamInfo,
      capturerInfo: audioCapturerInfo
    }

    SpeechRecognitionUtil.audioCapturer = await audio.createAudioCapturer(audioCapturerOptions);
    if (SpeechRecognitionUtil.audioCapturer !== undefined) {
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).on('readData', (buffer) => {
        // Feed each captured chunk straight into the recognizer instead of a file
        SpeechRecognitionUtil.asrEngine?.writeAudio(SpeechRecognitionUtil.sessionId, new Uint8Array(buffer));
      });

      let stateGroup = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
      if (stateGroup.indexOf((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf()) === -1) {
        // Capture can only start from STATE_PREPARED, STATE_PAUSED, or STATE_STOPPED
        console.error('AudioCapturer: start failed');
        callback({
          code: '1',
          info: 'Failed to start the recorder'
        });
        return;
      }
      // Start capturing
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).start();
    } else {
      callback({
        code: '1',
        info: 'Failed to create the recorder'
      });
    }
  }

Stop real-time recognition:

static stop(callback: Callback<Record<string, string>>) {
    // Recorder
    if (SpeechRecognitionUtil.audioCapturer !== undefined) {
      // The capturer can only be stopped from STATE_RUNNING or STATE_PAUSED
      if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !==
      audio.AudioState.STATE_RUNNING && (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() !==
      audio.AudioState.STATE_PAUSED) {
        callback({
          code: '1',
          info: 'Recording has not been started'
        });
        console.info('Capturer is not running or paused');
        return;
      }

      // Stop capturing
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).stop((err: BusinessError) => {
        if (err) {
          callback({
            code: '1',
            info: 'Failed to stop recording'
          });
          console.error('Capturer stop failed.');
        } else {
          console.info('Capturer stop success.');
        }
      });

      // release is only valid when the capturer is not in STATE_RELEASED or STATE_NEW
      if ((SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() ===
      audio.AudioState.STATE_RELEASED ||
        (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).state.valueOf() === audio.AudioState.STATE_NEW) {
        console.info('Capturer already released');
        return;
      }
      // Release the resources
      (SpeechRecognitionUtil.audioCapturer as audio.AudioCapturer).release((err: BusinessError) => {
        if (err) {
          console.error('Capturer release failed.');
        } else {
          console.info('Capturer release success.');
        }
      });
    }

    // Recognizer
    if (SpeechRecognitionUtil.asrEngine !== undefined) {
      // Cancel recognition
      SpeechRecognitionUtil.asrEngine.cancel(SpeechRecognitionUtil.sessionId);
      // Finish recognition
      SpeechRecognitionUtil.asrEngine.finish(SpeechRecognitionUtil.sessionId);
      // Release the engine resources
      SpeechRecognitionUtil.asrEngine.shutdown();
    }
  }

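Using the real-time approach from a page is then just two calls (a sketch):

// Start recording with live recognition
SpeechRecognitionUtil.start((result: Record<string, string>) => {
  if (result.code === '0') {
    // result.info is the transcript so far; bind it to your UI state
    console.info(`transcript: ${result.info}`);
  } else {
    console.error(`error: ${result.info}`);
  }
});

// Stop when the user is done
SpeechRecognitionUtil.stop((result: Record<string, string>) => {
  console.info(`stop: ${result.info}`);
});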

That's the gist of the implementation. If you have questions, feel free to leave a comment so we can discuss and learn together.
