HarmonyOS: Speech Recognition (Speech-to-Text)

Scenario

Converts a piece of audio (up to 60 s in short-speech mode, up to 8 h in long-speech mode) into text.
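For orientation, here is a minimal sketch of how the two modes are typically selected when creating the engine. The "recognizerMode" values "short" and "long" follow the Core Speech Kit docs; treat them as assumptions to verify against the API reference:

import { speechRecognizer } from '@kit.CoreSpeechKit';

// Sketch: "recognizerMode" chooses short-speech (<= 60 s) or long-speech (<= 8 h) mode
const shortModeParams: speechRecognizer.CreateEngineParams = {
  language: 'zh-CN',
  online: 1,
  extraParams: { "locate": "CN", "recognizerMode": "short" } // use "long" for long-speech mode
};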

Reference: HarmonyOS Documentation Center.

Note (for reference only): following the documentation as written produced an error at engine-creation time. The root cause was not found (the emulator apparently does not support this; a real device seems to be required):

 Creation error: TypeError: Cannot read property createEngine of undefined
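One way to make the unsupported-device case explicit instead of crashing is to guard engine creation with canIUse. This is a sketch; the syscap string below is an assumption based on the Core Speech Kit documentation:

// Sketch: guard against devices (e.g. some emulators) that lack the speech recognizer.
// The syscap name is an assumption to verify against the docs.
if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
  // safe to call speechRecognizer.createEngine(...)
} else {
  console.warn('speechRecognizer is not available on this device');
}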

1. Import the required modules

import { speechRecognizer } from '@kit.CoreSpeechKit';
import { BusinessError } from '@kit.BasicServicesKit';
// The later snippets also rely on these two:
import { util } from '@kit.ArkTS';              // util.generateRandomUUID()
import { fileIo as fs } from '@kit.CoreFileKit'; // reading the PCM file

2. Call createEngine to initialize the engine and create a SpeechRecognitionEngine instance. createEngine can be invoked in two forms; one is shown here (the other, callback-based form is sketched after the snippet), see the API reference for details.

  static v2tEngine: speechRecognizer.SpeechRecognitionEngine
  // Engine creation parameters; "recognizerMode" selects short or long speech mode
  static extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "short" };
  static initParamsInfo: speechRecognizer.CreateEngineParams = {
    language: 'zh-CN',
    online: 1, // mode: 1 = offline (per the docs, currently the only supported mode)
    extraParams: VoiceToText.extraParam
  };

    // 1. Create the engine if it does not exist yet
    if (!VoiceToText.v2tEngine) {
      // Call createEngine to create the engine
      console.log('before createEngine');
      try {
        VoiceToText.v2tEngine = await speechRecognizer.createEngine(VoiceToText.initParamsInfo)
      } catch (e) {
        console.log('createEngine error:', e);
      }

      console.log('after createEngine');
    }
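For completeness, a sketch of the other (callback-based) form of createEngine mentioned above:

// Sketch: callback form of createEngine (the API reference documents both forms)
speechRecognizer.createEngine(VoiceToText.initParamsInfo,
  (err: BusinessError, engine: speechRecognizer.SpeechRecognitionEngine) => {
    if (err) {
      console.error(`createEngine failed. Code: ${err.code}, message: ${err.message}`);
      return;
    }
    VoiceToText.v2tEngine = engine;
  });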

3. Once you have the SpeechRecognitionEngine instance, create a RecognitionListener object and pass it to setListener to receive the recognition callbacks (a consumer-side sketch follows the snippet).

  // Create the callback (listener) object
  static setListener: speechRecognizer.RecognitionListener = {
    // Called when recognition starts successfully
    onStart(sessionId: string, eventMessage: string) {
      console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
    },
    // Event callback
    onEvent(sessionId: string, eventCode: number, eventMessage: string) {
      console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
    },
    // Result callback, delivers both intermediate and final results
    onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
      VoiceToText.callback(result)
      if (result.isLast) {
        console.log('wechat', 'VoiceToText v2tEngine isLast')
        // Final result received; finish the session
        VoiceToText.v2tEngine.finish(sessionId)
      }
      console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
    },
    // Called when recognition completes
    onComplete(sessionId: string, eventMessage: string) {
      console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
    },
    // Error callback; error codes are returned here.
    // E.g. 1002200006: the recognition engine is busy (recognition in progress).
    // See the error-code reference for the full list.
    onError(sessionId: string, errorCode: number, errorMessage: string) {
      console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
    }
  }

// 2. Register the speech recognition listener
    VoiceToText.v2tEngine.setListener(VoiceToText.setListener)
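A sketch of a consumer-side callback that turns the stream of results into displayable text. It assumes SpeechRecognitionResult carries the recognized string in its result field and marks the final result with isLast, per the API reference:

let recognizedText = '';
VoiceToText.callback = (result: speechRecognizer.SpeechRecognitionResult) => {
  // Each intermediate result replaces the current text; the last one is final
  recognizedText = result.result;
  if (result.isLast) {
    console.info(`final text: ${recognizedText}`);
  }
};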

4. Set the parameters for starting recognition and call startListening to begin recognition.

  static sessionId: string

  // 3. Start speech recognition
  static startListening() {
    VoiceToText.sessionId = util.generateRandomUUID()
    let audioParam: speechRecognizer.AudioInfo = {
      audioType: 'pcm',
      sampleRate: 16000,
      soundChannel: 1,
      sampleBit: 16
    };
    // vadBegin/vadEnd: leading/trailing silence detection (ms); maxAudioDuration: max audio length (ms)
    let extraParam: Record<string, Object> = { "vadBegin": 2000, "vadEnd": 3000, "maxAudioDuration": 40000 };
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: VoiceToText.sessionId,
      audioInfo: audioParam,
      extraParams: extraParam
    };
    // Start recognition
    VoiceToText.v2tEngine.startListening(recognizerParams);
  }

5. Feed in the audio stream by calling writeAudio. To read from a file, prepare a PCM-format audio file in advance. Note that after each write you must wait briefly before writing the next chunk (see the countDownLatch helper below).

  // Write to the buffer
  static async writeAudio(filePath: string) {
    // filePath: the audio file to read
    const file = fs.openSync(filePath, fs.OpenMode.READ_WRITE)

    try {
      let buf: ArrayBuffer = new ArrayBuffer(1280);
      let offset: number = 0;
      // readSync reads from the file synchronously and returns the number of bytes actually read
      while (1280 === fs.readSync(file.fd, buf, { offset })) {
        // The audio data to recognize; only chunks of 640 or 1280 bytes are currently supported
        let uint8Array: Uint8Array = new Uint8Array(buf);
        // The audio stream can come from: 1) microphone recording; 2) an audio file (shown here)
        VoiceToText.v2tEngine.writeAudio(VoiceToText.sessionId, uint8Array)
        await VoiceToText.countDownLatch(1) // must pause between writes
        offset += 1280
      }
    } catch (err) {
      console.error(`Failed to read from file. Code: ${err.code}, message: ${err.message}.`);
    } finally {
      if (null != file) {
        fs.closeSync(file);
      }
      VoiceToText.v2tEngine.finish(VoiceToText.sessionId)
    }
  }

  // Pace the writes: each 1280-byte chunk is 40 ms of audio
  // (16000 Hz x 16 bit x 1 channel = 32000 bytes/s; 1280 / 32000 = 40 ms),
  // so sleeping 40 ms per chunk feeds the engine at real-time speed
  static async countDownLatch(count: number) {
    while (count > 0) {
      await VoiceToText.sleep(40);
      count--;
    }
  }

  // Sleep helper
  static sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
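The comments above mention recording as the other audio source. Below is a sketch of feeding live microphone audio, assuming @kit.AudioKit's AudioCapturer and its 'readData' event (API 11+) plus the ohos.permission.MICROPHONE permission; note that writeAudio still only accepts 640- or 1280-byte chunks, so real code may need to re-chunk the captured buffers:

import { audio } from '@kit.AudioKit';

async function captureToEngine() {
  const capturer = await audio.createAudioCapturer({
    streamInfo: { // must match the AudioInfo passed to startListening
      samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000,
      channels: audio.AudioChannel.CHANNEL_1,
      sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
      encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
    },
    capturerInfo: {
      source: audio.SourceType.SOURCE_TYPE_MIC,
      capturerFlags: 0
    }
  });
  // Forward each captured frame to the recognizer; assumes the frame size
  // has been arranged to be 640 or 1280 bytes
  capturer.on('readData', (buffer: ArrayBuffer) => {
    VoiceToText.v2tEngine.writeAudio(VoiceToText.sessionId, new Uint8Array(buffer));
  });
  await capturer.start();
}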

6. Other operations

// asrEngine below stands for the engine instance (v2tEngine in the snippets above)
// 6.1 Finish recognition
asrEngine.finish(sessionId);
// 6.2 Cancel recognition
asrEngine.cancel(sessionId);
// 6.3 Release engine resources
asrEngine.shutdown();
// 6.4 To query the languages supported by the speech recognition service, call listLanguages
// 6.4.1 Set the query parameters
let languageQuery: speechRecognizer.LanguageQuery = {
  sessionId: sessionId
};
// 6.4.2 Call listLanguages
asrEngine.listLanguages(languageQuery).then((res: Array<string>) => {
  console.info(`Succeeded in listing languages, result: ${JSON.stringify(res)}.`);
}).catch((err: BusinessError) => {
  console.error(`Failed to list languages. Code: ${err.code}, message: ${err.message}.`);
});
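Putting 6.1 through 6.3 together, a sketch of a release helper for page teardown, using only the calls shown above:

// Sketch: call when the page is destroyed or recognition is no longer needed
function release() {
  if (VoiceToText.v2tEngine) {
    VoiceToText.v2tEngine.cancel(VoiceToText.sessionId) // drop any in-flight session
    VoiceToText.v2tEngine.shutdown()                    // release engine resources
  }
}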

7. Complete example

import { speechRecognizer } from '@kit.CoreSpeechKit';
import { BusinessError } from '@kit.BasicServicesKit';
import { util } from '@kit.ArkTS';
import { fileIo as fs } from '@kit.CoreFileKit';

export class VoiceToText {
  static v2tEngine: speechRecognizer.SpeechRecognitionEngine
  static sessionId: string
  static callback: (result: speechRecognizer.SpeechRecognitionResult) => void
  // Engine creation parameters
  static extraParam: Record<string, Object> = { "locate": "CN", "recognizerMode": "short" };
  static initParamsInfo: speechRecognizer.CreateEngineParams = {
    language: 'zh-CN',
    online: 1, // mode: 1 = offline (per the docs, currently the only supported mode)
    extraParams: VoiceToText.extraParam
  };
  // Create the callback (listener) object
  static setListener: speechRecognizer.RecognitionListener = {
    // Called when recognition starts successfully
    onStart(sessionId: string, eventMessage: string) {
      console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
    },
    // Event callback
    onEvent(sessionId: string, eventCode: number, eventMessage: string) {
      console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
    },
    // Result callback, delivers both intermediate and final results
    onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
      VoiceToText.callback(result)
      if (result.isLast) {
        console.log('wechat', 'VoiceToText v2tEngine isLast')
        // Final result received; finish the session
        VoiceToText.v2tEngine.finish(sessionId)
      }
      console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
    },
    // Called when recognition completes
    onComplete(sessionId: string, eventMessage: string) {
      console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
    },
    // Error callback; error codes are returned here.
    // E.g. 1002200006: the recognition engine is busy (recognition in progress).
    // See the error-code reference for the full list.
    onError(sessionId: string, errorCode: number, errorMessage: string) {
      console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
    }
  }

  static startListening() {
    let audioParam: speechRecognizer.AudioInfo = {
      audioType: 'pcm',
      sampleRate: 16000,
      soundChannel: 1,
      sampleBit: 16
    };
    // vadBegin/vadEnd: leading/trailing silence detection (ms); maxAudioDuration: max audio length (ms)
    let extraParam: Record<string, Object> = { "vadBegin": 2000, "vadEnd": 3000, "maxAudioDuration": 40000 };
    VoiceToText.sessionId = util.generateRandomUUID()
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: VoiceToText.sessionId,
      audioInfo: audioParam,
      extraParams: extraParam
    };
    // Start recognition
    VoiceToText.v2tEngine.startListening(recognizerParams);
  }

  // Pace the writes: each 1280-byte chunk is 40 ms of audio
  // (16000 Hz x 16 bit x 1 channel = 32000 bytes/s; 1280 / 32000 = 40 ms),
  // so sleeping 40 ms per chunk feeds the engine at real-time speed
  static async countDownLatch(count: number) {
    while (count > 0) {
      await VoiceToText.sleep(40);
      count--;
    }
  }

  // Sleep helper
  static sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  static async start(filePath: string, callback: (result: speechRecognizer.SpeechRecognitionResult) => void) {
    // 1. Create the engine if it does not exist yet
    if (!VoiceToText.v2tEngine) {
      // Call createEngine to create the engine
      console.log('before createEngine');
      try {
        VoiceToText.v2tEngine = await speechRecognizer.createEngine(VoiceToText.initParamsInfo)
      } catch (e) {
        console.log('createEngine error:', e);
      }

      console.log('after createEngine');
    }
    // Busy check: if the engine is already converting, the user cannot be starting a new utterance
    // if (VoiceToText.v2tEngine.isBusy()) {
    //   console.log('wechat', 'VoiceToText v2tEngine isBusy')
    //   return
    // }

    VoiceToText.callback = callback
    console.log('callback saved');
    // 2. Register the speech recognition listener
    VoiceToText.v2tEngine.setListener(VoiceToText.setListener)
    console.log('listener set');
    // 3. Start speech recognition
    VoiceToText.startListening()
    console.log('listening started');
    // 4. Write the audio stream (fill the buffer)
    VoiceToText.writeAudio(filePath)
    console.log('audio writing started');
  }

  // Write to the buffer
  static async writeAudio(filePath: string) {
    // filePath: the audio file to read
    const file = fs.openSync(filePath, fs.OpenMode.READ_WRITE)

    try {
      let buf: ArrayBuffer = new ArrayBuffer(1280);
      let offset: number = 0;
      // readSync reads from the file synchronously and returns the number of bytes actually read
      while (1280 === fs.readSync(file.fd, buf, { offset })) {
        // The audio data to recognize; only chunks of 640 or 1280 bytes are currently supported
        let uint8Array: Uint8Array = new Uint8Array(buf);
        // The audio stream can come from: 1) microphone recording; 2) an audio file (shown here)
        VoiceToText.v2tEngine.writeAudio(VoiceToText.sessionId, uint8Array)
        await VoiceToText.countDownLatch(1) // must pause between writes
        offset += 1280
      }
    } catch (err) {
      console.error(`Failed to read from file. Code: ${err.code}, message: ${err.message}.`);
    } finally {
      if (null != file) {
        fs.closeSync(file);
      }
      VoiceToText.v2tEngine.finish(VoiceToText.sessionId)
    }
  }
}
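A usage sketch for the class above; the PCM path below is hypothetical and must be replaced with a real 16 kHz / 16-bit / mono PCM file in the app sandbox:

import { speechRecognizer } from '@kit.CoreSpeechKit';

// Hypothetical sandbox path, for illustration only
const pcmPath = '/data/storage/el2/base/haps/entry/files/test.pcm';
VoiceToText.start(pcmPath, (result: speechRecognizer.SpeechRecognitionResult) => {
  console.info(`recognized: ${result.result}`); // result.result per the API reference
});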
