Unity接微软认知服务语音识别(Azure-ASR)

最近因项目需求,接了微软认知服务的语音识别接口(以下简称语音识别),期间也踩了很多坑,最后成功接通。众所周知,国内有很多语音识别的SDK,但是这些SDK都偏向于中文语音识别,对英文的识别率很低。经过多家SDK的对比,我选择了微软的语音识别,它对英文的识别率很高。下面就来说下如何接入语音识别。

首先从微软认知服务官方的GitHub上下载SDK包,然后将下载的Speech SDK .unitypackage文件导入Unity工程,然后在场景中新建Text,Button,如下图所示

然后在项目中添加脚本,命名为MicroSoftASRDemo,编辑脚本,用以下代码替换。其中的YourSubscriptionKey使用Speech Services订阅密钥替换;YourServiceRegion为与您订阅密钥相关联的区域,免费试用版的为westus(订阅密钥获取方式点击这里)。然后将脚本保存,挂载到场景中的一个gameobject上。

using UnityEngine;
using UnityEngine.UI;
using Microsoft.CognitiveServices.Speech;
using System;
using UnityEngine.SceneManagement;
#if PLATFORM_ANDROID
using UnityEngine.Android;
#endif

/// <summary>
/// Minimal Azure Speech-to-Text demo for Unity. Wire <see cref="outputText"/>,
/// <see cref="startRecoButton"/> and <see cref="returnMainBtn"/> to UI objects;
/// clicking the start button runs one single-utterance recognition and shows
/// the transcript in the Text element.
/// </summary>
public class MicroSoftASRDemo : MonoBehaviour
{
    // Hook up the properties below with a Text and Button objects in your UI.
    public Text outputText;        // Displays status messages and the recognized text.
    public Button startRecoButton; // Triggers a single-shot recognition.
    public Button returnMainBtn;   // Loads the "Menu" scene.

    // Guards message / waitingForReco / Rec. ButtonClick resumes off the Unity
    // main thread after ConfigureAwait(false), while Update() reads these
    // fields on the main thread, so all access goes through this lock.
    private object threadLocker = new object();
    private bool waitingForReco;   // True while a recognition is in flight (disables the button).
    private string message;        // Latest status/result text, mirrored to outputText each frame.

    private bool micPermissionGranted = false;

    // Recording state: 0 = not started, 1 = recognizing, 2 = finished.
    // NOTE(review): Update() resets this to 0 every frame, so values 1/2 are
    // only observable within the frame they were set — confirm that whatever
    // polls Rec does so before the reset, or move the reset out of Update().
    public int Rec = 0;


#if PLATFORM_ANDROID
    // Required to manifest microphone permission, cf.
    // https://docs.unity3d.com/Manual/android-manifest.html
    private Microphone mic;
#endif

    /// <summary>
    /// Runs one single-utterance recognition against the Azure Speech service
    /// and publishes the result (or an error description) via
    /// <see cref="message"/> / <see cref="SpeechInfo.SpeachText"/>.
    /// async void is acceptable here because this is a UI event handler.
    /// </summary>
    public async void ButtonClick()
    {
        // Creates an instance of a speech config with specified subscription key and service region.
        // Replace with your own subscription key and service region (e.g., "westus").
        var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

        // Make sure to dispose the recognizer after use!
        using (var recognizer = new SpeechRecognizer(config))
        {
            lock (threadLocker)
            {
                waitingForReco = true;
                Rec = 1;
            }

            // Starts speech recognition, and returns after a single utterance is recognized. The end of a
            // single utterance is determined by listening for silence at the end or until a maximum of 15
            // seconds of audio is processed.  The task returns the recognition text as result.
            // Note: Since RecognizeOnceAsync() returns only a single utterance, it is suitable only for single
            // shot recognition like command or query.
            // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.
            // ConfigureAwait(false): the continuation may run on a thread-pool
            // thread, hence the locking around the shared fields below.
            var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

            // Checks result.
            string newMessage = string.Empty;
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                newMessage = result.Text;
            }
            else if (result.Reason == ResultReason.NoMatch)
            {
                newMessage = "NOMATCH: Speech could not be recognized.";
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                // Canceled usually means a bad key/region or a network error;
                // surface the SDK's details to the UI for debugging.
                var cancellation = CancellationDetails.FromResult(result);

                newMessage = $"CANCELED: Reason={cancellation.Reason} ErrorDetails={cancellation.ErrorDetails}";
            }

            lock (threadLocker)
            {
                message = newMessage;
                SpeechInfo.SpeachText = message;
                waitingForReco = false;
                Rec = 2;
            }
        }
    }

    /// <summary>
    /// Validates the UI references, requests the Android microphone permission
    /// when needed, and wires up the button handlers.
    /// </summary>
    void Start()
    {
        if (outputText == null)
        {
            Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (startRecoButton == null)
        {
            message = "startRecoButton property is null! Assign a UI Button to it.";
            Debug.LogError(message);
        }
        else
        {
            // Continue with normal initialization, Text and Button objects are present.

#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            // The grant is detected later, in Update(), because the Android
            // permission dialog completes asynchronously.
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#else
            micPermissionGranted = true;
#endif
            startRecoButton.onClick.AddListener(ButtonClick);
            // Unlike the other references, returnMainBtn was not validated
            // above; guard it so a missing assignment logs instead of throwing.
            if (returnMainBtn != null)
            {
                returnMainBtn.onClick.AddListener(MainScene);
            }
            else
            {
                Debug.LogError("returnMainBtn property is null! Assign a UI Button to it.");
            }
        }
    }

    // Button handler: returns to the main menu scene.
    private void MainScene()
    {
        SceneManager.LoadScene("Menu");
    }

    /// <summary>
    /// Per-frame UI sync: picks up the async Android permission grant, mirrors
    /// the shared message into the Text element, and toggles the start button.
    /// </summary>
    void Update()
    {
#if PLATFORM_ANDROID
        if (!micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            micPermissionGranted = true;
            message = "Click button to recognize speech";
        }
#endif

        lock (threadLocker)
        {
            if (startRecoButton != null)
            {
                // Disable the button while a recognition is running or the mic
                // is not yet authorized.
                startRecoButton.interactable = !waitingForReco && micPermissionGranted;
            }
            if (outputText != null)
            {
                outputText.text = message;
                Rec = 0;
            }
        }
    }

}

在编辑器中运行,点击button,然后说出英文短语或者句子,识别完成后,你所说的话将会被转录为文本,并将该文本显示到窗口中,至此语音识别已成功接入。

下一篇我将会介绍微软认知服务语音合成的接入方式

©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页