最近因项目需求,接了微软认知服务的语音识别接口(以下简称语音识别),期间也踩了很多坑最后成功接通。众所周知,国内有很多语音识别的SDK,但是这些SDK都是偏向于中文语音识别,对于英文的识别率很低,所以经过多家SDK的对比,我选择了微软的语音识别,微软的语音识别对英文的识别率很高。下面就来说下如何来接语音识别。
首先从微软认知服务官方的GitHub上下载SDK包,然后将下载的Speech SDK .unitypackage文件导入Unity工程,然后在场景中新建Text,Button,如下图所示
然后在项目中添加脚本,命名为MicroSoftASRDemo,编辑脚本,用以下代码替换,其中的YourSubscriptionKey
需要替换为您自己的Speech Services订阅密钥;YourServiceRegion为与您订阅密钥相关联的区域,免费试用版为westus。(订阅密钥获取方式点击这里)然后将脚本保存,挂载到场景中的一个gameobject上。
using UnityEngine;
using UnityEngine.UI;
using Microsoft.CognitiveServices.Speech;
using System;
using UnityEngine.SceneManagement;
#if PLATFORM_ANDROID
using UnityEngine.Android;
#endif
/// <summary>
/// Minimal single-shot speech-to-text demo using the Microsoft Cognitive
/// Services Speech SDK. Wire <see cref="outputText"/>, <see cref="startRecoButton"/>
/// and <see cref="returnMainBtn"/> to UI objects in the scene.
/// </summary>
public class MicroSoftASRDemo : MonoBehaviour
{
    // Hook up the properties below with Text and Button objects in your UI.
    public Text outputText;
    public Button startRecoButton;
    public Button returnMainBtn;

    // Guards cross-thread access to message/waitingForReco/Rec: the recognition
    // continuation runs off the Unity main thread (ConfigureAwait(false) below),
    // while Update() reads these fields on the main thread.
    private object threadLocker = new object();
    private bool waitingForReco;
    private string message;
    private bool micPermissionGranted = false;

    // Recognition state exposed to other scripts:
    // 0: not recording, 1: recording, 2: recording finished.
    // NOTE(review): Update() resets this to 0 every frame once outputText is
    // assigned, so pollers may miss state 2 depending on script execution
    // order — looks intentional (consume-once), but confirm with callers.
    public int Rec = 0;

#if PLATFORM_ANDROID
    // Declaring a Microphone field forces Unity to emit the microphone
    // permission into the Android manifest, cf.
    // https://docs.unity3d.com/Manual/android-manifest.html
    private Microphone mic;
#endif

    /// <summary>
    /// Runs one single-shot recognition pass and publishes the result.
    /// Bound to <see cref="startRecoButton"/>. async void is acceptable
    /// here only because this is a top-level UI event handler.
    /// </summary>
    public async void ButtonClick()
    {
        // Creates an instance of a speech config with specified subscription key and service region.
        // Replace with your own subscription key and service region (e.g., "westus").
        var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

        // Make sure to dispose the recognizer after use.
        using (var recognizer = new SpeechRecognizer(config))
        {
            lock (threadLocker)
            {
                waitingForReco = true;
                Rec = 1;
            }

            // RecognizeOnceAsync returns after a single utterance is recognized:
            // the end is detected by trailing silence, or after a maximum of 15
            // seconds of audio. It is suitable only for single-shot recognition
            // (commands/queries); for long-running multi-utterance recognition
            // use StartContinuousRecognitionAsync() instead.
            var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

            // Map the recognition outcome to a user-facing message.
            string newMessage = string.Empty;
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                newMessage = result.Text;
            }
            else if (result.Reason == ResultReason.NoMatch)
            {
                newMessage = "NOMATCH: Speech could not be recognized.";
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = CancellationDetails.FromResult(result);
                newMessage = $"CANCELED: Reason={cancellation.Reason} ErrorDetails={cancellation.ErrorDetails}";
            }

            // Publish under the lock; Update() picks message up on the main thread.
            lock (threadLocker)
            {
                message = newMessage;
                SpeechInfo.SpeachText = message;
                waitingForReco = false;
                Rec = 2;
            }
        }
    }

    /// <summary>
    /// Validates Inspector wiring, requests the Android microphone
    /// permission, and hooks up the button callbacks.
    /// </summary>
    void Start()
    {
        if (outputText == null)
        {
            Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (startRecoButton == null)
        {
            message = "startRecoButton property is null! Assign a UI Button to it.";
            Debug.LogError(message);
        }
        else
        {
            // Continue with normal initialization; Text and Button objects are present.
#if PLATFORM_ANDROID
            // Request to use the microphone, cf.
            // https://docs.unity3d.com/Manual/android-RequestingPermissions.html
            message = "Waiting for mic permission";
            if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
            {
                Permission.RequestUserPermission(Permission.Microphone);
            }
#else
            micPermissionGranted = true;
#endif
            startRecoButton.onClick.AddListener(ButtonClick);

            // FIX: the original dereferenced returnMainBtn unconditionally,
            // throwing NullReferenceException when it is left unassigned in
            // the Inspector. Guard it the same way the other fields are.
            if (returnMainBtn != null)
            {
                returnMainBtn.onClick.AddListener(MainScene);
            }
            else
            {
                Debug.LogError("returnMainBtn property is null! Assign a UI Button to it.");
            }
        }
    }

    // Loads the main menu scene. Bound to returnMainBtn.
    private void MainScene()
    {
        SceneManager.LoadScene("Menu");
    }

    /// <summary>
    /// Main-thread pump: promotes the Android permission grant, mirrors the
    /// recognition message into the UI, and toggles button interactability.
    /// </summary>
    void Update()
    {
#if PLATFORM_ANDROID
        if (!micPermissionGranted && Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            micPermissionGranted = true;
            message = "Click button to recognize speech";
        }
#endif
        lock (threadLocker)
        {
            if (startRecoButton != null)
            {
                // Disable the button while a recognition pass is in flight
                // or the microphone permission has not been granted yet.
                startRecoButton.interactable = !waitingForReco && micPermissionGranted;
            }
            if (outputText != null)
            {
                outputText.text = message;
                // Reset the public state flag once the message is displayed
                // (see NOTE(review) on Rec above).
                Rec = 0;
            }
        }
    }
}
在编辑器中运行,点击button,然后说出英文短语或者句子,识别完成后,你所说的话将会被转录为文本,并将该文本显示到窗口中,至此语音识别已成功接入。
下一篇我将会介绍微软认知服务语音合成的接入方式