1.先注册百度开放平台的账号,然后按文档申请试用资格及创建应用:https://ai.baidu.com/ai-doc/SPEECH/qknh9i8ed
Windows平台选择“不需要”就可以了。
2.下载C# SDK包
解压以后里面包含这两个文件夹,我工程里面用的是net45,将文件夹放入unity Assets/Plugins文件夹下:
同时需要把Unity的Api Compatibility Level设置为.NET 4.x,设置路径为Player Settings → Other Settings → Configuration → Api Compatibility Level
至此百度语音SDK导入完毕。目前我自己总结了两种实现语音识别的方式:
第一种是用unity的UnityWebRequest去实现,是看的一位博主的教程,链接地址是:Unity百度语音识别-CSDN博客
第二种是看官方文档以后获取SDK的接口来实现的,具体实现方式如下:
在Git上下载了SDK源码进行学习,代码里面有具体功能实现的对应接口,源码地址:GitHub - Baidu-AIP/dotnet-sdk: 百度AI开放平台 .Net SDK
下载完毕解压后如下图,从Git的说明文档上可知speech文件夹中即是语音识别的代码:
在Asr类中,找到“识别语音数据”的方法接口JObject Recognize
byte[] data:音频数据;
string format:音频格式;
int rate:采样频率;
options:语言类型,默认为1537普通话,还支持粤语、四川话、英语等,具体看官方文档介绍。
1.进行录音采集保存
/// <summary>
/// Starts a non-looping microphone recording into <c>saveAudioClip</c>,
/// using <c>currentDeviceName</c>, <c>recordMaxTime</c> and <c>recordFrequency</c>.
/// Does nothing (apart from a warning) when no microphone is available.
/// </summary>
void StartRecord()
{
    // Without any capture device Microphone.Start cannot produce a usable clip.
    if (Microphone.devices.Length == 0)
    {
        Debug.LogWarning("No microphone device found; recording not started.");
        return;
    }

    // Informational message only, so it is logged at normal (not error) severity.
    Debug.Log("开始");
    saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
}
2.对录音进行转换,将音频数据转换为16位PCM格式的byte数组
/// <summary>
/// Converts an <see cref="AudioClip"/> into raw 16-bit signed little-endian mono PCM.
/// </summary>
/// <param name="audioClip">Clip whose sample data is read with <c>GetData</c>.</param>
/// <returns>Two bytes per sample frame, low byte first.</returns>
/// <exception cref="ArgumentNullException"><paramref name="audioClip"/> is null.</exception>
public byte[] ConvertClipToBytes(AudioClip audioClip)
{
    if (audioClip == null)
    {
        throw new ArgumentNullException("audioClip");
    }

    const int rescaleFactor = 32767;
    int channels = Mathf.Max(1, audioClip.channels);
    int frameCount = audioClip.samples;

    // GetData returns interleaved data, so the buffer must hold every channel of every frame.
    float[] samples = new float[frameCount * channels];
    if (samples.Length > 0)
    {
        audioClip.GetData(samples, 0);
    }

    byte[] bytesData = new byte[frameCount * 2];
    for (int frame = 0; frame < frameCount; frame++)
    {
        // Average the channels of this frame; for a mono clip this is just the sample itself.
        float mixed = 0f;
        for (int channel = 0; channel < channels; channel++)
        {
            mixed += samples[frame * channels + channel];
        }

        // Clamp so an out-of-range sample saturates instead of wrapping around in the short cast.
        mixed = Mathf.Clamp(mixed / channels, -1f, 1f);
        short value = (short)(mixed * rescaleFactor);

        // Write little-endian explicitly: no per-sample allocation, no dependence on host byte order.
        bytesData[frame * 2] = (byte)(value & 0xFF);
        bytesData[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
    }

    return bytesData;
}
3.转换完毕通过SDK接口发送
// Convert the recorded clip to PCM bytes first, then submit them to the recognizer
// together with the format name, the sample rate and the options dictionary.
byte[] pcmBytes = ConvertClipToBytes(saveAudioClip);
var result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
4.将返回的数据转换为字符串,并用正则表达式提取自己想要的信息
// Read the fields straight from the returned JSON object instead of re-serialising it
// and scraping the text with regular expressions.
string str = "识别结果为空";
if (result != null)
{
    var errNo = result["err_no"];
    var candidates = result["result"];

    // err_no == 0 marks a successful recognition; "result" is the array of candidate texts.
    bool succeeded = errNo != null && errNo.ToString() == "0";
    if (succeeded && candidates != null && candidates.HasValues)
    {
        // The previous regex always dropped the last character of the text (presumably to
        // hide the trailing punctuation mark - TODO confirm). Only real punctuation is
        // trimmed here, so a result without punctuation no longer loses a character.
        string best = candidates.First.ToString()
            .TrimEnd('。', '，', '？', '！', '、', '.', ',', '?', '!');
        if (best.Length > 0)
        {
            str = best;
        }
    }
}
tex.text = str;
完整代码如下:
偷懒写的代码不牢固,result在获取的时候有可能会出现NULL值,所以你们可以用Try Catch做一下容错
using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using UnityEngine;
using UnityEngine.UI;
using UnityEngine.Networking;
using System;
using Baidu.Aip.Speech;
using Newtonsoft.Json;
/// <summary>
/// Push-to-talk speech recognition demo: holding the Space key records from the
/// microphone, releasing it plays the clip back, sends it to the Baidu recognizer
/// and shows the recognised text in the "Canvas/ResultTex" UI text.
/// </summary>
public class Test : MonoBehaviour
{
    // Credentials of the Baidu application; passed to the Asr constructor in Start().
    public string app_id;
    public string api_key;
    public string secret_Key;

    // Speech-recognition client, created in Start().
    public Asr asr;

    // Text shown whenever no recognised text is available.
    const string EmptyResultText = "识别结果为空";

    // Scale factor from a [-1, 1] float sample to a signed 16-bit integer.
    const int PcmScale = 32767;

    // Punctuation removed from the end of the recognised text before it is displayed.
    static readonly char[] TrailingPunctuation = { '。', '，', '？', '！', '、', '.', ',', '?', '!' };

    int recordFrequency = 8000; // recording sample rate
    int recordMaxTime = 20;     // maximum recording length passed to Microphone.Start
    AudioClip saveAudioClip;    // clip holding the current recording
    AudioSource source;
    string currentDeviceName = string.Empty;
    Text tex;
    Dictionary<string, object> languageType = new Dictionary<string, object>();

    // Start is called before the first frame update
    void Start()
    {
        // The recording clip is created by Microphone.Start, so only the AudioSource
        // and the result label are looked up here.
        source = GetComponent<AudioSource>();

        GameObject resultObject = GameObject.Find("Canvas/ResultTex");
        if (resultObject != null)
        {
            tex = resultObject.GetComponent<Text>();
        }
        if (tex == null)
        {
            Debug.LogWarning("Canvas/ResultTex with a Text component was not found; results will only be logged.");
        }

        asr = new Asr(app_id, api_key, secret_Key);
        languageType["dev_pid"] = 1537;
    }

    // Update is called once per frame
    void Update()
    {
        // Checked independently so that a press and release inside the same frame
        // still stops the recording that was just started.
        if (Input.GetKeyDown(KeyCode.Space))
        {
            StartRecord();
        }
        if (Input.GetKeyUp(KeyCode.Space))
        {
            EndRecord();
        }
    }

    /// <summary>
    /// Converts a whole <see cref="AudioClip"/> into raw 16-bit signed little-endian mono PCM.
    /// </summary>
    /// <param name="audioClip">Clip whose sample data is read.</param>
    /// <returns>Two bytes per sample frame, low byte first.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="audioClip"/> is null.</exception>
    public byte[] ConvertClipToBytes(AudioClip audioClip)
    {
        if (audioClip == null)
        {
            throw new ArgumentNullException("audioClip");
        }
        return ConvertClipToBytes(audioClip, audioClip.samples);
    }

    /// <summary>
    /// Converts the first <paramref name="frameCount"/> sample frames of a clip into
    /// 16-bit signed little-endian mono PCM.
    /// </summary>
    byte[] ConvertClipToBytes(AudioClip audioClip, int frameCount)
    {
        int channels = Mathf.Max(1, audioClip.channels);
        frameCount = Mathf.Clamp(frameCount, 0, audioClip.samples);

        // GetData returns interleaved data, so the buffer must hold every channel of every frame.
        float[] samples = new float[frameCount * channels];
        if (samples.Length > 0)
        {
            audioClip.GetData(samples, 0);
        }

        byte[] bytesData = new byte[frameCount * 2];
        for (int frame = 0; frame < frameCount; frame++)
        {
            // Average the channels of this frame; for a mono clip this is just the sample itself.
            float mixed = 0f;
            for (int channel = 0; channel < channels; channel++)
            {
                mixed += samples[frame * channels + channel];
            }

            // Clamp so an out-of-range sample saturates instead of wrapping around in the short cast.
            mixed = Mathf.Clamp(mixed / channels, -1f, 1f);
            short value = (short)(mixed * PcmScale);

            // Write little-endian explicitly: no per-sample allocation, no dependence on host byte order.
            bytesData[frame * 2] = (byte)(value & 0xFF);
            bytesData[frame * 2 + 1] = (byte)((value >> 8) & 0xFF);
        }
        return bytesData;
    }

    /// <summary>
    /// Starts recording from the microphone into <c>saveAudioClip</c>.
    /// </summary>
    void StartRecord()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.LogWarning("No microphone device found; recording not started.");
            saveAudioClip = null;
            return;
        }

        Debug.Log("开始");
        saveAudioClip = Microphone.Start(currentDeviceName, false, recordMaxTime, recordFrequency);
    }

    /// <summary>
    /// Stops recording, plays the clip back, sends the recorded part to the recognizer
    /// and displays the recognised text.
    /// </summary>
    void EndRecord()
    {
        Debug.Log("结束");

        if (saveAudioClip == null)
        {
            // Nothing was recorded (no device, or Microphone.Start did not return a clip).
            Microphone.End(currentDeviceName);
            return;
        }

        // The write position must be read before Microphone.End. If the recording has
        // already stopped by itself (maximum length reached) the whole clip is used.
        int recordedFrames = Microphone.IsRecording(currentDeviceName)
            ? Microphone.GetPosition(currentDeviceName)
            : saveAudioClip.samples;
        Microphone.End(currentDeviceName);

        if (recordedFrames <= 0)
        {
            ShowResult(EmptyResultText);
            return;
        }

        if (source != null)
        {
            source.PlayOneShot(saveAudioClip);
        }

        // Only the recorded part is uploaded, not the trailing silence of the full-length buffer.
        byte[] pcmBytes = ConvertClipToBytes(saveAudioClip, recordedFrames);

        string text = null;
        try
        {
            // NOTE(review): this call is made from the main thread and looks synchronous,
            // so the frame presumably stalls until the service answers - confirm and
            // consider moving it off the main thread.
            var result = asr.Recognize(pcmBytes, "pcm", recordFrequency, languageType);
            text = ExtractRecognizedText(result);
            if (text == null)
            {
                Debug.LogWarning("Speech recognition returned no text: " + result);
            }
        }
        catch (Exception e)
        {
            // Network or SDK failures must not break the Update loop; log and show the fallback text.
            Debug.LogException(e);
        }

        ShowResult(text ?? EmptyResultText);
    }

    /// <summary>
    /// Returns the first candidate text of a successful response, or null when the
    /// response is missing, reports an error, or contains no text.
    /// </summary>
    static string ExtractRecognizedText(Newtonsoft.Json.Linq.JObject result)
    {
        if (result == null)
        {
            return null;
        }

        // err_no == 0 marks a successful recognition.
        var errNo = result["err_no"];
        if (errNo == null || errNo.ToString() != "0")
        {
            return null;
        }

        // "result" is the array of candidate texts; the first one is used.
        var candidates = result["result"];
        if (candidates == null || !candidates.HasValues)
        {
            return null;
        }

        // The previous regex always dropped the last character of the text (presumably to
        // hide the trailing punctuation mark - TODO confirm). Only real punctuation is
        // trimmed here, so a result without punctuation no longer loses a character.
        string best = candidates.First.ToString().TrimEnd(TrailingPunctuation);
        return best.Length > 0 ? best : null;
    }

    /// <summary>
    /// Shows a message in the result label, or logs it when the label is missing.
    /// </summary>
    void ShowResult(string message)
    {
        if (tex != null)
        {
            tex.text = message;
        }
        else
        {
            Debug.Log(message);
        }
    }
}