下面介绍如何在 Unity3D(U3D)中开发适用于 HoloLens(UWP 平台)的语音识别功能。
首先,阅读这篇文章需要对 HoloLens 有一定的了解。如果你了解它,应该也知道 HoloLens 本身其实是支持语音识别的,而且识别效率和速度都不错。但是,它并不能识别中文——这是笔者在墙外查了不少资料后得到的结论。当然笔者能力有限,如果你发现它有办法支持中文语音识别,请告诉我!感激不尽。
说实话,文中讲解得不是很详细。
- 首先你得先了解一下百度语音识别 REST API 及其使用方式,很简单。(由于这不是文章主体,所以不多加描述)
相关链接:百度语音识别服务 —— 语音识别 REST API 开发笔记 ,
API请求方式基本说明
如果你看懂了,那么应该知道,我们首先需要拿到 `token` 令牌。这个通过 U3D 的 `WWW` 类直接请求就可以拿到,也很简单。最主要的是要把音频文件上传给百度云,所以对音频文件的处理也是这个项目的重点。
所以我们将创建两个类:一个是用于网络处理的 `SpeechRecognition`,另一个是用于录音以及音频处理的 `RecordingWav`。
- 能读这篇文章的人大概都会遇到同一个坑:UWP 不支持很多文件读写相关的类,比如 `FileStream`、`Directory`、`File` 等等。官方文档对此有说明,并给出了推荐的替代类库及其使用方法:https://docs.unity3d.com/Manual/windowsstore-missingtypes.html 。所以我们也用这些替代类来对录音文件进行读写操作。具体怎么做,稍后在代码中说明。
1. 先展示录音以及音频处理类 `RecordingWav`:
using UnityEngine;
using System;
using UnityEngine.UI;
using System.Collections;
using System.IO;
#if NETFX_CORE //UWP下编译
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif
/// <summary>
/// Records audio from the microphone, converts the captured samples to
/// 16-bit little-endian PCM held in <see cref="memoryStream"/>, and then asks
/// <see cref="speechRecognition"/> to upload it. Also drives the record
/// button's colour and animation states.
/// </summary>
[RequireComponent(typeof(AudioSource))]
public class RecordingWav : MonoBehaviour
{
    // Button image: tinted red while recording, white when idle.
    public Image imageButton;
    // Animator that plays the button's "Normal" / "Highlighted" / "Pressed" states.
    public Animator recordingButton;
    // Target path of the recording file (persistentDataPath/Microphone.wav), set in Start().
    string filePath = null;
    // Whole seconds captured by the last recording (integer division, rounded down).
    int audioLength_time;
    private AudioSource m_audioSource;
    private AudioClip m_audioClip;
    public const int SamplingRate = 8000;
    // Not referenced anywhere in this class.
    private const int HEADER_SIZE = 44;
    // Length in seconds of the clip handed to Microphone.Start.
    private const int MaxRecordingSeconds = 60;
    public SpeechRecognition speechRecognition;
    // True between StartRecording(true) and StartRecording(false).
    [HideInInspector]
    public bool isRecording = false;
    // Reset to null every time a new recording is converted.
    [HideInInspector]
    public Byte[] speech_Byte;
    // Holds the converted PCM bytes of the last recording; read by SpeechRecognition.
    [HideInInspector]
    public MemoryStream memoryStream;
    // Optional: when assigned, UIPressed delegates recording to it instead of this class.
    public MicrophoneManager microphoneManager;
    // Toggle state of the record button.
    bool isPressed = false;

    void Start()
    {
        m_audioSource = GetComponent<AudioSource>();
        filePath = Path.Combine(Application.persistentDataPath, "Microphone.wav");
    }

    /// <summary>
    /// Starts (true) or stops (false) a microphone recording. On stop, recordings
    /// of one whole second or less are discarded; longer ones are converted,
    /// scheduled for upload and played back.
    /// </summary>
    /// <param name="isRecording">True to start recording, false to stop and process.</param>
    public void StartRecording(bool isRecording)
    {
        // The parameter shadows the public field, so the field must be set explicitly.
        this.isRecording = isRecording;

        if (isRecording)
        {
            // End any recording still in progress; null selects the default microphone.
            Microphone.End(null);
            m_audioClip = Microphone.Start(null, false, MaxRecordingSeconds, SamplingRate);
            imageButton.color = Color.red;
            return;
        }

        imageButton.color = Color.white;

        // The position must be read before Microphone.End is called.
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
        {
            audioLength_time = lastPos / SamplingRate;
        }
        else
        {
            audioLength_time = 0;
            Debug.Log("error : 录音时间太短");
        }
        Microphone.End(null);

        if (audioLength_time <= 1)
        {
            return;
        }

        SaveWav(filePath, m_audioClip);
        PlayAudioClip();
    }

    /// <summary>
    /// Plays the last recorded clip through the attached AudioSource when a clip
    /// exists and its length exceeds five seconds.
    /// </summary>
    public void PlayAudioClip()
    {
        // Null check first: reading .length on a missing clip would throw.
        if (m_audioClip == null || m_audioClip.length <= 5)
        {
            return;
        }

        if (m_audioSource.isPlaying)
        {
            m_audioSource.Stop();
        }
        m_audioSource.clip = m_audioClip;
        m_audioSource.Play();
    }

    /// <summary>
    /// Recreates an empty file at <paramref name="filename"/> and converts
    /// <paramref name="clip"/> to PCM in memory (see ConvertAndWrite).
    /// </summary>
    /// <returns>True when conversion was started; false when an exception occurred.</returns>
    bool SaveWav(string filename, AudioClip clip)
    {
        try
        {
            File.Delete(filename);
        }
        catch (Exception ex)
        {
            // Best effort: a stale file that cannot be removed must not block the recording.
            Debug.Log(ex);
        }

        try
        {
            FileInfo info = new FileInfo(filename);
            if (!info.Exists)
            {
                // Create() returns an open stream; dispose it so the file handle is released.
                using (info.Create())
                {
                }
            }
            ConvertAndWrite(clip);
            return true;
        }
        catch (Exception ex)
        {
            Debug.Log("error : " + ex);
            return false;
        }
    }

    /// <summary>
    /// Copies the recorded part of <paramref name="clip"/> (plus one spare second)
    /// into <see cref="memoryStream"/> as 16-bit little-endian PCM and schedules
    /// the upload.
    /// </summary>
    void ConvertAndWrite(AudioClip clip)
    {
        // One float per sample per channel; the extra second compensates for the
        // rounding-down of audioLength_time. The 2-bytes-per-sample factor belongs
        // to the byte buffer below, not to the sample count.
        int wantedSamples = (audioLength_time + 1) * SamplingRate * clip.channels;
        // Never request more than the clip holds, otherwise GetData wraps around
        // and re-reads samples from the start of the clip.
        int availableSamples = clip.samples * clip.channels;
        int sampleCount = Mathf.Min(wantedSamples, availableSamples);

        float[] samples = new float[sampleCount];
        clip.GetData(samples, 0);

        Byte[] bytesData = new Byte[sampleCount * 2];
        const int rescaleFactor = 32767; // scales [-1, 1] floats to the Int16 range
        for (int i = 0; i < sampleCount; i++)
        {
            // Clamp so out-of-range samples saturate instead of wrapping around.
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);
            bytesData[i * 2] = (byte)(pcm & 0xFF);
            bytesData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        speech_Byte = null;
        // Read-only stream over the converted bytes; consumed by SpeechRecognition.
        memoryStream = new MemoryStream(bytesData, false);
        StartCoroutine(WriteFileStream());
    }

    /// <summary>Waits one second, then triggers the upload of the converted audio.</summary>
    IEnumerator WriteFileStream()
    {
        yield return new WaitForSeconds(1);
        if (speechRecognition == null)
        {
            Debug.Log("error : speechRecognition is not assigned");
            yield break;
        }
        speechRecognition.UploadAudio();
    }

    /// <summary>UI callback: plays the "Pressed" animation state.</summary>
    public void UIHighlighted()
    {
        recordingButton.Play("Pressed");
    }

    /// <summary>
    /// UI callback: toggles recording. Uses <see cref="microphoneManager"/> when it
    /// is assigned, otherwise this component's own StartRecording.
    /// </summary>
    public void UIPressed()
    {
        isPressed = !isPressed;
        recordingButton.Play("Highlighted");

        if (!microphoneManager)
        {
            StartRecording(isPressed);
            return;
        }

        if (isPressed)
        {
            microphoneManager.StartRecording();
            imageButton.color = Color.red;
        }
        else
        {
            microphoneManager.StopRecording();
            imageButton.color = Color.white;
        }
    }

    /// <summary>UI callback: plays the "Normal" animation state.</summary>
    public void UINormal()
    {
        recordingButton.Play("Normal");
    }
}
2. 接下来展示网络处理类 `SpeechRecognition`:
using UnityEngine;
using System.Collections;
using System.Text;
using System;
using UnityEngine.Networking;
using UnityEngine.UI;
using System.IO;
#if NETFX_CORE //UWP下编译
using Windows.Storage;
using StreamWriter = WinRTLegacy.IO.StreamWriter;
using StreamReader = WinRTLegacy.IO.StreamReader;
#else
using StreamWriter = System.IO.StreamWriter;
using StreamReader = System.IO.StreamReader;
#endif
// JSON request body for the recognition endpoint. SpeechRecognition.GetIdentifyWords
// fills every field and serializes the object with JsonUtility.ToJson, so the field
// names below are the JSON keys that get sent.
[Serializable]
public class UploadData
{
// Audio format name (SpeechRecognition sends "wav").
public string format;
// Sampling rate of the audio (SpeechRecognition sends 8000).
public int rate;
// Channel count (SpeechRecognition sends 1).
public int channel;
// Caller-chosen identifier taken from SpeechRecognition.cuid.
public string cuid;
// Language code (SpeechRecognition sends "zh").
public string lan;
// access_token obtained from the token endpoint.
public string token;
// Base64-encoded audio bytes.
public string speech;
// Length in bytes of the audio before Base64 encoding.
public int len;
}
// Deserialization target for the token endpoint's JSON response
// (filled via JsonUtility.FromJsonOverwrite in SpeechRecognition).
// Only access_token is read by the code in this file.
[Serializable]
public class BaiDuTokenData
{
// Token that is forwarded as UploadData.token.
public string access_token;
public string session_key;
public string scope;
public string refresh_token;
public string session_secret;
// NOTE(review): presumably the token lifetime in seconds — confirm against the service docs.
public int expires_in;
}
// Deserialization target for the recognition endpoint's JSON response
// (filled via JsonUtility.FromJsonOverwrite in SpeechRecognition).
[Serializable]
public class AcceptanceIdentification
{
// NOTE(review): declared as string — confirm the service does not return this as a JSON number.
public string err_no;
public string err_msg;
public string sn;
// Recognition candidates; SpeechRecognition displays and matches result[0].
public string[] result;
public string corpus_no;
}
/// <summary>
/// Uploads the audio prepared by <see cref="RecordingWav"/> to the speech
/// recognition REST endpoint: fetches an access token, posts the Base64-encoded
/// audio as JSON, shows the first recognition result and colours the Text
/// children of <see cref="text_Group_layout"/> according to whether their text
/// occurs in that result.
/// </summary>
public class SpeechRecognition : MonoBehaviour
{
    // Parent of the Text elements that are matched against the recognised sentence.
    public GameObject text_Group_layout;
    private Text[] textgroup;
    // Displays the recognition result or an error message.
    public Text show_text;
    // Recorder on the same GameObject; owner of the memoryStream that gets uploaded.
    private RecordingWav recordingWav;

    string format = "wav";
    // Must match the rate the audio was recorded at.
    int rate = RecordingWav.SamplingRate;
    int channel = 1;
    string cuid = "xxxxx"; // put your own account identifier here
    string lan = "zh";
    string token;
    string speech;
    int len;

    // Put your own client id / secret here; do not commit real credentials.
    string client_id = ".........";
    string client_secret = "..............";
    string url_token = "https://openapi.baidu.com/oauth/2.0/token";
    string url_api = "http://vop.baidu.com/server_api";

    void Start()
    {
        recordingWav = GetComponent<RecordingWav>();
        if (text_Group_layout == null)
        {
            return;
        }
        textgroup = text_Group_layout.gameObject.GetComponentsInChildren<Text>();
    }

    /// <summary>Starts the coroutine that uploads the recorded audio for recognition.</summary>
    public void UploadAudio()
    {
        StartCoroutine(GetIdentifyWords());
    }

    /// <summary>
    /// Colours each Text under <see cref="text_Group_layout"/> green when its text
    /// is contained in <paramref name="needTest"/>, red otherwise.
    /// </summary>
    void MatchTheWords(string needTest)
    {
        // textgroup is only populated when the layout was assigned before Start().
        if (text_Group_layout == null || textgroup == null || needTest == null)
        {
            return;
        }

        Debug.Log(needTest);
        foreach (Text candidate in textgroup)
        {
            candidate.color = needTest.Contains(candidate.text) ? Color.green : Color.red;
        }
    }

    /// <summary>
    /// Coroutine: reads the recorded bytes, requests an access token, posts the
    /// audio and hands the response to HandleRecognitionResponse. Aborts with a
    /// message when audio, token or the network request is unavailable.
    /// </summary>
    IEnumerator GetIdentifyWords()
    {
        if (recordingWav == null || recordingWav.memoryStream == null)
        {
            ShowMessage("参数信息不够");
            yield break;
        }

        // ToArray copies the whole stream content; no reader wrapper is needed for binary data.
        byte[] buffer = recordingWav.memoryStream.ToArray();
        recordingWav.memoryStream.Dispose();
        len = buffer.Length;
        speech = Convert.ToBase64String(buffer);

        WWWForm tokenForm = new WWWForm();
        tokenForm.AddField("grant_type", "client_credentials");
        tokenForm.AddField("client_id", client_id);
        tokenForm.AddField("client_secret", client_secret);

        using (WWW tokenRequest = new WWW(url_token, tokenForm))
        {
            yield return tokenRequest;

            if (!string.IsNullOrEmpty(tokenRequest.error))
            {
                ShowMessage("error :" + tokenRequest.error);
                yield break;
            }
            token = ParseAccessToken(tokenRequest.text);
        }

        // Without a token or audio the request cannot succeed, so stop here.
        if (string.IsNullOrEmpty(token) || string.IsNullOrEmpty(speech))
        {
            ShowMessage("参数信息不够");
            yield break;
        }

        UploadData uploadData = new UploadData
        {
            format = format,
            rate = rate,
            channel = channel,
            cuid = cuid,
            token = token,
            speech = speech,
            len = len,
            lan = lan
        };
        byte[] postBytes = Encoding.UTF8.GetBytes(JsonUtility.ToJson(uploadData));

        using (UnityWebRequest request = new UnityWebRequest(url_api, "POST"))
        {
            request.uploadHandler = new UploadHandlerRaw(postBytes);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");
            yield return request.Send();

            if (!string.IsNullOrEmpty(request.error))
            {
                ShowMessage("error :" + request.error);
                yield break;
            }
            HandleRecognitionResponse(request.downloadHandler.text);
        }
    }

    /// <summary>Extracts access_token from the token response; returns null when it cannot be parsed.</summary>
    static string ParseAccessToken(string json)
    {
        if (string.IsNullOrEmpty(json))
        {
            return null;
        }

        try
        {
            BaiDuTokenData tokenData = new BaiDuTokenData();
            JsonUtility.FromJsonOverwrite(json, tokenData);
            return tokenData.access_token;
        }
        catch (Exception ex)
        {
            Debug.Log("error : " + ex);
            return null;
        }
    }

    /// <summary>
    /// Parses the recognition response. Shows and matches the first result, or
    /// shows the raw response when it carries no result.
    /// </summary>
    void HandleRecognitionResponse(string json)
    {
        try
        {
            AcceptanceIdentification acceptData = new AcceptanceIdentification();
            JsonUtility.FromJsonOverwrite(json, acceptData);

            if (acceptData.result == null || acceptData.result.Length == 0)
            {
                // No result: the raw response is the most useful thing to display.
                ShowMessage(json);
                return;
            }

            ShowMessage(acceptData.result[0]);
            MatchTheWords(acceptData.result[0]);
        }
        catch (Exception ex)
        {
            ShowMessage("error :" + ex);
        }
    }

    /// <summary>Writes a message to show_text, or to the log when no Text is assigned.</summary>
    void ShowMessage(string message)
    {
        if (show_text != null)
        {
            show_text.text = message;
        }
        else
        {
            Debug.Log(message);
        }
    }
}
我是李本心明
首先谢谢大家的支持;其次,如果你碰到什么其他问题,欢迎来我自己的讨论群 559666429(扫描下面的二维码或者点击群链接 Unity3D[ 交流] ),大家一起找答案,共同进步。
由于工作生活太忙了,对于大家的帮助时间已经没有之前那么充裕了。如果有志同道合的朋友,可以接受无偿的帮助别人,可以加我QQ单独联系我,一块经营一下。