1.查看官方文档、登录并下载我们所需的SDK。语音唤醒需要我们设置唤醒词。
在控制台下载对应的SDK。由于讯飞官方只提供了C/C++语言版本,我们需要用C#调用所下载SDK中的dll库文件。
2.将dll库拖进Unity项目中
如果目标设备为64位,我们选择msc_x64.dll;如果是32位,我们选择msc.dll。
另外,如果我们要使用语音唤醒功能,还需要将wakeupresource.jet拖进Unity项目中。
3.编写C#代码调用dll库
语音合成和语音唤醒部分具体请看Unity 接入科大讯飞语音识别及语音合成_Windwos(该UP有在评论区分享demo,非常暖心!!)
【注意】在使用up主的demo时,我们需要将自己的dll库存放进去,appid换成我们自己的(APPID 于讯飞开发者控制台创建应用申请所得)
以下是我在参考VAIN_K语音唤醒和Unity 接入科大讯飞语音识别及语音合成_Windwos之后稍作调整的代码:
我们在使用语音功能时要先调用登入方法,结束时要调用退出登录的方法。
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using Unity.VisualScripting;
using UnityEngine;
/// <summary>
/// 语音工具
/// </summary>
public static class Speech
{
/* Login parameter carrying the APPID, which is obtained by creating an application
 * in the iFlytek developer console. Replace the placeholder value with your own. */
const string mAppID = "appid=11111111";
// Absolute path to the wake-up resource file wakeupresource.jet.
// NOTE(review): machine-specific path; it must be adjusted per installation.
const string path = "D:\\UnityProject\\VoiceToText\\Assets\\Speech\\Plugins\\wakeupresource.jet";
// Parameter string that embeds the resource path above.
// NOTE(review): presumably passed when beginning a wake-up (QIVW) session - the usage is not visible in this part of the file.
const string qivw_session_begin_params = "ivw_threshold=0:1450,sst=wakeup,ivw_res_path =fo|" + path;
// Publicly writable flag, initially false.
// NOTE(review): presumably set once the wake word has been detected - confirm against the wake-up code.
public static bool isAwaken = false;
/// <summary>
/// Logs in to the iFlytek MSC service using the configured application ID.
/// </summary>
/// <returns>
/// The code returned by the native login call. Zero is treated as success;
/// any other value is logged as an error code.
/// </returns>
public static int MSPLogin()
{
    int loginCode = MSCDLL.MSPLogin(null, null, mAppID);

    // Success path first; everything else is reported as a failure.
    if (loginCode == 0)
    {
        Debug.Log("登录接口成功");
        return loginCode;
    }

    Debug.Log($"login failed. error code: {loginCode}");
    return loginCode;
}
/// <summary>
/// Logs out of the iFlytek MSC service and logs whether the native call succeeded.
/// </summary>
public static void MSPLogout()
{
    int logoutCode = MSCDLL.MSPLogout();

    // Zero means the native call succeeded; any other value is an error code.
    string message = logoutCode == 0
        ? "退出登录成功"
        : $"logout failed. error code {logoutCode}";

    Debug.Log(message);
}
/// <summary>
/// Runs one iFlytek speech-recognition session over an in-memory audio buffer
/// and returns the recognized text.
/// </summary>
/// <remarks>
/// This method does not log in or out by itself; call <see cref="MSPLogin"/> before
/// using it and log out when finished. It blocks the calling thread, polling the
/// native library for results every 150 ms until recognition completes.
/// </remarks>
/// <param name="clipBuffer">
/// Audio bytes to recognize. The session is opened with <c>sample_rate=16000</c>.
/// </param>
/// <returns>
/// The recognized text; <c>null</c> when the buffer is null or empty or when any native
/// call reports an error; the text collected so far when the result would exceed
/// 4096 characters.
/// </returns>
public static string Asr(byte[] clipBuffer)
{
    /* Upper bound on the accumulated result length, in characters. */
    const int MaxResultLength = 4096;

    /* Reject unusable input before a native session is opened, so that a null
     * buffer can no longer throw after the session has begun (leaking it). */
    if (clipBuffer == null || clipBuffer.Length == 0)
    {
        Debug.Log("audio buffer can not be null or empty.");
        return null;
    }

    /* Receives the error code of the native calls; 0 means success. */
    int res = 0;

    /* Begin a recognition session. The returned handle is needed by every later call.
     * The second argument is the session parameter list (language, accent, sample
     * rate, result format ...); see the iFlytek SDK documentation for the options. */
    IntPtr sessionID = MSCDLL.QISRSessionBegin(null,
        "sub=iat,domain=iat,language=zh_cn,accent=mandarin,sample_rate=16000,result_type=plain,result_encoding= utf-8", ref res);
    if (res != 0)
    {
        Debug.Log($"begin failed. error code: {res}");
        OnErrorEvent();
        return null;
    }

    /* Tracks the end-point detection status reported by the engine. */
    EpStatus epStatus = EpStatus.MSP_EP_LOOKING_FOR_SPEECH;
    /* Tracks the recognition status reported by the engine. */
    RecogStatus recognizeStatus = RecogStatus.MSP_REC_STATUS_SUCCESS;

    /* Write the whole audio buffer in one call; a non-zero return is an error code. */
    res = MSCDLL.QISRAudioWrite(sessionID, clipBuffer, (uint)clipBuffer.Length, AudioStatus.MSP_AUDIO_SAMPLE_CONTINUE, ref epStatus, ref recognizeStatus);
    if (res != 0)
    {
        Debug.Log($"write failed. error code: {res}");
        MSCDLL.QISRSessionEnd(sessionID, "error");
        OnErrorEvent();
        return null;
    }

    /* A second, empty write flagged MSP_AUDIO_SAMPLE_LAST marks the end of the audio. */
    res = MSCDLL.QISRAudioWrite(sessionID, null, 0, AudioStatus.MSP_AUDIO_SAMPLE_LAST, ref epStatus, ref recognizeStatus);
    if (res != 0)
    {
        Debug.Log($"write failed. error code: {res}");
        MSCDLL.QISRSessionEnd(sessionID, "error");
        OnErrorEvent();
        return null;
    }

    /* Accumulates the recognized text. */
    StringBuilder sb = new StringBuilder();
    /* Running length of the recognized text, in characters. */
    int length = 0;

    /* Poll for partial results until the engine reports completion. */
    while (recognizeStatus != RecogStatus.MSP_REC_STATUS_COMPLETE)
    {
        IntPtr curtRslt = MSCDLL.QISRGetResult(sessionID, ref recognizeStatus, 0, ref res);
        if (res != 0)
        {
            Debug.Log($"get result failed. error code: {res}");
            MSCDLL.QISRSessionEnd(sessionID, "error");
            OnErrorEvent();
            return null;
        }

        /* IntPtr is a struct, so it must be compared with IntPtr.Zero rather than null. */
        if (curtRslt != IntPtr.Zero)
        {
            /* NOTE(review): the session requests utf-8 results while this decodes with the
             * ANSI marshaller - verify that non-ASCII text decodes correctly on the target runtime. */
            string partial = Marshal.PtrToStringAnsi(curtRslt);
            if (!string.IsNullOrEmpty(partial))
            {
                /* Measure the decoded text itself, not the textual form of the pointer. */
                length += partial.Length;
                if (length > MaxResultLength)
                {
                    Debug.Log($"size not enough: {length} > {MaxResultLength}");
                    MSCDLL.QISRSessionEnd(sessionID, "error");
                    OnErrorEvent();
                    return sb.ToString();
                }
                sb.Append(partial);
            }
        }

        /* Only wait when another poll is actually going to happen. */
        if (recognizeStatus != RecogStatus.MSP_REC_STATUS_COMPLETE)
        {
            Thread.Sleep(150);
        }
    }

    /* All results received: end the session. A failure here is only logged,
     * because the recognized text is already complete. */
    res = MSCDLL.QISRSessionEnd(sessionID, "ao li gei !");
    if (res != 0) Debug.Log($"end failed. error code: {res}");

    return sb.ToString();
}
/// <summary>
/// Reads an audio file from disk and runs iFlytek speech recognition on its bytes.
/// </summary>
/// <param name="path">Path of the audio file to recognize.</param>
/// <returns>
/// The recognized text, or <c>null</c> when the path is null or empty, the file
/// cannot be read, or recognition itself returns <c>null</c>.
/// </returns>
public static string Asr(string path)
{
    if (!string.IsNullOrEmpty(path))
    {
        byte[] audioBytes = null;
        try
        {
            audioBytes = File.ReadAllBytes(path);
        }
        catch (Exception readError)
        {
            // Any read failure (missing file, access denied, ...) is logged and reported as null.
            Debug.Log($"exception: {readError.Message}");
            return null;
        }

        // Recognition runs outside the try block, so its exceptions are not swallowed here.
        return Asr(audioBytes);
    }

    Debug.Log("path can not be null.");
    return null;
}
/// <summary>
/// Runs iFlytek speech recognition on a Unity <c>AudioClip</c>.
/// </summary>
/// <param name="clip">The clip whose audio should be recognized.</param>
/// <returns>The result of the byte-buffer overload for the converted clip data.</returns>
public static string Asr(AudioClip clip)
{
    // ToPCM16 is defined outside this method.
    // NOTE(review): presumably it yields 16-bit PCM bytes - confirm against its definition.
    byte[] pcmBytes = clip.ToPCM16();

    return Asr(pcmBytes);
}
/// <summary>
/// 科大讯飞语音合成
/// </summary>
/// <param name="content">需要合成音频的文本内容</param>
/// <returns>合成后的音频</returns>
public static AudioClip Tts(string content, TtsVoice voice = TtsVoice.XuJiu)
{
/* 首先调用登录接口
* 登录成功返回0,否则为错误代码 */
//int res = MSPLogin();
int res = 0;
/* 调用开启一次语音合成的接口
* 接收返回后的句柄,后续调用写入文本等接口需要使用
* 调用成功error code为0,否则为错误代码
* 备注:
* 第一个参数为 开启一次语音合成的参数列表
* 具体参照科大讯飞官网SDK文档 */
string voicer = "";
switch (voice)
{
case TtsVoice.XiaoYan:
voicer = "xiaoyan";
break;
case TtsVoice.XuJiu:
voicer = "aisjiuxu";
break;
case TtsVoice.XiaoPing:
voicer = "aisxping";
break;
case TtsVoice.XiaoJing:
voicer = "aisjinger";
break;
case TtsVoice.XuXiaoBao:
voicer = "aisbabyxu";
break;
default:
break;
}
IntPtr sessionID = MSCDLL.QTTSSessionBegin($"engine_type = cloud, voice_name = {voicer}, speed = 65, pitch = 40, text_encoding = utf8, sample_rate = 16000", ref res);
if (res != 0)
{
Debug.Log($"begin failed. error code: {res}");
OnErrorEvent();
return null;
}
/* 调用写入文本的接口 将需要合成内容传入
* 调用成功返回0,否则为错误代码 */
res = MSCDLL.QTTSTextPut(sessionID, content, (uint)Encoding.UTF8.GetByteCount(content), string.Empty);
if (res != 0)
{
Debug.Log($"put text failed. error code: {res}");
OnErrorEvent();
return null;
}
/* 用于记录长度 */
uint audioLength = 0;
/* 用于记录合成状态 */
SynthStatus synthStatus = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
List<byte[]> bytesList = new List<byte[]>();
/* 文本写入后 调用获取合成音频的接口
* 获取成功error code为0,否则为错误代码
* 需反复调用 直到合成状态为结束 或出现错误代码 */
try
{
while (true)
{
IntPtr intPtr = MSCDLL.QTTSAudioGet(sessionID, ref audioLength, ref synthStatus, ref res);
byte[] byteArray = new byte[(int)audioLength];
if (audioLength > 0) Marshal.Copy(intPtr, byteArray, 0, (int)audioLength);
bytesList.Add(byteArray);
Thread.Sleep(150);
if (synthStatus == SynthStatus.MSP_TTS_FLAG_DATA_END || res != 0)
break;
}
}
catch (Exception e)
{
OnErrorEvent();
Debug.Log($"error: {e.Message}");
return null;
}
int size = 0;
for (int i = 0; i < bytesList.Count; i++)
{
size += bytesList[i].Length;
}
var header = GetWaveHeader(size);
byte[] array = header.ToBytes();
bytesList.Insert(0, array);
size += array.Length;
byte[] bytes = new byte[size];
size = 0;
for (int i = 0; i < bytesList.Count; i++)
{
bytesList[i].CopyTo(bytes, size);
size += bytesList[i].Length;
}
AudioClip clip = bytes.ToWAV();
res = MSCDLL.QTTSSessionEnd(sessionID, "ao li gei !");
if (res != 0)
{
Debug.Log($"end failed. error code: {res}");
OnErrorEvent();
return clip;
}
//M