Using the iFlytek speech SDK in Unity (speech synthesis + speech recognition + voice wakeup), Windows SDK

1. Read the official documentation, log in, and download the SDK we need. Voice wakeup requires configuring a wakeup word.

iFlytek speech SDK documentation (official site)

iFlytek speech product overview (official site)

Download the matching SDK from the console. iFlytek only provides a C/C++ version, so we have to call the downloaded SDK's dll from C# via P/Invoke.
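A minimal sketch of what such a P/Invoke declaration looks like (the full set of declarations appears in the MSCDLL class further below; the class name here is only for illustration, and "msc_x64" assumes a 64-bit build, use "msc" for 32-bit):

using System.Runtime.InteropServices;

public static class MscSketch
{
    // MSPLogin / MSPLogout are exported by msc_x64.dll (msc.dll for 32-bit builds).
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPLogin(string usr, string pwd, string parameters);

    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPLogout();
}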

2. Drag the dll libraries into the Unity project

If the target device is 64-bit, use msc_x64.dll; if it is 32-bit, use msc.dll.

If we also want the voice wakeup feature, we additionally need to drag wakeupresource.jet into the Unity project.
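The wakeup resource is then passed to QIVWSessionBegin through the ivw_res_path parameter in the fo|<path> format (see the parameter notes in the MSCDLL class below). As a sketch, the parameter string can be built from StreamingAssets instead of a hard-coded absolute path, assuming wakeupresource.jet has been copied to Assets/StreamingAssets (the helper class name is made up):

using System.IO;
using UnityEngine;

public static class IvwParamsSketch
{
    public static string Build()
    {
        // fo|<path> tells MSC to load the offline wakeup resource from a file on disk.
        string jetPath = Path.Combine(Application.streamingAssetsPath, "wakeupresource.jet");
        return "ivw_threshold=0:1450,sst=wakeup,ivw_res_path=fo|" + jetPath;
    }
}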

3. Write C# code that calls the dll

For the speech synthesis and speech recognition parts, see Unity 接入科大讯飞语音识别及语音合成_Windwos (the author shares a demo in the comments section, which is very kind!).

[Note] When using that author's demo, put your own dll into it and replace the appid with your own (the APPID is obtained by creating an application in the iFlytek developer console).

Below is the code I ended up with after referring to VAIN_K's voice-wakeup article and Unity 接入科大讯飞语音识别及语音合成_Windwos, with some adjustments of my own:

When using any of the speech features, call the login method first and call the logout method when you are finished.
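As a minimal usage sketch of the Speech class listed below (the component and field names here are placeholders for illustration only):

using UnityEngine;

[RequireComponent(typeof(AudioSource))]
public class SpeechUsageExample : MonoBehaviour
{
    public AudioClip question;   //an AudioClip recorded or imported elsewhere

    void Start()
    {
        if (Speech.MSPLogin() != 0) return;          //log in before any other speech call
        string text = Speech.Asr(question);          //speech recognition
        Debug.Log(text);
        AudioClip reply = Speech.Tts("你好,我在。");  //speech synthesis
        GetComponent<AudioSource>().PlayOneShot(reply);
        Speech.MSPLogout();                          //log out when finished
    }
}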

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using Unity.VisualScripting;
using UnityEngine;


/// <summary>
/// 语音工具
/// </summary>
public static class Speech
{
    /* The APPID is obtained by creating an application in the iFlytek developer console */
    const string mAppID = "appid=11111111";
    //Path to wakeupresource.jet (replace with your own path)
    const string path = "D:\\UnityProject\\VoiceToText\\Assets\\Speech\\Plugins\\wakeupresource.jet";
    const string qivw_session_begin_params = "ivw_threshold=0:1450,sst=wakeup,ivw_res_path=fo|" + path;
    public static bool isAwaken = false;

    /// <summary>
    /// 登录接口
    /// </summary>
    /// <returns></returns>
    public static int MSPLogin()
    {
        int res = MSCDLL.MSPLogin(null, null, mAppID);
        if (res != 0)
        {
            Debug.Log($"login failed. error code: {res}");
        }
        else
            Debug.Log("登录接口成功");
        return res;
    }
    /// <summary>
    /// 退出登录
    /// </summary>
    public static void MSPLogout()
    {
        int error = MSCDLL.MSPLogout();
        if (error != 0)
            Debug.Log($"logout failed. error code {error}");
        else
            Debug.Log("退出登录成功");
    }


    /// <summary>
    /// 科大讯飞语音识别
    /// </summary>
    /// <param name="clipBuffer">音频数据</param>
    /// <returns>识别后的字符串结果</returns>
    public static string Asr(byte[] clipBuffer)
        {
        /* MSPLogin must have been called beforehand (handled by the caller);
         * a return value of 0 means success, otherwise it is an error code */
        int res = 0;
        /* 调用开启一次语音识别的接口
         * 接收返回的句柄,后续调用写入音频、获取结果等接口需要使用
         * 调用成功error code为0,否则为错误代码 
         * 备注:
         *  第二个参数为 开始一次语音识别的参数列表 可以再进行具体的封装
         *  例如 language参数 封装枚举 switch中文 为zh_cn    switch英文 为en_us
         *  具体参照科大讯飞官网SDK文档 */
        IntPtr sessionID = MSCDLL.QISRSessionBegin(null,
                "sub=iat,domain=iat,language=zh_cn,accent=mandarin,sample_rate=16000,result_type=plain,result_encoding=utf-8", ref res);
            if (res != 0)
            {
                Debug.Log($"begin failed. error code: {res}");
                OnErrorEvent();
                return null;
            }

            /* 用于记录端点状态 */
            EpStatus epStatus = EpStatus.MSP_EP_LOOKING_FOR_SPEECH;
            /* 用于记录识别状态 */
            RecogStatus recognizeStatus = RecogStatus.MSP_REC_STATUS_SUCCESS;

            /* 调用音频写入接口 将需要识别的音频数据传入
             * 写入成功返回0,否则为错误代码 */
            res = MSCDLL.QISRAudioWrite(sessionID, clipBuffer, (uint)clipBuffer.Length, AudioStatus.MSP_AUDIO_SAMPLE_CONTINUE, ref epStatus, ref recognizeStatus);
            if (res != 0)
            {
                Debug.Log($"write failed. error code: {res}");
                MSCDLL.QISRSessionEnd(sessionID, "error");
                OnErrorEvent();
                return null;
            }
            res = MSCDLL.QISRAudioWrite(sessionID, null, 0, AudioStatus.MSP_AUDIO_SAMPLE_LAST, ref epStatus, ref recognizeStatus);
            if (res != 0)
            {
                Debug.Log($"write failed. error code: {res}");
                MSCDLL.QISRSessionEnd(sessionID, "error");
                OnErrorEvent();
                return null;
            }

            /* 用于存储识别结果 */
            StringBuilder sb = new StringBuilder();
            /* 用于累加识别结果的长度 */
            int length = 0;

            /* 音频写入后 反复调用获取识别结果的接口直到获取完毕 */
            while (recognizeStatus != RecogStatus.MSP_REC_STATUS_COMPLETE)
            {
                IntPtr curtRslt = MSCDLL.QISRGetResult(sessionID, ref recognizeStatus, 0, ref res);
                if (res != 0)
                {
                    Debug.Log($"get result failed. error code: {res}");
                    MSCDLL.QISRSessionEnd(sessionID, "error");
                    OnErrorEvent();
                    return null;
                }
                /* If this partial result is not empty, append it to sb */
                if (curtRslt != IntPtr.Zero)
                {
                    string partial = Marshal.PtrToStringAnsi(curtRslt);
                    length += partial.Length;
                    if (length > 4096)
                    {
                        Debug.Log($"size not enough: {length} > 4096");
                        MSCDLL.QISRSessionEnd(sessionID, "error");
                        OnErrorEvent();
                        return sb.ToString();
                    }
                    sb.Append(partial);
                }
                Thread.Sleep(150);
            }

            /* 获取完全部识别结果后 结束本次语音识别 */
            res = MSCDLL.QISRSessionEnd(sessionID, "ao li gei !");
            if (res != 0) Debug.Log($"end failed. error code: {res}");

        /* MSPLogout is handled by the caller; return the recognition result */
        //MSPLogout();
            return sb.ToString();
        }

    /// <summary>
    /// 科大讯飞语音识别
    /// </summary>
    /// <param name="path">音频文件所在路径</param>
    /// <returns>识别后的字符串结果</returns>
    public static string Asr(string path)
        {
            if (string.IsNullOrEmpty(path))
            {
                Debug.Log("path can not be null.");
                return null;
            }
            byte[] clipBuffer;
            try
            {
                clipBuffer = File.ReadAllBytes(path);
            }
            catch (Exception e)
            {
                Debug.Log($"exception: {e.Message}");
                return null;
            }
            return Asr(clipBuffer);
        }
    /// <summary>
    /// 科大讯飞语音识别
    /// </summary>
    /// <param name="clip">需要识别的AudioClip</param>
    /// <returns>识别后的字符串结果</returns>
    public static string Asr(AudioClip clip)
        {
            byte[] clipBuffer = clip.ToPCM16();
            return Asr(clipBuffer);
        }

    /// <summary>
    /// 科大讯飞语音合成
    /// </summary>
    /// <param name="content">需要合成音频的文本内容</param>
    /// <returns>合成后的音频</returns>
    public static AudioClip Tts(string content, TtsVoice voice = TtsVoice.XuJiu)
        {
        /* MSPLogin must have been called beforehand (handled by the caller);
         * a return value of 0 means success, otherwise it is an error code */
        //int res = MSPLogin();
        int res = 0;

            /* 调用开启一次语音合成的接口
             * 接收返回后的句柄,后续调用写入文本等接口需要使用
             * 调用成功error code为0,否则为错误代码
             * 备注:
             *  第一个参数为 开启一次语音合成的参数列表
             *  具体参照科大讯飞官网SDK文档 */
            string voicer = "";
            switch (voice)
            {
                case TtsVoice.XiaoYan:
                    voicer = "xiaoyan";
                    break;
                case TtsVoice.XuJiu:
                    voicer = "aisjiuxu";
                    break;
                case TtsVoice.XiaoPing:
                    voicer = "aisxping";
                    break;
                case TtsVoice.XiaoJing:
                    voicer = "aisjinger";
                    break;
                case TtsVoice.XuXiaoBao:
                    voicer = "aisbabyxu";
                    break;
                default:
                    break;
            }
            IntPtr sessionID = MSCDLL.QTTSSessionBegin($"engine_type = cloud, voice_name = {voicer}, speed = 65, pitch = 40, text_encoding = utf8, sample_rate = 16000", ref res);
            if (res != 0)
            {
                Debug.Log($"begin failed. error code: {res}");
                OnErrorEvent();
                return null;
            }

            /* 调用写入文本的接口 将需要合成内容传入
             * 调用成功返回0,否则为错误代码 */
            res = MSCDLL.QTTSTextPut(sessionID, content, (uint)Encoding.UTF8.GetByteCount(content), string.Empty);
            if (res != 0)
            {
                Debug.Log($"put text failed. error code: {res}");
                OnErrorEvent();
                return null;
            }

            /* 用于记录长度 */
            uint audioLength = 0;
            /* 用于记录合成状态 */
            SynthStatus synthStatus = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;

            List<byte[]> bytesList = new List<byte[]>();

            /* 文本写入后 调用获取合成音频的接口
             * 获取成功error code为0,否则为错误代码
             * 需反复调用 直到合成状态为结束 或出现错误代码 */
            try
            {
                while (true)
                {
                    IntPtr intPtr = MSCDLL.QTTSAudioGet(sessionID, ref audioLength, ref synthStatus, ref res);
                    byte[] byteArray = new byte[(int)audioLength];
                    if (audioLength > 0) Marshal.Copy(intPtr, byteArray, 0, (int)audioLength);

                    bytesList.Add(byteArray);

                    Thread.Sleep(150);
                    if (synthStatus == SynthStatus.MSP_TTS_FLAG_DATA_END || res != 0)
                        break;
                }
            }
            catch (Exception e)
            {
                OnErrorEvent();
                Debug.Log($"error: {e.Message}");
                return null;
            }

            int size = 0;
            for (int i = 0; i < bytesList.Count; i++)
            {
                size += bytesList[i].Length;
            }

            /* pass data size + 44-byte header so that FileSize/DataSize in the header are correct */
            var header = GetWaveHeader(size + 44);
            byte[] array = header.ToBytes();
            bytesList.Insert(0, array);
            size += array.Length;

            byte[] bytes = new byte[size];

            size = 0;
            for (int i = 0; i < bytesList.Count; i++)
            {
                bytesList[i].CopyTo(bytes, size);
                size += bytesList[i].Length;
            }
            AudioClip clip = bytes.ToWAV();


            res = MSCDLL.QTTSSessionEnd(sessionID, "ao li gei !");
            if (res != 0)
            {
                Debug.Log($"end failed. error code: {res}");
                OnErrorEvent();
                return clip;
            }

        //MSPLogout();
            return clip;
    }
    /// <summary>
    /// 科大讯飞语音合成
    /// </summary>
    /// <param name="content">需要合成的内容</param>
    /// <param name="path">将合成后的音频写入指定的路径</param>
    /// <returns>调用成功返回true 发生异常返回false</returns>
    public static bool Tts(string content, string path)
        {
            /* 首先调用登录接口
             * 登录成功返回0,否则为错误代码 */
            int res = MSCDLL.MSPLogin(null, null, mAppID);
            if (res != 0)
            {
                Debug.Log($"login failed. error code: {res}");
                return false;
            }

            /* 调用开启一次语音合成的接口
             * 接收返回后的句柄,后续调用写入文本等接口需要使用
             * 调用成功error code为0,否则为错误代码
             * 备注:
             *  第一个参数为 开启一次语音合成的参数列表
             *  具体参照科大讯飞官网SDK文档 */
            IntPtr sessionID = MSCDLL.QTTSSessionBegin("engine_type = cloud, voice = xiaoyan, text_encoding = utf8, sample_rate = 16000", ref res);
            if (res != 0)
            {
                Debug.Log($"begin failed. error code: {res}");
                OnErrorEvent();
                return false;
            }

            /* 调用写入文本的接口 将需要合成内容传入
             * 调用成功返回0,否则为错误代码 */
            res = MSCDLL.QTTSTextPut(sessionID, content, (uint)Encoding.UTF8.GetByteCount(content), string.Empty);
            if (res != 0)
            {
                Debug.Log($"put text failed. error code: {res}");
                OnErrorEvent();
                return false;
            }

            /* 用于记录长度 */
            uint audioLength = 0;
            /* 用于记录合成状态 */
            SynthStatus synthStatus = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
            /* 开启一个流 */
            MemoryStream ms = new MemoryStream();
            ms.Write(new byte[44], 0, 44);


            /* 文本写入后 调用获取合成音频的接口
             * 获取成功error code为0,否则为错误代码
             * 需反复调用 直到合成状态为结束 或出现错误代码 */
            try
            {
                while (true)
                {
                    IntPtr intPtr = MSCDLL.QTTSAudioGet(sessionID, ref audioLength, ref synthStatus, ref res);
                    byte[] byteArray = new byte[(int)audioLength];
                    if (audioLength > 0) Marshal.Copy(intPtr, byteArray, 0, (int)audioLength);
                    ms.Write(byteArray, 0, (int)audioLength);
                    Thread.Sleep(150);
                    if (synthStatus == SynthStatus.MSP_TTS_FLAG_DATA_END || res != 0)
                        break;
                }
            }
            catch (Exception e)
            {
                OnErrorEvent();
                Debug.Log($"error: {e.Message}");
                return false;
            }


            var header = GetWaveHeader((int)ms.Length);
            byte[] array = header.ToBytes();
            ms.Position = 0L;
            ms.Write(array, 0, array.Length);
            ms.Position = 0L;

            FileStream fs = new FileStream(path, System.IO.FileMode.Create, FileAccess.Write);
            ms.WriteTo(fs);
            ms.Close();
            fs.Close();

            res = MSCDLL.QTTSSessionEnd(sessionID, "ao li gei !");
            if (res != 0)
            {
                Debug.Log($"end failed. error code: {res}");
                OnErrorEvent();
                return false;
            }

            res = MSCDLL.MSPLogout();
            if (res != 0)
            {
                Debug.Log($"logout failed. error code: {res}");
                return false;
            }
            return true;
        }

    /// <summary>
    /// 科大讯飞语音唤醒
    /// </summary>
    /// <param name="clipBuffer">音频数据</param>
    /// <returns>识别后的字符串结果</returns>
    public static void MSCAwaken(byte[] clipBuffer)
    {
        /* MSPLogin must already have been called externally before this method runs */
        int errorCode = 0;
        IntPtr sessionID = MSCDLL.QIVWSessionBegin(null, qivw_session_begin_params, ref errorCode);
        if (errorCode != 0)
        {
            Debug.LogError("Failed to begin the wakeup session. Error code: " + errorCode);
            OnErrorEvent();
            return;
        }
        //The audio byte[] comes from the automatic recording code in section 4 below
        //byte[] playerClipByte = AudioClipToByte(Player.clip, start, end);//recorded audio as byte[]
        int message = MSCDLL.QIVWRegisterNotify(sessionID, cb_ivw_msg_proc, IntPtr.Zero);
        if (message == 0)
        {
            message = MSCDLL.QIVWAudioWrite(sessionID, clipBuffer, (uint)clipBuffer.Length, AudioStatus.MSP_AUDIO_SAMPLE_LAST);
            if (message != 0)
                Debug.LogError("Failed to write the wakeup audio. Error code: " + message);
        }
        else
        {
            Debug.LogError("Failed to register the wakeup callback. Error code: " + message);
        }
        //if (message != 0)
        //{
        //    Debug.LogError("写入语音唤醒失败!错误信息:" + message);
        //    OnErrorEvent();
        //    return;
        //}
        //Debug.Log("QIVWAudioWrite");
        //message = MSCDLL.QIVWAudioWrite(sessionID, null, 0, AudioStatus.MSP_AUDIO_SAMPLE_LAST);
        //if (message != 0)
        //{
        //    Debug.LogError("写入语音唤醒失败!错误信息:" + message);
        //    OnErrorEvent();
        //    return;
        //}
        message = MSCDLL.QIVWSessionEnd(sessionID, "语音唤醒结束");
        if (message != 0)
        {
            Debug.LogError("语音唤醒结束失败!错误信息:" + message);
            OnErrorEvent();
            return;
        }
        /* 最终退出登录*/
    }

    /// <summary>
    /// 语音唤醒的回调函数
    /// </summary>
    /// <param name="sessionID"></param>
    /// <param name="msg"></param>
    /// <param name="param1"></param>
    /// <param name="param2"></param>
    /// <param name="info"></param>
    /// <param name="userData"></param>
    /// <returns></returns>
    private static int cb_ivw_msg_proc(IntPtr sessionID, int msg, int param1, int param2, IntPtr info, IntPtr userData)
    {
        if (msg == (int)IvwStatus.MSP_IVW_MSG_ERROR) //唤醒出错消息
        {
            Debug.LogError("MSP_IVW_MSG_ERROR error code:" + param1);
        }
        else if (msg == (int)IvwStatus.MSP_IVW_MSG_WAKEUP)//唤醒成功消息
        {
            isAwaken = true;
            Debug.Log("唤醒成功,执行回调函数");
        }
        return 0;
    }

    /* 发生异常后调用退出登录接口 */
    static void OnErrorEvent()
        {
            int res = MSCDLL.MSPLogout();
            if (res != 0)
            {
                Debug.Log($"logout failed. error code: {res}");
            }
        }
    /* 语音音频头 初始化赋值 */
    static WaveHeader GetWaveHeader(int dataLen)
        {
            return new WaveHeader
            {
                RIFFID = 1179011410,
                FileSize = dataLen - 8,
                RIFFType = 1163280727,
                FMTID = 544501094,
                FMTSize = 16,
                FMTTag = 1,
                FMTChannel = 1,
                FMTSamplesPerSec = 16000,
                AvgBytesPerSec = 32000,
                BlockAlign = 2,
                BitsPerSample = 16,
                DataID = 1635017060,
                DataSize = dataLen - 44
            };
        }
}
public class MSCDLL
{
    #region msp_cmn.h 通用接口
    /// <summary>
        /// 初始化msc 用户登录  user login. 
        /// 使用其他接口前必须先调用MSPLogin,可以在应用程序启动时调用
        /// </summary>
        /// <param name="usr">user name. 此参数保留 传入NULL即可</param>
        /// <param name="pwd">password. 此参数保留 传入NULL即可</param>
        /// <param name="parameters">parameters when user login. 每个参数和参数值通过key=value的形式组成参数对,如果有多个参数对,再用逗号进行拼接</param>
        ///     通用 appid 应用ID: 于讯飞开放平台申请SDK成功后获取到的appid
        ///     离线 engine_start 离线引擎启动: 启用离线引擎 支持参数: ivw:唤醒 asr:识别
        ///     离线 [xxx]_res_path 离线引擎资源路径: 设置ivw asr引擎离线资源路径 
        ///             详细格式: fo|[path]|[offset]|[length]|xx|xx 
        ///             单个资源路径示例: ivw_res_path=fo|res/ivw/wakeupresource.jet
        ///             多个资源路径示例: asr_res_path=fo|res/asr/common.jet;fo|res/asr/sms.jet
        /// <returns>return 0 if sucess, otherwise return error code. 成功返回MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPLogin(string usr, string pwd, string parameters);
        /// <summary>
        /// 退出登录  user logout.
        /// 本接口和MSPLogin配合使用 确保其他接口调用结束之后调用MSPLogout,否则结果不可预期
        /// </summary>
        /// <returns>如果函数调用成功返回MSP_SUCCESS,否则返回错误代码 return 0 if sucess, otherwise return error code.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPLogout();
        /// <summary>
        /// 用户数据上传  upload data such as user config, custom grammar, etc. 
        /// </summary>
        /// <param name="dataName">数据名称字符串  should be unique to diff other data.</param>
        /// <param name="data">待上传数据缓冲区的起始地址  the data buffer pointer, data could be binary.</param>
        /// <param name="dataLen">数据长度(如果是字符串,则不包含'\0')  length of data.</param>
        /// <param name="_params">parameters about uploading data.</param>
        ///     在线 sub = uup,dtt = userword 上传用户词表
        ///     在线 sub = uup,dtt = contact 上传联系人
        /// <param name="errorCode">return 0 if success, otherwise return error code.</param>
        /// <returns>data id returned by server, used for special command.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr MSPUploadData(string dataName, IntPtr data, uint dataLen, string _params, ref int errorCode);
        /// <summary>
        /// write data to msc, such as data to be uploaded, searching text, etc.
        /// </summary>
        /// <param name="data">the data buffer pointer, data could be binary.</param>
        /// <param name="dataLen">length of data.</param>
        /// <param name="dataStatus">data status, 2: first or continuous, 4: last.</param>
        /// <returns>return 0 if success, otherwise return error code.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPAppendData(IntPtr data, uint dataLen, uint dataStatus);
        /// <summary>
        /// download data such as user config, etc.
        /// </summary>
        /// <param name="_params">parameters about data to be downloaded.</param>
        /// <param name="dataLen">length of received data.</param>
        /// <param name="errorCode">return 0 if success, otherwise return error code.</param>
        /// <returns>received data buffer pointer, data could be binary, null if failed or data does not exsit.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr MSPDownloadData(string _params, ref uint dataLen, ref int errorCode);
        /// <summary>
        /// set param of msc.  参数设置接口、离线引擎初始化接口 
        /// </summary>
        /// <param name="paramName">param name.</param>
        ///     离线 engine_start   启动离线引擎
        ///     离线 engine_destroy 销毁离线引擎
        /// <param name="paramValue">param value. 参数值</param>
        /// <returns>return 0 if success, otherwise return errcode. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPSetParam(string paramName, string paramValue);
        /// <summary>
        /// get param of msc.  获取msc的设置信息
        /// </summary>
        /// <param name="paramName">param name. 参数名,一次调用只支持查询一个参数</param>
        ///     在线 upflow   上行数据量
        ///     在线 downflow 下行数据量
        /// <param name="paramValue">param value.</param>
        ///     输入: buffer首地址
        ///     输出: 向该buffer写入获取到的信息
        /// <param name="valueLen">param value (buffer) length.</param>
        ///     输入: buffer的大小
        ///     输出: 信息实际长度(不含'\0')
        /// <returns>return 0 if success, otherwise return errcode. 函数调用成功返回MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int MSPGetParam(string paramName, ref byte[] paramValue, ref uint valueLen);
        /// <summary>
        /// get version of msc or local engine. 获取msc或本地引擎版本信息
        /// </summary>
        /// <param name="verName">version name, could be "msc", "aitalk", "aisound", "ivw". 参数名,一次调用只支持查询一个参数</param>
        ///     离线 ver_msc msc版本号
        ///     离线 ver_asr 离线识别版本号,目前不支持
        ///     离线 ver_tts 离线合成版本号
        ///     离线 ver_ivw 离线唤醒版本号
        /// <param name="errorCode">return 0 if success, otherwise return error code. 如果函数调用成功返回MSP_SUCCESS,否则返回错误代码</param>
        /// <returns>return version value if success, null if fail.  成功返回缓冲区指针,失败或数据不存在返回NULL</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr MSPGetVersion(string verName, ref int errorCode);
        #endregion

    #region qisr.h 语音识别
    /// <summary>
        /// create a recognizer session to recognize audio data. 开始一次语音识别
        /// </summary>
        /// <param name="grammarList">garmmars list, inline grammar support only one. 此参数保留,传入NULL即可</param>
        /// <param name="_params">parameters when the session created.</param>
        ///     通用  engine_type     引擎类型                      cloud在线引擎 local离线引擎
        ///     在线  sub             本次识别请求的类型            iat语音听写 asr命令词识别
        ///     在线  language        语言                          zh_cn简体中文 en_us英文
        ///     在线  domain          领域                          iat语音听写
        ///     在线  accent          语言区域                      mandarin普通话
        ///     通用  sample_rate     音频采样率                    16000 8000
        ///     通用  asr_threshold   识别门限                      离线语法识别结果门限值,设置只返回置信度得分大于此门限值的结果 0-100
        ///     离线  asr_denoise     是否开启降噪功能              0不开启 1开启
        ///     离线  asr_res_path    离线识别资源路径              离线识别资源所在路径
        ///     离线  grm_build_path  离线语法生成路径              构建离线语法所生成数据的保存路径(文件夹)
        ///     通用  result_type     结果格式                      plain json
        ///     通用  text_encoding   文本编码格式                  表示参数中携带的文本编码格式
        ///     离线  local_grammar   离线语法id                    构建离线语法后获得的语法ID
        ///     通用  ptt             添加标点符号(sub=iat时有效)   0:无标点符号;1:有标点符号
        ///     在线  aue             音频编码格式和压缩等级        编码算法:raw;speex;speex-wb;ico 编码等级: raw不进行压缩 speex系列0-10
        ///     通用  result_encoding 识别结果字符串所用编码格式    plain:UTF-8,GB2312 json:UTF-8
        ///     通用  vad_enable      VAD功能开关                   是否启用VAD 默认为开启VAD 0(或false)为关闭
        ///     通用  vad_bos         允许头部静音的最长时间        目前未开启该功能
        ///     通用  vad_eos         允许尾部静音的最长时间        0-10000毫秒 默认为2000
        /// <param name="errorCode">return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</param>
        /// <returns>return session id of current session, null is failed. 函数调用成功返回字符串格式的sessionID,失败返回NULL sessionID是本次识别的句柄</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QISRSessionBegin(string grammarList, string _params, ref int errorCode);
    /// <summary>
        /// writing binary audio data to recognizer. 写入本次识别的音频
        /// </summary>
        /// <param name="sessionID">the session id returned by recog_begin. 由QISRSessionBegin返回的句柄</param>
        /// <param name="waveData">binary data of waveform. 音频数据缓冲区起始地址</param>
        /// <param name="waveLen">waveform data size in bytes. 音频数据长度,单位字节</param>
        /// <param name="audioStatus">audio status. 用来告知msc音频发送是否完成</param>
        /// <param name="epStatus">ep status. 端点检测(End-point detected)器所处的状态</param>
        /// <param name="recogStatus">recognition status. 识别器返回的状态,提醒用户及时开始\停止获取识别结果</param>
        ///     本接口需不断调用,直到音频全部写入为止 上传音频时,需更新audioStatus的值 具体来说:
        ///         当写入首块音频时,将audioStatus置为MSP_AUDIO_SAMPLE_FIRST
        ///         当写入最后一块音频时,将audioStatus置为MSP_AUDIO_SAMPLE_LAST
        ///         其余情况下,将audioStatus置为MSP_AUDIO_SAMPLE_CONTINUE
        ///     同时,需定时检查两个变量: epStatus和recogStatus 具体来说:
        ///         当epStatus显示已检测到后端点时,MSC已不再接收音频,应及时停止音频写入
        ///         当rsltStatus显示有识别结果返回时,即可从MSC缓存中获取结果
        /// <returns>return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRAudioWrite(IntPtr sessionID, byte[] waveData, uint waveLen, AudioStatus audioStatus, ref EpStatus epStatus, ref RecogStatus recogStatus);
    /// <summary>
        /// get recognize result in specified format. 获取识别结果
        /// </summary>
        /// <param name="sessionID">session id returned by session begin. 由QISRSessionBegin返回的句柄</param>
        /// <param name="rsltStatus">status of recognition result, 识别结果的状态,其取值范围和含义请参考QISRAudioWrite 的参数recogStatus</param>
        /// <param name="waitTime">此参数做保留用</param>
        /// <param name="errorCode">return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</param>
        /// 当写入音频过程中已经有部分识别结果返回时,可以获取结果
        /// 在音频写入完毕后,用户需反复调用此接口,直到识别结果获取完毕(rlstStatus值为5)或返回错误码 
        /// 注意:如果某次成功调用后暂未获得识别结果,请将当前线程sleep一段时间,以防频繁调用浪费CPU资源
        /// <returns>return 0 if success, otherwise return error code. 函数执行成功且有识别结果时,返回结果字符串指针 其他情况(失败或无结果)返回NULL</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QISRGetResult(IntPtr sessionID, ref RecogStatus rsltStatus, int waitTime, ref int errorCode);
    /// <summary>
        /// end the recognizer session, release all resource. 结束本次语音识别
        /// 本接口和QISRSessionBegin对应,调用此接口后,该句柄对应的相关资源(参数、语法、音频、实例等)都会被释放,用户不应再使用该句柄
        /// </summary>
        /// <param name="sessionID">session id string to end. 由QISRSessionBegin返回的句柄</param>
        /// <param name="hints">user hints to end session, hints will be logged to CallLog. 结束本次语音识别的原因描述,为用户自定义内容</param>
        /// <returns>return 0 if sucess, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRSessionEnd(IntPtr sessionID, string hints);
    /// <summary>
        /// get params related with msc. 获取当次语音识别信息,如上行流量、下行流量等
        /// </summary>
        /// <param name="sessionID">session id of related param, set null to got global param. 由QISRSessionbegin返回的句柄,如果为NULL,获取msc的设置信息</param>
        /// <param name="paramName">param name,could pass more than one param split by ','';'or'\n'. 参数名,一次调用只支持查询一个参数</param>
        ///     在线  sid         服务端会话ID 长度为32字节
        ///     在线  upflow      上行数据量
        ///     在线  downflow    下行数据量
        ///     通用  volume      最后一次写入的音频的音量
        /// <param name="paramValue">param value buffer, malloced by user.</param>
        ///     输入: buffer首地址
        ///     输出: 向该buffer写入获取到的信息
        /// <param name="valueLen">pass in length of value buffer, and return length of value string.</param>
        ///     输入: buffer的大小
        ///     输出: 信息实际长度(不含’\0’)
        /// <returns>return 0 if success, otherwise return errcode. 函数调用成功返回MSP_SUCCESS,否则返回错误代码</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRGetParam(string sessionID, string paramName, ref byte[] paramValue, ref uint valueLen);
    #endregion

    #region qtts.h 语音合成
        /// <summary>
        /// create a tts session to synthesize data. 开始一次语音合成,分配语音合成资源
        /// </summary>
        /// <param name="_params">parameters when the session created. 传入的参数列表</param>
        ///     通用  engine_type     引擎类型                      cloud在线引擎 local离线引擎
        ///     通用  voice_name      发音人                        不同的发音人代表了不同的音色 如男声、女声、童声等
        ///     通用  speed           语速                          0-100 default50
        ///     通用  volume          音量                          0-100 dafault50
        ///     通用  pitch           语调                          0-100 default50
        ///     离线  tts_res_path    合成资源路径                  合成资源所在路径,支持fo 方式参数设置
        ///     通用  rdn             数字发音                      0数值优先 1完全数值 2完全字符串 3字符串优先
        ///     离线  rcn             1 的中文发音                  0表示发音为yao 1表示发音为yi
        ///     通用  text_encoding   文本编码格式(必传)            合成文本编码格式,支持参数,GB2312,GBK,BIG5,UNICODE,GB18030,UTF8
        ///     通用  sample_rate     合成音频采样率                合成音频采样率,支持参数,16000,8000,默认为16000
        ///     在线  background_sound背景音                        0无背景音乐   1有背景音乐
        ///     在线  aue             音频编码格式和压缩等级        编码算法:raw;speex;speex-wb;ico 编码等级: raw不进行压缩 speex系列0-10
        ///     在线  ttp             文本类型                      text普通格式文本 cssml格式文本
        ///     离线  speed_increase  语速增强                      1正常 2二倍语速 4四倍语速
        ///     离线  effect          合成音效                      0无音效 1忽远忽近 2回声 3机器人 4合唱 5水下 6混响 7阴阳怪气
        /// <param name="errorCode">return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</param>
        /// <returns>return the new session id if success, otherwise return null. 函数调用成功返回字符串格式的sessionID,失败返回NULL sessionID是本次合成的句柄</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSSessionBegin(string _params, ref int errorCode);
        /// <summary>
        /// writing text string to synthesizer. 写入要合成的文本
        /// </summary>
        /// <param name="sessionID">the session id returned by sesson begin. 由QTTSSessionBegin返回的句柄</param>
        /// <param name="textString">text buffer. 字符串指针 指向待合成的文本字符串</param>
        /// <param name="textLen">text size in bytes. 合成文本长度,最大支持8192个字节</param>
        /// <param name="_params">parameters when the session created. 本次合成所用的参数,只对本次合成的文本有效 目前为空</param>
        /// <returns>return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSTextPut(IntPtr sessionID, string textString, uint textLen, string _params);
        /// <summary>
        /// synthesize text to audio, and return audio information. 获取合成音频
        /// </summary>
        /// <param name="sessionID">session id returned by session begin. 由QTTSSessionBegin返回的句柄</param>
        /// <param name="audioLen">synthesized audio size in bytes. 合成音频长度,单位字节</param>
        /// <param name="synthStatus">synthesizing status. 合成音频状态</param>
        /// <param name="errorCode">return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</param>
        /// 用户需要反复获取音频,直到音频获取完毕或函数调用失败
        /// 在重复获取音频时,如果暂未获得音频数据,需要将当前线程sleep一段时间,以防频繁调用浪费CPU资源
        /// <returns>return current synthesized audio data buffer, size returned by QTTSTextSynth. 函数调用成功且有音频数据时返回非空指针 调用失败或无音频数据时,返回NULL</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSAudioGet(IntPtr sessionID, ref uint audioLen, ref SynthStatus synthStatus, ref int errorCode);
        /// <summary>
        /// get synthesized audio data information.
        /// </summary>
        /// <param name="sessionID">session id returned by session begin.</param>
        /// <returns>return audio info string.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern IntPtr QTTSAudioInfo(IntPtr sessionID);
        /// <summary>
        /// end the recognizer session, release all resource. 结束本次语音合成
        /// 本接口和QTTSSessionBegin对应,调用此接口后,该句柄对应的相关资源(参数 合成文本 实例等)都会被释放,用户不应再使用该句柄
        /// </summary>
        /// <param name="sessionID">session id string to end. 由QTTSSessionBegin返回的句柄</param>
        /// <param name="hints">user hints to end session, hints will be logged to CallLog. 结束本次语音合成的原因描述,为用户自定义内容</param>
        /// <returns>return 0 if success, otherwise return error code. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSSessionEnd(IntPtr sessionID, string hints);
        /// <summary>
        /// set params related with msc.
        /// </summary>
        /// <param name="sessionID">session id of related param, set null to got global param.</param>
        /// <param name="paramName">param name,could pass more than one param split by ','';'or'\n'.</param>
        /// <param name="paramValue">param value buffer, malloced by user.</param>
        /// <returns>return 0 if success, otherwise return errcode.</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSSetParam(IntPtr sessionID, string paramName, byte[] paramValue);
        /// <summary>
        /// get params related with msc. 获取当前语音合成信息,如当前合成音频对应文本结束位置、上行流量、下行流量等
        /// </summary>
        /// <param name="sessionID">session id of related param, set NULL to got global param. 由QTTSSessionBegin返回的句柄,如果为NULL,获取msc的设置信息</param>
        /// <param name="paramName">param name,could pass more than one param split by ','';'or'\n'. 参数名,一次调用只支持查询一个参数</param>
        ///     在线  sid         服务端会话ID 长度为32字节
        ///     在线  upflow      上行数据量
        ///     在线  downflow    下行数据量
        ///     通用  ced         当前合成音频对应文本结束位置
        /// <param name="paramValue">param value buffer, malloced by user.</param>
        ///     输入: buffer首地址
        ///     输出: 向该buffer写入获取到的信息
        /// <param name="valueLen">pass in length of value buffer, and return length of value string</param>
        ///     输入: buffer的大小
        ///     输出: 信息实际长度(不含’\0’)
        /// <returns>return 0 if success, otherwise return errcode. 函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
        [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
        public static extern int QTTSGetParam(IntPtr sessionID, string paramName, ref byte[] paramValue, ref uint valueLen);
    #endregion

    #region qivw.h 语音唤醒
    /// <summary>
    /// 开始唤醒功能,本次唤醒所用的参数等。
    /// </summary>
    /// <param name="grammarList">保留参数,设置为NULL即可。</param>
    /// <param name="_params"></param>
    /// <param name="errorCode">函数调用成功则其值为MSP_SUCCESS(0),否则返回错误代码,详见错误码列表</param>
    /// <returns>函数调用成功返回字符串格式的sessionID,失败返回NULL。sessionID是本次唤醒的句柄。</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QIVWSessionBegin(string grammarList, string _params, ref int errorCode);

    /// <summary>
    /// 结束本次语音唤醒。
    /// </summary>
    /// <param name="sessionID">由QIVWSessionBegin返回的句柄。</param>
    /// <param name="hints">结束本次语音唤醒的原因描述,为用户自定义内容。</param>
    /// <returns>函数调用成功则其值为MSP_SUCCESS,否则返回错误代码</returns>
    /// 本接口和QIVWSessionBegin 对应,用来本次语音唤醒。调用此接口后,该句柄对应的相关资源(参数、语法、音频、实例等)都会被释放,用户不应再使用该句柄。
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QIVWSessionEnd(IntPtr sessionID, string hints);

    /// <summary>
    /// 写入本次唤醒的音频,本接口需要反复调用直到音频写完为止。
    /// </summary>
    /// <param name="sessionID">由QIVWSessionBegin返回的句柄。</param>
    /// <param name="audioData">音频数据缓冲区起始地址。</param>
    /// <param name="audioLen">音频数据长度,单位字节。</param>
    /// <param name="audioStatus">用来告知MSC音频发送是否完成,典型值如下:</param>
    ///         枚举常量                                 简介
    ///     MSP_AUDIO_SAMPLE_FIRST = 1               第一块音频
    ///     MSP_AUDIO_SAMPLE_CONTINUE = 2	        还有后继音频
    ///     MSP_AUDIO_SAMPLE_LAST = 4	            最后一块音频
    /// <returns></returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QIVWAudioWrite(IntPtr sessionID, byte[] audioData, uint audioLen, AudioStatus audioStatus);

    /// <summary>
    /// 注册回调。
    /// </summary>
    /// <param name="sessionID">由QIVWSessionBegin返回的句柄。</param>
    /// <param name="msgProcCb">注册通知的回调函数,唤醒结果将在此注册回调中返回。格式为:typedef int( *ivw_ntf_handler)( const char *sessionID, int msg,int param1, int param2, const void *info, void *userData );
    /// 参数说明查看官方文档</param>
    /// <param name="userData">用户数据</param>
    /// <returns></returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QIVWRegisterNotify(IntPtr sessionID, [MarshalAs(UnmanagedType.FunctionPtr)] ivw_ntf_handler msgProcCb, IntPtr userData);

    //定义回调函数
    [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
    public delegate int ivw_ntf_handler(IntPtr sessionID, int msg, int param1, int param2, IntPtr info, IntPtr userData);
    #endregion
}

#region QISR
/// <summary>
/// 用来告知msc音频发送是否完成
/// </summary>
public enum AudioStatus
{
        MSP_AUDIO_SAMPLE_INIT = 0x00,
        MSP_AUDIO_SAMPLE_FIRST = 0x01, //第一块音频
        MSP_AUDIO_SAMPLE_CONTINUE = 0x02, //还有后继音频
        MSP_AUDIO_SAMPLE_LAST = 0x04, //最后一块音频
    }
/// <summary>
/// 端点检测(End-point detected)器所处的状态
/// </summary>
public enum EpStatus
    {
        MSP_EP_LOOKING_FOR_SPEECH = 0,    //还没有检测到音频的前端点
        MSP_EP_IN_SPEECH = 1,    //已经检测到了音频前端点,正在进行正常的音频处理
        MSP_EP_AFTER_SPEECH = 3,    //检测到音频的后端点,后记的音频会被msc忽略
        MSP_EP_TIMEOUT = 4,    //超时
        MSP_EP_ERROR = 5,    //出现错误
        MSP_EP_MAX_SPEECH = 6,    //音频过大
        MSP_EP_IDLE = 7,    // internal state after stop and before start
    }
/// <summary>
/// 识别器返回的状态,提醒用户及时开始\停止获取识别结果
/// </summary>
public enum RecogStatus
    {
        MSP_REC_STATUS_SUCCESS = 0,    //识别成功,此时用户可以调用QISRGetResult来获取(部分结果)
        MSP_REC_STATUS_NO_MATCH = 1,    //识别结束,没有识别结果
        MSP_REC_STATUS_INCOMPLETE = 2,    //未完成 正在识别中
        MSP_REC_STATUS_NON_SPEECH_DETECTED = 3,
        MSP_REC_STATUS_SPEECH_DETECTED = 4,
        MSP_REC_STATUS_COMPLETE = 5,    //识别结束
        MSP_REC_STATUS_MAX_CPU_TIME = 6,
        MSP_REC_STATUS_MAX_SPEECH = 7,
        MSP_REC_STATUS_STOPPED = 8,
        MSP_REC_STATUS_REJECTED = 9,
        MSP_REC_STATUS_NO_SPEECH_FOUND = 10,
        MSP_REC_STATUS_FAILURE = MSP_REC_STATUS_NO_MATCH,
}

#endregion

#region QTTS
/// <summary>
/// 合成状态
/// </summary>
public enum SynthStatus
    {
        MSP_TTS_FLAG_CMD_CANCELED = 4,
        MSP_TTS_FLAG_STILL_HAVE_DATA = 1,   //音频还没获取完 还有后续的音频
        MSP_TTS_FLAG_DATA_END = 2,   //音频已经获取完
    }

    /// <summary>
    /// 语音音频头
    /// </summary>
    public struct WaveHeader
    {
        public int RIFFID;
        public int FileSize;
        public int RIFFType;
        public int FMTID;
        public int FMTSize;
        public short FMTTag;
        public ushort FMTChannel;
        public int FMTSamplesPerSec;
        public int AvgBytesPerSec;
        public ushort BlockAlign;
        public ushort BitsPerSample;
        public int DataID;
        public int DataSize;
    }

    public enum TtsVoice
    {
        XiaoYan = 0,    //讯飞小燕
        XuJiu = 1,    //讯飞许久
        XiaoPing = 2,    //讯飞小萍
        XiaoJing = 3,    //讯飞小婧
        XuXiaoBao = 4,    //讯飞许小宝
}
public enum SynthesizingStatus
{
    MSP_TTS_FLAG_STILL_HAVE_DATA = 1,
    MSP_TTS_FLAG_DATA_END = 2,
    MSP_TTS_FLAG_CMD_CANCELED = 4,
};

/* Handwriting process flags */
public enum HandwritingStatus
{
    MSP_HCR_DATA_FIRST = 1,
    MSP_HCR_DATA_CONTINUE = 2,
    MSP_HCR_DATA_END = 4,
};

/*ivw message type */
public enum IvwStatus
{
    MSP_IVW_MSG_WAKEUP = 1,
    MSP_IVW_MSG_ERROR = 2,
    MSP_IVW_MSG_ISR_RESULT = 3,
    MSP_IVW_MSG_ISR_EPS = 4,
    MSP_IVW_MSG_VOLUME = 5,
    MSP_IVW_MSG_ENROLL = 6,
    MSP_IVW_MSG_RESET = 7
};

/* Upload data process flags */
public enum UploadStatus
{
    MSP_DATA_SAMPLE_INIT = 0x00,
    MSP_DATA_SAMPLE_FIRST = 0x01,
    MSP_DATA_SAMPLE_CONTINUE = 0x02,
    MSP_DATA_SAMPLE_LAST = 0x04,
};
    #endregion

public class WAV
    {
        // convert two bytes to one float in the range -1 to 1
        static float BytesToFloat(byte firstByte, byte secondByte)
        {
            // convert two bytes to one short (little endian)
            short s = (short)((secondByte << 8) | firstByte);
            // convert to range from -1 to (just below) 1
            return s / 32768.0F;
        }
        static int BytesToInt(byte[] bytes, int offset = 0)
        {
            int value = 0;
            for (int i = 0; i < 4; i++)
            {
                value |= ((int)bytes[offset + i]) << (i * 8);
            }
            return value;
        }
        // properties
        public float[] LeftChannel { get; internal set; }
        public float[] RightChannel { get; internal set; }
        public int ChannelCount { get; internal set; }
        public int SampleCount { get; internal set; }
        public int Frequency { get; internal set; }
        public WAV(byte[] wav)
        {
            // Determine if mono or stereo
            ChannelCount = wav[22];     // Forget byte 23 as 99.999% of WAVs are 1 or 2 channels
            // Get the frequency
            Frequency = BytesToInt(wav, 24);
            // Get past all the other sub chunks to get to the data subchunk:
            int pos = 12;   // First Subchunk ID from 12 to 16
            // Keep iterating until we find the data chunk (i.e. 64 61 74 61 ...... (i.e. 100 97 116 97 in decimal))
            while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
            {
                pos += 4;
                int chunkSize = wav[pos] + wav[pos + 1] * 256 + wav[pos + 2] * 65536 + wav[pos + 3] * 16777216;
                pos += 4 + chunkSize;
            }
            pos += 8;
            // Pos is now positioned to start of actual sound data.
            SampleCount = (wav.Length - pos) / 2;     // 2 bytes per sample (16 bit sound mono)
            if (ChannelCount == 2) SampleCount /= 2;        // 4 bytes per sample (16 bit stereo)
            // Allocate memory (right will be null if only mono sound)
            LeftChannel = new float[SampleCount];
            if (ChannelCount == 2) RightChannel = new float[SampleCount];
            else RightChannel = null;
            // Write to double array/s:
            int i = 0;
            while (pos < wav.Length)
            {
                LeftChannel[i] = BytesToFloat(wav[pos], wav[pos + 1]);
                pos += 2;
                if (ChannelCount == 2)
                {
                    RightChannel[i] = BytesToFloat(wav[pos], wav[pos + 1]);
                    pos += 2;
                }
                i++;
            }
        }
        public override string ToString()
        {
            return string.Format("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
        }
    }
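
The Speech class above calls three extension methods that are not shown in this post: ToBytes() on WaveHeader, ToWAV() on byte[] and ToPCM16() on AudioClip. They come from the referenced demo; below is a minimal sketch of what they might look like, consistent with the WaveHeader struct and the WAV parser above (the implementation details are assumptions):

using System;
using System.Runtime.InteropServices;
using UnityEngine;

public static class SpeechExtensions
{
    /* WaveHeader -> raw 44-byte header bytes (sequential struct layout) */
    public static byte[] ToBytes(this WaveHeader header)
    {
        int size = Marshal.SizeOf(header);              //44 bytes for the struct above
        byte[] bytes = new byte[size];
        IntPtr ptr = Marshal.AllocHGlobal(size);
        Marshal.StructureToPtr(header, ptr, false);
        Marshal.Copy(ptr, bytes, 0, size);
        Marshal.FreeHGlobal(ptr);
        return bytes;
    }

    /* byte[] holding a complete WAV file -> AudioClip, using the WAV parser above */
    public static AudioClip ToWAV(this byte[] wavBytes)
    {
        WAV wav = new WAV(wavBytes);
        AudioClip clip = AudioClip.Create("tts", wav.SampleCount, 1, wav.Frequency, false);
        clip.SetData(wav.LeftChannel, 0);
        return clip;
    }

    /* AudioClip -> 16-bit little-endian PCM bytes, as expected by QISRAudioWrite */
    public static byte[] ToPCM16(this AudioClip clip)
    {
        float[] samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);
        byte[] outData = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            short s = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
            byte[] b = BitConverter.GetBytes(s);
            outData[i * 2] = b[0];
            outData[i * 2 + 1] = b[1];
        }
        return outData;
    }
}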

4. Write the actual usage code (automatic recording + voice wakeup + speech synthesis + speech recognition + the Tongyi Qianwen large model)

For using the Tongyi Qianwen large model in Unity, see my article: Unity使用通义千问大模型接口

The automatic recording code again draws on VAIN_K's article 自动录音+截取音频保存; building on both authors' work, I made the following additions and changes:

using System;
using System.Collections;
using System.IO;
using System.Threading;
using UnityEngine;

public class AutomaticRecord : MonoBehaviour
{
    [SerializeField] private float volume;//音量
    [SerializeField] private float minVolume = 2;//录音关闭音量值
    [SerializeField] private float maxVolume = 3;//录音开启音量值
    [SerializeField] private int minVolume_Number;//记录的小音量数量
    private const int minVolume_Sum = 200;//小音量总和值
    private const int VOLUME_DATA_LENGTH = 128;    //录制的声音长度
    private const int frequency = 16000; //码率
    private const int lengthSec = 600;   //录制时长

    private AudioSource audioSource;  //录制的音频
    private bool isRecord = false;//录音开关
    private bool isLogout = false;//是否已退出登录
    private bool isPlayed = true;//音频是否播放完毕(使当前可录入我们的问题)
    private int start;//录音起点
    private int end;//录音终点

    private string voiceText;
    private AudioClip aiAudioClip;
    public AudioSource _audioSource;
    private float time = 0;//定时器
    private float timeLimit = 25;
    public QWTurbo qw;

    private void Start()
    {
        audioSource = GetComponent<AudioSource>();
        audioSource.clip = Microphone.Start(null, true, lengthSec, frequency);
    }

    private void Update()
    {
        volume = GetVolume(audioSource.clip, VOLUME_DATA_LENGTH);
        RecordOpenClose();

        //长时间无反应,唤醒状态关闭
        if (Speech.isAwaken && isPlayed)
        {
            Debug.Log("正在计时");   
            time += Time.deltaTime;
            if(time > timeLimit)
            {
                AwakeClose();
                time = 0;
            }
        }
    }

    private void OnDisable()
    {
        if(!isLogout)
            Speech.MSPLogout();
    }

    /// <summary>
    /// 录音自动开关
    /// </summary>
    private void RecordOpenClose()
    {
        //上一个语音是否已经播放完毕
        if (isPlayed)
        {
            //开
            if (!isRecord && volume >= maxVolume)
            {
                start = Microphone.GetPosition(Microphone.devices[0]);
                isRecord = true;
                Debug.Log("自动录音开启:" + start);
                minVolume_Number = 0;
            }
            //关
            if (isRecord && volume < minVolume)
            {
                //当小音量的数量达到一定的次数再关闭
                if (minVolume_Number > minVolume_Sum)
                {
                    end = Microphone.GetPosition(Microphone.devices[0]);
                    minVolume_Number = 0;
                    isRecord = false;
                    byte[] clipBuffer = AudioClipToByte(audioSource.clip, start, end);
                    //File.WriteAllBytes(Application.streamingAssetsPath + "/audio.wav", playerClipByte);

                    //当前不是被激活状态
                    if (!Speech.isAwaken)
                    {
                        if (Speech.MSPLogin() == 0)
                        {
                            isLogout = false;
                            Speech.MSCAwaken(clipBuffer);
                            Thread.Sleep(150);
                            Debug.Log("是否被激活:" + Speech.isAwaken);

                            //不是关键词 仍然未被激活 退出登录
                            if (!Speech.isAwaken)
                            {
                                Speech.MSPLogout();
                                isLogout = true;
                            }
                            //是关键词 被激活 数字人回应
                            if (Speech.isAwaken)
                            {
                                aiAudioClip = Speech.Tts("我在,有什么我可以帮助你的吗?");
                                StartCoroutine(PlayAudioToFinish(aiAudioClip));
                            }
                        }
                    }
                    else//当前已经是激活状态
                    {
                        //进行语音对话
                        voiceText = Speech.Asr(clipBuffer);
                        if (string.IsNullOrEmpty(voiceText))
                        {
                            Debug.Log("No speech input detected yet");
                        }
                        else
                        {
                            if (qw == null)
                            {
                                Debug.LogError("qw为空!");
                                Speech.MSPLogout();
                                isLogout = true;
                                return;
                            }
                            //与通义千问对接获取收到的文字
                            qw.PostMsg(voiceText, CallBack);
                        }
                    }
                    Debug.Log("自动录音关闭:" + end);
                }
                minVolume_Number++;
            }

        }
    }

    private void CallBack(string backMsg)
    {
        //播放ai转换后音频
        aiAudioClip = Speech.Tts(backMsg);
        StartCoroutine(PlayAudioToFinish(aiAudioClip));
    }

    IEnumerator PlayAudioToFinish(AudioClip audioClip)
    {
        isPlayed = false;//block new recording while the reply is still playing
        _audioSource.PlayOneShot(audioClip);
        yield return new WaitForSeconds(audioClip.length);
        Debug.Log("Audio playback finished");
        isPlayed = true;
        time = 0;
    }

    private void AwakeClose()
    {
        aiAudioClip = Speech.Tts("有什么需要再叫我。");
        StartCoroutine(PlayAudioToFinish(aiAudioClip));
        Speech.isAwaken = false;
        Speech.MSPLogout();
        isLogout = true;
    }

    /// <summary>
    /// 获取音量
    /// </summary>
    /// <param name="clip">音频片段</param>
    /// <param name="lengthVolume">长度</param>
    /// <returns></returns>
    private float GetVolume(AudioClip clip, int lengthVolume)
    {
        if (Microphone.IsRecording(null))
        {
            float maxVolume = 0f;
            //用于储存一段时间内的音频信息
            float[] volumeData = new float[lengthVolume];
            //获取录制的音频的开头位置
            int offset = Microphone.GetPosition(null) - (lengthVolume + 1);
            if (offset < 0)
                return 0f;
            //获取数据
            clip.GetData(volumeData, offset);
            //解析数据
            for (int i = 0; i < lengthVolume; i++)
            {
                float tempVolume = volumeData[i];
                if (tempVolume > maxVolume)
                    maxVolume = tempVolume;
            }
            return maxVolume * 99;
        }
        return 0;
    }

    /// <summary>
    /// clip转byte[]
    /// </summary>
    /// <param name="clip">音频片段</param>
    /// <param name="star">开始点</param>
    /// <param name="end">结束点</param>
    /// <returns></returns>
    public byte[] AudioClipToByte(AudioClip clip, int star, int end)
    {
        float[] data;
        if (end > star)
            data = new float[end - star];
        else
            data = new float[clip.samples - star + end];
        clip.GetData(data, star);
        int rescaleFactor = 32767; //to convert float to Int16
        byte[] outData = new byte[data.Length * 2];
        for (int i = 0; i < data.Length; i++)
        {
            short temshort = (short)(data[i] * rescaleFactor);
            byte[] temdata = BitConverter.GetBytes(temshort);
            outData[i * 2] = temdata[0];
            outData[i * 2 + 1] = temdata[1];
        }
        return outData;
    }
}

5. Things in the code that still need improvement

1. Replace the multiple bool flags (isLogout, isPlayed, Speech.isAwaken) with a single enum describing the digital human's state:
public enum State
{
    sleep,  //not awakened
    listen, //awakened, waiting to be asked
    think,  //awakened, thinking
    speak   //awakened, speaking
}

This way the state can be read and modified from anywhere.

We only run the idle timer in the listen state; in the think and speak states the timer is paused. A minimal sketch of that guard follows.
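The sketch below assumes a static holder for the state (the AiState name is a placeholder that matches the snippet further below; time, timeLimit and AwakeClose are the existing AutomaticRecord members):

public static class AiState
{
    public static State _aiState = State.sleep;     //hypothetical global state holder
}

//Inside AutomaticRecord, the idle timer then only runs while listening:
private void UpdateIdleTimer()
{
    if (AiState._aiState != State.listen) return;   //no timing while thinking or speaking
    time += Time.deltaTime;
    if (time > timeLimit)
    {
        AwakeClose();                                //play the sleep phrase and go back to sleep
        time = 0;
    }
}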

2. I can't remember the details exactly, but I recall a bug: if you ask a question right as the idle timer is about to run out, the sleep phrase ("有什么需要再叫我") and the digital human's answer can play at the same time.

Roughly, the fix is to set the state to State.sleep when the closing phrase "有什么需要再叫我" is played.

Then add a corresponding check to the coroutine that plays the audio.

The code below only illustrates the idea (the project had already changed between writing the first and second halves of this post T_T):

IEnumerator PlayingAudio(AudioClip audioClip,int state = 0)
{
    isEndOfAudio = false;
    //prevent the closing phrase "有什么需要再叫我" from switching the state
    if(AiState._aiState != State.sleep)
        AiState._aiState = State.speak;
    mySource.clip = audioClip;
    mySource.PlayOneShot(audioClip);
    yield return new WaitForSeconds(audioClip.length);
    if (AiState._aiState != State.sleep)
        AiState._aiState = State.listen;
    mytime = 0;
}
3. Due to technical and time constraints the code is still somewhat messy; it needs further cleanup, and the architecture should be decoupled to make later maintenance easier.