[Unity]Unity科大讯飞语音接入DeepSeek

[Unity]Unity科大讯飞语音接入DeepSeek

一.去科大讯飞平台获取Appid

控制台-讯飞开放平台

随便创建一个应用后会给你一个Appid

然后进入下面的网址,选择“SDK下载”,再选择“旧版入口”。

语音听写

SDK下载 - 科大讯飞api接口 - 讯飞开放平台

下载解压之后打开bin文件 找到里面的 msc_x64.dll文件 复制到你的unity项目中 (可以创建一个plugins文件夹)

二.去DeepSeek获取API Keys

网址在这: DeepSeek

和步骤一类似,但不需要下载SDK,只需申请 API Keys 即可,此处不再赘述。

三.直接上代码

copy的时候记得改成自己的APIKey和appid

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;

/// <summary>
/// DeepSeek API管理器
/// </summary>
/// <summary>
/// DeepSeek API manager: keeps a running conversation history and calls the
/// chat-completions endpoint through UnityWebRequest. Survives scene loads as
/// a DontDestroyOnLoad singleton.
/// </summary>
public class DeepSeekManager : MonoBehaviour
{
    // --- singleton ---
    private static DeepSeekManager _instance;

    /// <summary>Global access point. Null until Awake has run on an instance.</summary>
    public static DeepSeekManager Instance
    {
        get
        {
            return _instance;
        }
    }

    [Header("API设置")]
    [Tooltip("DeepSeek API密钥")]
    [SerializeField] private string apiKey = "你的APIKey";

    [Tooltip("API端点URL")]
    [SerializeField] private string apiEndpoint = "https://api.deepseek.com";

    [Tooltip("使用的模型名称")]
    [SerializeField] private string modelName = "deepseek-chat";

    [Tooltip("最大生成的Token数量")]
    [Range(100, 4000)]
    [SerializeField] private int maxTokens = 1000;

    [Tooltip("温度参数(0-2),越低回答越确定性")]
    [Range(0f, 2f)]
    [SerializeField] private float temperature = 0.7f;

    // Full conversation history (system prompt + user/assistant turns) sent
    // with every request so the model keeps context.
    private List<Message> conversationHistory = new List<Message>();

    /// <summary>Raised with the assistant's reply text when a request succeeds.</summary>
    public event Action<string> OnResponseReceived;

    /// <summary>Raised with an error description when a request or parse fails.</summary>
    public event Action<string> OnError;

    void Awake()
    {
        // Standard Unity singleton: first instance wins and persists,
        // duplicates destroy themselves.
        if (_instance == null)
        {
            _instance = this;
            DontDestroyOnLoad(this.gameObject);
        }
        else if (_instance != this)
        {
            Destroy(this.gameObject);
            return;
        }

        // Seed the history with the system prompt.
        ClearHistory();
    }

    /// <summary>
    /// Appends a user message to the history and fires an asynchronous request
    /// to DeepSeek. The reply arrives via OnResponseReceived / OnError.
    /// </summary>
    /// <param name="userMessage">The user's message; empty input is ignored.</param>
    public void SendMessage(string userMessage)
    {
        if (string.IsNullOrEmpty(userMessage))
        {
            Debug.LogWarning("用户消息为空,不发送请求");
            return;
        }

        conversationHistory.Add(new Message { role = "user", content = userMessage });

        StartCoroutine(SendRequestToDeepSeek());
    }

    /// <summary>
    /// Resets the conversation, keeping only the initial system prompt.
    /// </summary>
    public void ClearHistory()
    {
        conversationHistory.Clear();
        conversationHistory.Add(new Message
        {
            role = "system",
            content = "你是一个有帮助的AI助手,请用简洁明了的方式回答问题。"
        });
    }

    /// <summary>
    /// Coroutine that serializes the request, POSTs it to the chat-completions
    /// endpoint and dispatches the parsed reply (or an error) via the events.
    /// </summary>
    private IEnumerator SendRequestToDeepSeek()
    {
        var requestData = new RequestData
        {
            model = modelName,
            messages = conversationHistory.ToArray(),
            max_tokens = maxTokens,
            temperature = temperature
        };

        string jsonRequestBody = JsonUtility.ToJson(requestData);
        Debug.Log("发送请求: " + jsonRequestBody);

        // Build the full chat-completions URL. The inspector field may contain
        // either the bare host ("https://api.deepseek.com") or the complete
        // endpoint; posting to the bare host yields a 404, so append the
        // "/chat/completions" path when it is missing.
        string url = apiEndpoint.TrimEnd('/');
        if (!url.EndsWith("/chat/completions"))
            url += "/chat/completions";

        using (UnityWebRequest request = new UnityWebRequest(url, "POST"))
        {
            request.SetRequestHeader("Content-Type", "application/json");
            request.SetRequestHeader("Authorization", "Bearer " + apiKey);

            byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonRequestBody);
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);
            request.downloadHandler = new DownloadHandlerBuffer();

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError("API请求错误: " + request.error);
                OnError?.Invoke("API请求失败: " + request.error);
            }
            else
            {
                string responseText = request.downloadHandler.text;
                Debug.Log("收到响应: " + responseText);

                try
                {
                    ResponseData responseData = JsonUtility.FromJson<ResponseData>(responseText);

                    if (responseData != null && responseData.choices != null && responseData.choices.Length > 0)
                    {
                        string assistantReply = responseData.choices[0].message.content;

                        // Record the assistant turn so follow-up questions keep context.
                        conversationHistory.Add(new Message { role = "assistant", content = assistantReply });

                        OnResponseReceived?.Invoke(assistantReply);
                    }
                    else
                    {
                        Debug.LogError("无法解析AI回复");
                        OnError?.Invoke("无法解析AI回复");
                    }
                }
                catch (Exception e)
                {
                    Debug.LogError("解析响应时出错: " + e.Message);
                    OnError?.Invoke("解析响应时出错: " + e.Message);
                }
            }
        }
    }

    /// <summary>Overrides the API key at runtime (e.g. from a settings screen).</summary>
    public void SetApiKey(string newKey)
    {
        apiKey = newKey;
    }

    /// <summary>Overrides the model name at runtime.</summary>
    public void SetModel(string newModel)
    {
        modelName = newModel;
    }

    // --- JSON (de)serialization DTOs, shaped for JsonUtility ---

    [Serializable]
    private class Message
    {
        public string role;
        public string content;
    }

    [Serializable]
    private class RequestData
    {
        public string model;
        public Message[] messages;
        public int max_tokens;
        public float temperature;
    }

    [Serializable]
    private class ResponseData
    {
        public string id;
        // NOTE(review): the JSON field is named "object" (a C# keyword);
        // JsonUtility matches fields by exact name, so this member is never
        // populated. Kept only to document the response shape.
        public string object_type;
        public long created;
        public string model;
        public Choice[] choices;
        public Usage usage;
    }

    [Serializable]
    private class Choice
    {
        public int index;
        public Message message;
        public string finish_reason;
    }

    [Serializable]
    private class Usage
    {
        public int prompt_tokens;
        public int completion_tokens;
        public int total_tokens;
    }
}

using System;
using System.Collections;
using UnityEngine;
using UnityEngine.UI;
using System.Threading;
using Unity.VisualScripting;

/// <summary>
/// 语音识别控制器:处理按钮点击事件和UI更新
/// </summary>
/// <summary>
/// Speech recognition controller: handles the record button, microphone
/// capture, background recognition through the iFlytek SDK (Speech.Asr),
/// and optionally forwards the recognized text to DeepSeek.
/// </summary>
public class SpeechRecognitionController : MonoBehaviour
{
    // --- singleton ---
    private static SpeechRecognitionController _instance;

    /// <summary>Global access point. Null until Awake has run.</summary>
    public static SpeechRecognitionController Instance
    {
        get
        {
            return _instance;
        }
    }

    // UI references (assigned in the Inspector)
    [SerializeField] private Button recognizeButton;
    [SerializeField] private Text resultText;        // shows the speech-recognition result
    [SerializeField] private Text statusText;
    [SerializeField] private Text countdownText;
    [SerializeField] private Text aiResponseText;    // shows the AI reply

    [Header("录音设置")]
    [Tooltip("最大录音时长(秒)")]
    [Range(1, 60)]
    [SerializeField] private int maxRecordingDuration = 10;

    [Tooltip("录音采样率(Hz)")]
    [SerializeField] private int recordingFrequency = 16000; // iFlytek SDK default sample rate

    [Header("AI设置")]
    [Tooltip("是否自动将识别到的文本发送给DeepSeek")]
    [SerializeField] private bool autoSendToDeepSeek = true;

    [Tooltip("等待语音识别完成后再显示结果")]
    [SerializeField] private bool waitForAiResponse = true;

    // --- recording state ---
    private bool isRecording = false;
    private AudioClip recordedClip;
    private float recordingStartTime;
    private float remainingTime;

    // --- cross-thread state ---
    // hasNewResult is set on the recognition thread and polled in Update();
    // volatile guarantees the write is visible to the main thread.
    private Thread recognitionThread = null;
    private string recognitionResult = "";
    private volatile bool hasNewResult = false;
    private byte[] audioData;                    // PCM16 audio handed to the worker thread
    private bool isQuitting = false;             // true while the application is shutting down
    private bool isWaitingForAiResponse = false; // true between SendMessage and the AI callback

    void Awake()
    {
        _instance = this;
    }

    void Start()
    {
        // Abort early if mandatory UI references are missing.
        if (recognizeButton == null || resultText == null)
        {
            Debug.LogError("请在Inspector中分配Button和Text组件!");
            return;
        }

        recognizeButton.onClick.AddListener(OnRecognizeButtonClick);

        if (statusText != null)
            statusText.text = "点击按钮开始语音识别";

        if (countdownText != null)
            countdownText.gameObject.SetActive(false);

        // Log in to the iFlytek SDK; disable the button if it fails.
        int res = Speech.MSPLogin();
        if (res != 0)
        {
            Debug.LogError($"讯飞SDK初始化失败,错误码: {res}");
            if (statusText != null)
                statusText.text = "语音引擎初始化失败";
            recognizeButton.interactable = false;
        }

        // Subscribe to DeepSeek callbacks (unsubscribed in OnApplicationQuit/OnDestroy).
        if (DeepSeekManager.Instance != null)
        {
            DeepSeekManager.Instance.OnResponseReceived += OnDeepSeekResponseReceived;
            DeepSeekManager.Instance.OnError += OnDeepSeekError;
        }
        else
        {
            Debug.LogWarning("未找到DeepSeekManager实例,请确保场景中存在该组件");
        }
    }

    void OnApplicationQuit()
    {
        isQuitting = true;

        if (DeepSeekManager.Instance != null)
        {
            DeepSeekManager.Instance.OnResponseReceived -= OnDeepSeekResponseReceived;
            DeepSeekManager.Instance.OnError -= OnDeepSeekError;
        }

        SafeStopRecordingAndThread();

        Speech.MSPLogout();
    }

    void OnDestroy()
    {
        // OnApplicationQuit already cleaned up when the app is exiting;
        // only clean up here for in-scene destruction.
        if (!isQuitting)
        {
            if (DeepSeekManager.Instance != null)
            {
                DeepSeekManager.Instance.OnResponseReceived -= OnDeepSeekResponseReceived;
                DeepSeekManager.Instance.OnError -= OnDeepSeekError;
            }

            SafeStopRecordingAndThread();

            Speech.MSPLogout();
        }
    }

    /// <summary>
    /// DeepSeek success callback: shows the reply and clears the waiting flag.
    /// </summary>
    private void OnDeepSeekResponseReceived(string response)
    {
        isWaitingForAiResponse = false;

        if (aiResponseText != null)
        {
            aiResponseText.text = response;
        }
        else
        {
            Debug.LogWarning("aiResponseText未分配,无法显示AI回复");
        }

        if (statusText != null)
        {
            statusText.text = "AI回复已接收";
        }
    }

    /// <summary>
    /// DeepSeek error callback: shows the error and clears the waiting flag.
    /// </summary>
    private void OnDeepSeekError(string errorMessage)
    {
        isWaitingForAiResponse = false;

        Debug.LogError($"DeepSeek错误: {errorMessage}");

        if (aiResponseText != null)
        {
            aiResponseText.text = "无法获取AI回复: " + errorMessage;
        }

        if (statusText != null)
        {
            statusText.text = "AI响应出错";
        }
    }

    /// <summary>
    /// Stops the microphone and the recognition thread, hides the countdown.
    /// Safe to call multiple times and during shutdown.
    /// </summary>
    private void SafeStopRecordingAndThread()
    {
        if (isRecording && Microphone.IsRecording(null))
        {
            Microphone.End(null);
            isRecording = false;
        }

        if (recognitionThread != null && recognitionThread.IsAlive)
        {
            try
            {
                // NOTE(review): Thread.Abort is unsupported on modern .NET
                // runtimes (throws PlatformNotSupportedException); it works on
                // Unity's Mono runtime but a cooperative cancel flag would be
                // safer — confirm against the project's scripting backend.
                recognitionThread.Abort();
            }
            catch (Exception e)
            {
                Debug.LogWarning($"停止线程时出错: {e.Message}");
            }
            finally
            {
                recognitionThread = null;
            }
        }

        if (countdownText != null)
            countdownText.gameObject.SetActive(false);
    }

    /// <summary>
    /// Button handler: toggles between starting and stopping a recording.
    /// </summary>
    public void OnRecognizeButtonClick()
    {
        if (!isRecording)
        {
            StartRecording();
        }
        else
        {
            StopRecordingAndRecognition();
        }
    }

    /// <summary>
    /// Starts microphone capture and the countdown display.
    /// Refused while an AI reply is still pending.
    /// </summary>
    private void StartRecording()
    {
        if (isWaitingForAiResponse)
        {
            if (statusText != null)
                statusText.text = "请等待AI回复完成";
            return;
        }

        if (statusText != null)
            statusText.text = "正在录音...";

        if (recognizeButton != null && recognizeButton.GetComponentInChildren<Text>() != null)
            recognizeButton.GetComponentInChildren<Text>().text = "停止录音";

        isRecording = true;
        recordingStartTime = Time.time;
        remainingTime = maxRecordingDuration;

        if (countdownText != null)
        {
            countdownText.gameObject.SetActive(true);
            countdownText.text = $"剩余时间: {remainingTime:0}秒";
        }

        // Capture from the default microphone at the SDK's expected sample rate.
        recordedClip = Microphone.Start(null, false, maxRecordingDuration, recordingFrequency);
    }

    /// <summary>
    /// Stops the recording, trims the captured clip to its real length and
    /// launches recognition on a background thread.
    /// </summary>
    private void StopRecordingAndRecognition()
    {
        // Discard any still-running recognition before starting a new one.
        if (recognitionThread != null && recognitionThread.IsAlive)
        {
            recognitionThread.Abort();
            recognitionThread = null;
        }

        if (!isRecording) return;

        // Capture the write position before ending the recording.
        string deviceName = Microphone.devices.Length > 0 ? Microphone.devices[0] : null;
        int position = Microphone.GetPosition(deviceName);
        Microphone.End(null);
        isRecording = false;

        if (countdownText != null)
            countdownText.gameObject.SetActive(false);

        if (recognizeButton != null && recognizeButton.GetComponentInChildren<Text>() != null)
            recognizeButton.GetComponentInChildren<Text>().text = "开始语音识别";

        if (statusText != null)
            statusText.text = "正在识别语音...";

        // Nothing was captured — bail out.
        if (position <= 0)
        {
            if (statusText != null)
                statusText.text = "录音失败,请重试";
            return;
        }

        // Trim to the actually-recorded samples and convert to PCM16 on the
        // main thread (AudioClip APIs are not thread-safe).
        AudioClip trimmedClip = TrimAudioClip(recordedClip, 0, position);

        audioData = ConvertAudioClipToByteArray(trimmedClip);

        // Recognize off the main thread; IsBackground so a live thread can
        // never keep the process from exiting.
        recognitionThread = new Thread(RecognizeSpeech);
        recognitionThread.IsBackground = true;
        recognitionThread.Start();
    }

    /// <summary>
    /// Converts an AudioClip to a little-endian PCM16 byte array. Samples are
    /// clamped to [-1, 1] so out-of-range floats saturate instead of wrapping
    /// around when cast to short.
    /// </summary>
    private byte[] ConvertAudioClipToByteArray(AudioClip clip)
    {
        float[] samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);

        byte[] byteArray = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
            byteArray[i * 2] = (byte)(value & 0xff);
            byteArray[i * 2 + 1] = (byte)((value >> 8) & 0xff);
        }

        return byteArray;
    }

    /// <summary>
    /// Worker-thread entry: runs iFlytek recognition on the captured audio and
    /// publishes the result for Update() to pick up on the main thread.
    /// </summary>
    private void RecognizeSpeech()
    {
        try
        {
            string result = Speech.Asr(audioData);

            recognitionResult = result;
            hasNewResult = true;
        }
        catch (Exception e)
        {
            Debug.LogError($"语音识别错误: {e.Message}");

            recognitionResult = "识别出错,请重试";
            hasNewResult = true;
        }
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> contains any of the given
    /// keywords, compared case-insensitively.
    /// </summary>
    private bool ContainsAnyKeyword(string text, string[] keywords)
    {
        if (string.IsNullOrEmpty(text) || keywords == null || keywords.Length == 0)
            return false;

        foreach (string keyword in keywords)
        {
            // Compare case-insensitively on both sides; lowering only one side
            // would make mixed-case text never match.
            if (!string.IsNullOrEmpty(keyword) &&
                text.IndexOf(keyword, StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }

        return false;
    }

    void Update()
    {
        // Drive the countdown and auto-stop when time runs out.
        if (isRecording && countdownText != null)
        {
            remainingTime = maxRecordingDuration - (Time.time - recordingStartTime);
            if (remainingTime <= 0)
            {
                remainingTime = 0;
                StopRecordingAndRecognition();
            }
            else
            {
                countdownText.text = $"剩余时间: {remainingTime:0}秒";
            }
        }

        // Consume a freshly published recognition result (main thread only).
        if (hasNewResult && !isWaitingForAiResponse)
        {
            hasNewResult = false;

            if (resultText != null)
            {
                string displayText = string.IsNullOrEmpty(recognitionResult) ? "未能识别语音" : recognitionResult;
                resultText.text = displayText;
            }

            if (statusText != null)
                statusText.text = "识别完成";

            bool shouldSendToDeepSeek = true;

            // Hand the recognized text to DeepSeek when auto-send is enabled.
            if (shouldSendToDeepSeek && autoSendToDeepSeek && !string.IsNullOrEmpty(recognitionResult) && DeepSeekManager.Instance != null)
            {
                if (waitForAiResponse)
                {
                    isWaitingForAiResponse = true;
                    if (statusText != null)
                        statusText.text = "正在等待AI回复...";
                }

                DeepSeekManager.Instance.SendMessage(recognitionResult);
            }
        }
    }

    /// <summary>
    /// Copies samples [start, end) of a clip into a new AudioClip.
    /// Returns the original clip unchanged when the range is empty.
    /// </summary>
    private AudioClip TrimAudioClip(AudioClip clip, int start, int end)
    {
        int length = end - start;
        if (length <= 0) return clip;

        float[] data = new float[length * clip.channels];
        clip.GetData(data, start);

        AudioClip trimmedClip = AudioClip.Create("trimmed", length, clip.channels, clip.frequency, false);
        trimmedClip.SetData(data, 0);

        return trimmedClip;
    }
}
using System;
using UnityEngine;

/// <summary>
/// 音频处理扩展方法
/// </summary>
/// <summary>
/// Audio conversion extension methods (AudioClip ↔ PCM16/WAV).
/// </summary>
public static class AudioExtensions
{
    /// <summary>
    /// Converts an AudioClip to a little-endian PCM16 byte array.
    /// Samples are clamped to [-1, 1] first so out-of-range floats saturate
    /// instead of wrapping around when cast to short.
    /// </summary>
    /// <param name="clip">The clip to convert.</param>
    /// <returns>PCM16 bytes, interleaved by channel.</returns>
    public static byte[] ToPCM16(this AudioClip clip)
    {
        float[] samples = new float[clip.samples * clip.channels];
        clip.GetData(samples, 0);

        byte[] byteArray = new byte[samples.Length * 2];
        for (int i = 0; i < samples.Length; i++)
        {
            short value = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
            byteArray[i * 2] = (byte)(value & 0xff);         // low byte
            byteArray[i * 2 + 1] = (byte)((value >> 8) & 0xff); // high byte
        }

        return byteArray;
    }

    /// <summary>
    /// Serializes a WaveHeader into the 44-byte canonical WAV header layout.
    /// </summary>
    /// <param name="header">The header to serialize.</param>
    /// <returns>A 44-byte array with the fields at their standard offsets.</returns>
    public static byte[] ToBytes(this WaveHeader header)
    {
        byte[] result = new byte[44];

        BitConverter.GetBytes(header.RIFFID).CopyTo(result, 0);
        BitConverter.GetBytes(header.FileSize).CopyTo(result, 4);
        BitConverter.GetBytes(header.RIFFType).CopyTo(result, 8);
        BitConverter.GetBytes(header.FMTID).CopyTo(result, 12);
        BitConverter.GetBytes(header.FMTSize).CopyTo(result, 16);
        BitConverter.GetBytes(header.FMTTag).CopyTo(result, 20);
        BitConverter.GetBytes(header.FMTChannel).CopyTo(result, 22);
        BitConverter.GetBytes(header.FMTSamplesPerSec).CopyTo(result, 24);
        BitConverter.GetBytes(header.AvgBytesPerSec).CopyTo(result, 28);
        BitConverter.GetBytes(header.BlockAlign).CopyTo(result, 32);
        BitConverter.GetBytes(header.BitsPerSample).CopyTo(result, 34);
        BitConverter.GetBytes(header.DataID).CopyTo(result, 36);
        BitConverter.GetBytes(header.DataSize).CopyTo(result, 40);

        return result;
    }

    /// <summary>
    /// Parses a WAV-format byte array into an AudioClip.
    /// </summary>
    /// <param name="bytes">Bytes containing a complete WAV file.</param>
    /// <returns>The created AudioClip.</returns>
    public static AudioClip ToWAV(this byte[] bytes)
    {
        // Parse via the project's WAV helper class.
        WAV wav = new WAV(bytes);

        AudioClip audioClip = AudioClip.Create("FromWAV", wav.SampleCount, wav.ChannelCount, wav.Frequency, false);

        if (wav.ChannelCount == 1)
        {
            audioClip.SetData(wav.LeftChannel, 0);
        }
        else
        {
            // Stereo: interleave left/right samples as Unity expects.
            float[] stereoData = new float[wav.SampleCount * 2];
            for (int i = 0; i < wav.SampleCount; i++)
            {
                stereoData[i * 2] = wav.LeftChannel[i];
                stereoData[i * 2 + 1] = wav.RightChannel[i];
            }
            audioClip.SetData(stereoData, 0);
        }

        return audioClip;
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using Unity.VisualScripting;
using UnityEngine;


/// <summary>
/// 语音工具
/// </summary>
/// <summary>
/// Thin wrapper around the iFlytek MSC native SDK: login/logout,
/// speech recognition (ASR) and speech synthesis (TTS).
/// </summary>
public static class Speech
{
    /* APPID created on the iFlytek developer console. */
    const string mAppID = "appid=你的appid";


    /// <summary>
    /// Logs in to the MSC SDK. Must be called before any other SDK interface.
    /// </summary>
    /// <returns>0 on success, otherwise an SDK error code.</returns>
    public static int MSPLogin()
    {
        int res = MSCDLL.MSPLogin(null, null, mAppID);
        if (res != 0)
        {
            Debug.Log($"login failed. error code: {res}");
        }
        else
            Debug.Log("登录接口成功");
        return res;
    }

    /// <summary>
    /// Logs out of the MSC SDK. Call only after all other interfaces are done.
    /// </summary>
    public static void MSPLogout()
    {
        int error = MSCDLL.MSPLogout();
        if (error != 0)
            Debug.Log($"logout failed. error code {error}");
        else
            Debug.Log("退出登录成功");
    }


    /// <summary>
    /// iFlytek speech recognition over raw audio.
    /// </summary>
    /// <param name="clipBuffer">PCM16 mono 16 kHz audio bytes.</param>
    /// <returns>The recognized text, or null on error.</returns>
    public static string Asr(byte[] clipBuffer)
    {
        int res = 0;
        /* Begin one recognition session; the returned handle is used by the
         * audio-write / get-result calls below. The parameter list can be
         * customized (e.g. language=zh_cn / en_us) — see the iFlytek SDK docs. */
        IntPtr sessionID = MSCDLL.QISRSessionBegin(null,
                "sub=iat,domain=iat,language=zh_cn,accent=mandarin,sample_rate=16000,result_type=plain,result_encoding= utf-8", ref res);
        if (res != 0)
        {
            Debug.Log($"begin failed. error code: {res}");
            OnErrorEvent();
            return null;
        }

        /* Endpoint-detection and recognition status trackers, updated by the SDK. */
        EpStatus epStatus = EpStatus.MSP_EP_LOOKING_FOR_SPEECH;
        RecogStatus recognizeStatus = RecogStatus.MSP_REC_STATUS_SUCCESS;

        /* Write the audio to the recognizer (0 = success). */
        res = MSCDLL.QISRAudioWrite(sessionID, clipBuffer, (uint)clipBuffer.Length, AudioStatus.MSP_AUDIO_SAMPLE_CONTINUE, ref epStatus, ref recognizeStatus);
        if (res != 0)
        {
            Debug.Log($"write failed. error code: {res}");
            MSCDLL.QISRSessionEnd(sessionID, "error");
            OnErrorEvent();
            return null;
        }
        /* Signal end-of-audio with an empty "last sample" write. */
        res = MSCDLL.QISRAudioWrite(sessionID, null, 0, AudioStatus.MSP_AUDIO_SAMPLE_LAST, ref epStatus, ref recognizeStatus);
        if (res != 0)
        {
            Debug.Log($"write failed. error code: {res}");
            MSCDLL.QISRSessionEnd(sessionID, "error");
            OnErrorEvent();
            return null;
        }

        /* Accumulates the recognized text. */
        StringBuilder sb = new StringBuilder();
        /* Running length of the accumulated text, bounded below. */
        int length = 0;

        /* Poll for partial results until the recognizer reports completion. */
        while (recognizeStatus != RecogStatus.MSP_REC_STATUS_COMPLETE)
        {
            IntPtr curtRslt = MSCDLL.QISRGetResult(sessionID, ref recognizeStatus, 0, ref res);
            if (res != 0)
            {
                Debug.Log($"get result failed. error code: {res}");
                MSCDLL.QISRSessionEnd(sessionID, "error");
                OnErrorEvent();
                return null;
            }
            /* A native result pointer is "empty" when it equals IntPtr.Zero
             * (comparing an IntPtr struct against null does not detect this).
             * Measure the marshaled string itself, not the pointer's numeric
             * ToString() representation. */
            if (curtRslt != IntPtr.Zero)
            {
                string part = Marshal.PtrToStringAnsi(curtRslt);
                if (!string.IsNullOrEmpty(part))
                {
                    length += part.Length;
                    if (length > 4096)
                    {
                        Debug.Log($"size not enough: {length} > 4096");
                        MSCDLL.QISRSessionEnd(sessionID, "error");
                        OnErrorEvent();
                        return sb.ToString();
                    }
                    sb.Append(part);
                }
            }
            Thread.Sleep(150);
        }

        /* All results received — end this recognition session. */
        res = MSCDLL.QISRSessionEnd(sessionID, "ao li gei !");
        if (res != 0) Debug.Log($"end failed. error code: {res}");

        return sb.ToString();
    }

    /// <summary>
    /// iFlytek speech recognition over an audio file.
    /// </summary>
    /// <param name="path">Path of the audio file to recognize.</param>
    /// <returns>The recognized text, or null on error.</returns>
    public static string Asr(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            Debug.Log("path can not be null.");
            return null;
        }
        byte[] clipBuffer;
        try
        {
            clipBuffer = File.ReadAllBytes(path);
        }
        catch (Exception e)
        {
            Debug.Log($"exception: {e.Message}");
            return null;
        }
        return Asr(clipBuffer);
    }

    /// <summary>
    /// iFlytek speech recognition over an AudioClip.
    /// </summary>
    /// <param name="clip">The AudioClip to recognize.</param>
    /// <returns>The recognized text, or null on error.</returns>
    public static string Asr(AudioClip clip)
    {
        byte[] clipBuffer = clip.ToPCM16();
        return Asr(clipBuffer);
    }

    /// <summary>
    /// iFlytek speech synthesis into an AudioClip.
    /// </summary>
    /// <param name="content">Text to synthesize.</param>
    /// <param name="voice">Which preset voice to use.</param>
    /// <returns>The synthesized AudioClip, or null on error.</returns>
    public static AudioClip Tts(string content, TtsVoice voice = TtsVoice.XuJiu)
    {
        int res = 0;

        /* Map the voice enum to the SDK's voice_name parameter value. */
        string voicer = "";
        switch (voice)
        {
            case TtsVoice.XiaoYan:
                voicer = "xiaoyan";
                break;
            case TtsVoice.XuJiu:
                voicer = "aisjiuxu";
                break;
            case TtsVoice.XiaoPing:
                voicer = "aisxping";
                break;
            case TtsVoice.XiaoJing:
                voicer = "aisjinger";
                break;
            case TtsVoice.XuXiaoBao:
                voicer = "aisbabyxu";
                break;
            default:
                break;
        }
        /* Begin one synthesis session; the handle feeds the text-put and
         * audio-get calls below. See the iFlytek SDK docs for the parameters. */
        IntPtr sessionID = MSCDLL.QTTSSessionBegin($"engine_type = cloud, voice_name = {voicer}, speed = 65, pitch = 40, text_encoding = utf8, sample_rate = 16000", ref res);
        if (res != 0)
        {
            Debug.Log($"begin failed. error code: {res}");
            OnErrorEvent();
            return null;
        }

        /* Submit the text to synthesize (0 = success). */
        res = MSCDLL.QTTSTextPut(sessionID, content, (uint)Encoding.UTF8.GetByteCount(content), string.Empty);
        if (res != 0)
        {
            Debug.Log($"put text failed. error code: {res}");
            OnErrorEvent();
            return null;
        }

        /* Length of the chunk returned by each QTTSAudioGet call. */
        uint audioLength = 0;
        /* Synthesis progress reported by the SDK. */
        SynthStatus synthStatus = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;

        List<byte[]> bytesList = new List<byte[]>();

        /* Poll for synthesized audio chunks until the SDK reports the end
         * of data or an error code. */
        try
        {
            while (true)
            {
                IntPtr intPtr = MSCDLL.QTTSAudioGet(sessionID, ref audioLength, ref synthStatus, ref res);
                byte[] byteArray = new byte[(int)audioLength];
                if (audioLength > 0) Marshal.Copy(intPtr, byteArray, 0, (int)audioLength);

                bytesList.Add(byteArray);

                Thread.Sleep(150);
                if (synthStatus == SynthStatus.MSP_TTS_FLAG_DATA_END || res != 0)
                    break;
            }
        }
        catch (Exception e)
        {
            OnErrorEvent();
            Debug.Log($"error: {e.Message}");
            return null;
        }

        int size = 0;
        for (int i = 0; i < bytesList.Count; i++)
        {
            size += bytesList[i].Length;
        }

        /* GetWaveHeader expects the TOTAL file length (header + data): it
         * derives FileSize = len - 8 and DataSize = len - 44. Pass the data
         * size plus the 44-byte header so both fields come out correct. */
        var header = GetWaveHeader(size + 44);
        byte[] array = header.ToBytes();
        bytesList.Insert(0, array);
        size += array.Length;

        byte[] bytes = new byte[size];

        /* Concatenate header + chunks into one contiguous WAV buffer. */
        size = 0;
        for (int i = 0; i < bytesList.Count; i++)
        {
            bytesList[i].CopyTo(bytes, size);
            size += bytesList[i].Length;
        }
        AudioClip clip = bytes.ToWAV();


        res = MSCDLL.QTTSSessionEnd(sessionID, "ao li gei !");
        if (res != 0)
        {
            Debug.Log($"end failed. error code: {res}");
            OnErrorEvent();
            return clip;
        }

        return clip;
    }

    /// <summary>
    /// iFlytek speech synthesis written directly to a WAV file.
    /// Performs its own MSPLogin/MSPLogout around the session.
    /// </summary>
    /// <param name="content">Text to synthesize.</param>
    /// <param name="path">Destination file path for the WAV output.</param>
    /// <returns>True on success, false on any error.</returns>
    public static bool Tts(string content, string path)
    {
        /* This overload manages its own login/logout pair. */
        int res = MSCDLL.MSPLogin(null, null, mAppID);
        if (res != 0)
        {
            Debug.Log($"login failed. error code: {res}");
            return false;
        }

        /* Begin one synthesis session.
         * NOTE(review): this parameter list uses "voice" while the other
         * overload uses "voice_name" — confirm which key the SDK accepts. */
        IntPtr sessionID = MSCDLL.QTTSSessionBegin("engine_type = cloud, voice = xiaoyan, text_encoding = utf8, sample_rate = 16000", ref res);
        if (res != 0)
        {
            Debug.Log($"begin failed. error code: {res}");
            OnErrorEvent();
            return false;
        }

        /* Submit the text to synthesize (0 = success). */
        res = MSCDLL.QTTSTextPut(sessionID, content, (uint)Encoding.UTF8.GetByteCount(content), string.Empty);
        if (res != 0)
        {
            Debug.Log($"put text failed. error code: {res}");
            OnErrorEvent();
            return false;
        }

        /* Length of the chunk returned by each QTTSAudioGet call. */
        uint audioLength = 0;
        /* Synthesis progress reported by the SDK. */
        SynthStatus synthStatus = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;

        /* Buffer the whole file in memory; the first 44 bytes are a
         * placeholder that is overwritten with the real header afterwards.
         * using-blocks guarantee the streams are released on any exit path. */
        using (MemoryStream ms = new MemoryStream())
        {
            ms.Write(new byte[44], 0, 44);

            /* Poll for synthesized audio chunks until the SDK reports the end
             * of data or an error code. */
            try
            {
                while (true)
                {
                    IntPtr intPtr = MSCDLL.QTTSAudioGet(sessionID, ref audioLength, ref synthStatus, ref res);
                    byte[] byteArray = new byte[(int)audioLength];
                    if (audioLength > 0) Marshal.Copy(intPtr, byteArray, 0, (int)audioLength);
                    ms.Write(byteArray, 0, (int)audioLength);
                    Thread.Sleep(150);
                    if (synthStatus == SynthStatus.MSP_TTS_FLAG_DATA_END || res != 0)
                        break;
                }
            }
            catch (Exception e)
            {
                OnErrorEvent();
                Debug.Log($"error: {e.Message}");
                return false;
            }

            /* Back-patch the real header (ms.Length is the total file length,
             * which is exactly what GetWaveHeader expects). */
            var header = GetWaveHeader((int)ms.Length);
            byte[] array = header.ToBytes();
            ms.Position = 0L;
            ms.Write(array, 0, array.Length);
            ms.Position = 0L;

            using (FileStream fs = new FileStream(path, System.IO.FileMode.Create, FileAccess.Write))
            {
                ms.WriteTo(fs);
            }
        }

        res = MSCDLL.QTTSSessionEnd(sessionID, "ao li gei !");
        if (res != 0)
        {
            Debug.Log($"end failed. error code: {res}");
            OnErrorEvent();
            return false;
        }

        res = MSCDLL.MSPLogout();
        if (res != 0)
        {
            Debug.Log($"logout failed. error code: {res}");
            return false;
        }
        return true;
    }


    /* Error path: log out of the SDK so a later login starts clean. */
    static void OnErrorEvent()
    {
        int res = MSCDLL.MSPLogout();
        if (res != 0)
        {
            Debug.Log($"logout failed. error code: {res}");
        }
    }

    /// <summary>
    /// Builds a canonical 44-byte WAV header for PCM16 mono 16 kHz audio.
    /// </summary>
    /// <param name="dataLen">TOTAL file length in bytes (header + PCM data).</param>
    static WaveHeader GetWaveHeader(int dataLen)
    {
        return new WaveHeader
        {
            RIFFID = 1179011410,          // "RIFF"
            FileSize = dataLen - 8,
            RIFFType = 1163280727,        // "WAVE"
            FMTID = 544501094,            // "fmt "
            FMTSize = 16,
            FMTTag = 1,                   // PCM
            FMTChannel = 1,               // mono
            FMTSamplesPerSec = 16000,
            AvgBytesPerSec = 32000,       // 16000 samples/s * 2 bytes
            BlockAlign = 2,
            BitsPerSample = 16,
            DataID = 1635017060,          // "data"
            DataSize = dataLen - 44
        };
    }
}
/// <summary>
/// P/Invoke bindings for the iFlytek MSC speech SDK (msc_x64.dll):
/// common login/parameter APIs (msp_cmn.h), speech recognition (qisr.h)
/// and speech synthesis (qtts.h).
/// </summary>
public class MSCDLL
{
    #region msp_cmn.h 通用接口
    /// <summary>
    /// User login; initializes MSC. Must be called before any other API in this
    /// class — typically once at application startup. Pair with MSPLogout.
    /// </summary>
    /// <param name="usr">Reserved; pass null.</param>
    /// <param name="pwd">Reserved; pass null.</param>
    /// <param name="parameters">
    /// Comma-separated "key=value" pairs, including:
    ///   appid           — appid obtained from the iFlytek open platform.
    ///   engine_start    — start an offline engine: "ivw" (wake-up) or "asr" (recognition).
    ///   [xxx]_res_path  — offline engine resource path, format "fo|[path]|[offset]|[length]";
    ///                     join multiple paths with ';'
    ///                     (e.g. "asr_res_path=fo|res/asr/common.jet;fo|res/asr/sms.jet").
    /// </param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPLogin(string usr, string pwd, string parameters);
    /// <summary>
    /// User logout. Call only after all other MSC calls have finished; the
    /// result is undefined if other APIs are still in use.
    /// </summary>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPLogout();
    /// <summary>
    /// Uploads user data such as user word lists, contacts or custom grammar.
    /// </summary>
    /// <param name="dataName">Name of the data; should be unique.</param>
    /// <param name="data">Pointer to the start of the data buffer (may be binary).</param>
    /// <param name="dataLen">Data length in bytes (excluding '\0' for strings).</param>
    /// <param name="_params">Upload parameters, e.g. "sub=uup,dtt=userword" (word
    /// list) or "sub=uup,dtt=contact" (contacts).</param>
    /// <param name="errorCode">Receives 0 on success, otherwise an error code.</param>
    /// <returns>Data id returned by the server, used for later commands.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr MSPUploadData(string dataName, IntPtr data, uint dataLen, string _params, ref int errorCode);
    /// <summary>
    /// Writes data to MSC, such as data to be uploaded or search text.
    /// </summary>
    /// <param name="data">Pointer to the data buffer (may be binary).</param>
    /// <param name="dataLen">Data length in bytes.</param>
    /// <param name="dataStatus">2 = first or continuing block, 4 = last block.</param>
    /// <returns>0 on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPAppendData(IntPtr data, uint dataLen, uint dataStatus);
    /// <summary>
    /// Downloads data such as user configuration.
    /// </summary>
    /// <param name="_params">Parameters describing the data to download.</param>
    /// <param name="dataLen">Receives the length of the received data.</param>
    /// <param name="errorCode">Receives 0 on success, otherwise an error code.</param>
    /// <returns>Buffer pointer to the received (possibly binary) data; null on
    /// failure or if the data does not exist.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr MSPDownloadData(string _params, ref uint dataLen, ref int errorCode);
    /// <summary>
    /// Sets an MSC parameter. Also used to initialize/destroy offline engines:
    /// paramName "engine_start" starts one, "engine_destroy" tears it down.
    /// </summary>
    /// <param name="paramName">Parameter name.</param>
    /// <param name="paramValue">Parameter value.</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPSetParam(string paramName, string paramValue);
    /// <summary>
    /// Gets an MSC setting, e.g. "upflow"/"downflow" (online traffic counters).
    /// One parameter per call.
    /// NOTE(review): "ref byte[]" marshals as a pointer-to-pointer, while the
    /// native API expects a caller-allocated char buffer — "byte[]" is more
    /// likely correct. Confirm against msp_cmn.h before relying on this binding.
    /// </summary>
    /// <param name="paramName">Parameter name; one query per call.</param>
    /// <param name="paramValue">In: buffer start address. Out: the retrieved value.</param>
    /// <param name="valueLen">In: buffer size. Out: actual value length (excluding '\0').</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int MSPGetParam(string paramName, ref byte[] paramValue, ref uint valueLen);
    /// <summary>
    /// Gets the version of MSC or a local engine. One query per call:
    /// "ver_msc", "ver_asr" (currently unsupported), "ver_tts", "ver_ivw".
    /// </summary>
    /// <param name="verName">Version name: "msc", "aitalk", "aisound" or "ivw".</param>
    /// <param name="errorCode">Receives 0 on success, otherwise an error code.</param>
    /// <returns>Buffer pointer holding the version string; null on failure or if
    /// the information does not exist.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr MSPGetVersion(string verName, ref int errorCode);
    #endregion

    #region qisr.h 语音识别
    /// <summary>
    /// Starts one speech-recognition session.
    /// </summary>
    /// <param name="grammarList">Reserved; pass null.</param>
    /// <param name="_params">Session parameters ("key=value" pairs):
    ///   engine_type     — cloud | local
    ///   sub             — iat (dictation) | asr (command words)   [online]
    ///   language        — zh_cn | en_us                           [online]
    ///   domain          — iat                                     [online]
    ///   accent          — mandarin                                [online]
    ///   sample_rate     — 16000 | 8000
    ///   asr_threshold   — 0-100; only return results scoring above this (offline grammar)
    ///   asr_denoise     — 0 off | 1 on (offline noise reduction)
    ///   asr_res_path    — offline recognition resource path
    ///   grm_build_path  — folder where built offline grammar data is stored
    ///   result_type     — plain | json
    ///   text_encoding   — encoding of text carried in the parameters
    ///   local_grammar   — grammar id obtained from a previous offline grammar build
    ///   ptt             — punctuation when sub=iat: 0 none, 1 punctuated
    ///   aue             — audio codec and level: raw; speex; speex-wb; ico (speex levels 0-10)
    ///   result_encoding — plain: UTF-8, GB2312; json: UTF-8
    ///   vad_enable      — VAD switch, on by default; 0 (or false) disables
    ///   vad_bos         — max leading silence (feature currently disabled)
    ///   vad_eos         — max trailing silence, 0-10000 ms, default 2000
    /// </param>
    /// <param name="errorCode">Receives 0 (MSP_SUCCESS) on success, otherwise an error code.</param>
    /// <returns>Session id string — the handle for this recognition — on success;
    /// null on failure.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QISRSessionBegin(string grammarList, string _params, ref int errorCode);
    /// <summary>
    /// Writes binary audio for the current recognition. Call repeatedly until all
    /// audio is written, setting audioStatus to MSP_AUDIO_SAMPLE_FIRST for the
    /// first block, MSP_AUDIO_SAMPLE_LAST for the final block and
    /// MSP_AUDIO_SAMPLE_CONTINUE otherwise. While writing, poll epStatus (stop
    /// writing once the rear end-point is detected — MSC ignores further audio)
    /// and recogStatus (fetch results from the MSC cache as soon as available).
    /// </summary>
    /// <param name="sessionID">Handle returned by QISRSessionBegin.</param>
    /// <param name="waveData">Start address of the audio data buffer.</param>
    /// <param name="waveLen">Audio data length in bytes.</param>
    /// <param name="audioStatus">Tells MSC whether the audio upload has finished.</param>
    /// <param name="epStatus">Receives the end-point detector state.</param>
    /// <param name="recogStatus">Receives the recognizer state, prompting the
    /// caller to start/stop fetching results.</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRAudioWrite(IntPtr sessionID, byte[] waveData, uint waveLen, AudioStatus audioStatus, ref EpStatus epStatus, ref RecogStatus recogStatus);
    /// <summary>
    /// Fetches recognition results in the requested format. Partial results may
    /// be fetched while audio is still being written; after the final write, call
    /// repeatedly until rsltStatus is 5 (MSP_REC_STATUS_COMPLETE) or an error
    /// code is returned. If a successful call yields no result yet, sleep the
    /// current thread briefly to avoid wasting CPU on tight polling.
    /// </summary>
    /// <param name="sessionID">Handle returned by QISRSessionBegin.</param>
    /// <param name="rsltStatus">Receives the result status; values as in
    /// QISRAudioWrite's recogStatus.</param>
    /// <param name="waitTime">Reserved.</param>
    /// <param name="errorCode">Receives 0 (MSP_SUCCESS) on success, otherwise an error code.</param>
    /// <returns>Pointer to the result string when a result is available;
    /// null on failure or when no result is ready.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QISRGetResult(IntPtr sessionID, ref RecogStatus rsltStatus, int waitTime, ref int errorCode);
    /// <summary>
    /// Ends the recognition session and releases all of its resources
    /// (parameters, grammar, audio, instances). Counterpart of QISRSessionBegin;
    /// the handle must not be used after this call.
    /// </summary>
    /// <param name="sessionID">Handle returned by QISRSessionBegin.</param>
    /// <param name="hints">Free-form reason for ending the session; logged to CallLog.</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRSessionEnd(IntPtr sessionID, string hints);
    /// <summary>
    /// Gets information about the current recognition session, e.g.:
    ///   sid      — 32-byte server session id   [online]
    ///   upflow   — uplink traffic              [online]
    ///   downflow — downlink traffic            [online]
    ///   volume   — volume of the last written audio
    /// One parameter per call; pass null sessionID for global MSC settings.
    /// NOTE(review): same "ref byte[]" marshaling concern as MSPGetParam.
    /// </summary>
    /// <param name="sessionID">Handle from QISRSessionBegin, or null for global settings.</param>
    /// <param name="paramName">Parameter name; one query per call.</param>
    /// <param name="paramValue">In: buffer start address. Out: the retrieved value.</param>
    /// <param name="valueLen">In: buffer size. Out: actual value length (excluding '\0').</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QISRGetParam(string sessionID, string paramName, ref byte[] paramValue, ref uint valueLen);
    #endregion

    #region qtts.h 语音合成
    /// <summary>
    /// Starts one speech-synthesis session and allocates TTS resources.
    /// </summary>
    /// <param name="_params">Session parameters ("key=value" pairs):
    ///   engine_type      — cloud | local
    ///   voice_name       — speaker (male/female/child voices, etc.)
    ///   speed            — 0-100, default 50
    ///   volume           — 0-100, default 50
    ///   pitch            — 0-100, default 50
    ///   tts_res_path     — offline synthesis resource path ("fo|" form supported)
    ///   rdn              — digit reading: 0 numeric-first, 1 numeric, 2 literal, 3 literal-first
    ///   rcn              — how "1" is read (offline): 0 "yao", 1 "yi"
    ///   text_encoding    — REQUIRED; GB2312, GBK, BIG5, UNICODE, GB18030 or UTF8
    ///   sample_rate      — 16000 | 8000, default 16000
    ///   background_sound — 0 none | 1 with background music   [online]
    ///   aue              — audio codec/level: raw; speex(-wb); ico (speex levels 0-10)
    ///   ttp              — text type: text (plain) | cssml
    ///   speed_increase   — offline speed boost: 1 normal, 2 double, 4 quadruple
    ///   effect           — offline effect: 0 none, 1 doppler, 2 echo, 3 robot,
    ///                      4 chorus, 5 underwater, 6 reverb, 7 eerie
    /// </param>
    /// <param name="errorCode">Receives 0 (MSP_SUCCESS) on success, otherwise an error code.</param>
    /// <returns>Session id string — the handle for this synthesis — on success;
    /// null on failure.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QTTSSessionBegin(string _params, ref int errorCode);
    /// <summary>
    /// Writes the text to synthesize.
    /// </summary>
    /// <param name="sessionID">Handle returned by QTTSSessionBegin.</param>
    /// <param name="textString">Pointer to the text to synthesize.</param>
    /// <param name="textLen">Text length in bytes; at most 8192.</param>
    /// <param name="_params">Per-call parameters; currently unused, pass null/empty.</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QTTSTextPut(IntPtr sessionID, string textString, uint textLen, string _params);
    /// <summary>
    /// Fetches synthesized audio. Call repeatedly until all audio is fetched
    /// (synthStatus = MSP_TTS_FLAG_DATA_END) or the call fails. If no audio is
    /// ready yet, sleep the current thread briefly between calls to avoid
    /// wasting CPU on tight polling.
    /// </summary>
    /// <param name="sessionID">Handle returned by QTTSSessionBegin.</param>
    /// <param name="audioLen">Receives the synthesized audio length in bytes.</param>
    /// <param name="synthStatus">Receives the synthesis status.</param>
    /// <param name="errorCode">Receives 0 (MSP_SUCCESS) on success, otherwise an error code.</param>
    /// <returns>Pointer to the current audio data buffer when data is available;
    /// null on failure or when there is no audio yet.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QTTSAudioGet(IntPtr sessionID, ref uint audioLen, ref SynthStatus synthStatus, ref int errorCode);
    /// <summary>
    /// Gets information about the synthesized audio.
    /// </summary>
    /// <param name="sessionID">Handle returned by QTTSSessionBegin.</param>
    /// <returns>Pointer to the audio info string.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr QTTSAudioInfo(IntPtr sessionID);
    /// <summary>
    /// Ends the synthesis session and releases all of its resources (parameters,
    /// text, instances). Counterpart of QTTSSessionBegin; the handle must not be
    /// used after this call.
    /// </summary>
    /// <param name="sessionID">Handle returned by QTTSSessionBegin.</param>
    /// <param name="hints">Free-form reason for ending the session; logged to CallLog.</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QTTSSessionEnd(IntPtr sessionID, string hints);
    /// <summary>
    /// Sets parameters for the given TTS session.
    /// </summary>
    /// <param name="sessionID">Session handle; null targets global settings.</param>
    /// <param name="paramName">Parameter name(s), separated by ',', ';' or '\n'.</param>
    /// <param name="paramValue">Caller-allocated value buffer.</param>
    /// <returns>0 on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QTTSSetParam(IntPtr sessionID, string paramName, byte[] paramValue);
    /// <summary>
    /// Gets information about the current synthesis session, e.g.:
    ///   sid      — 32-byte server session id   [online]
    ///   upflow   — uplink traffic              [online]
    ///   downflow — downlink traffic            [online]
    ///   ced      — text position reached by the audio synthesized so far
    /// One parameter per call; pass null sessionID for global MSC settings.
    /// NOTE(review): same "ref byte[]" marshaling concern as MSPGetParam.
    /// </summary>
    /// <param name="sessionID">Handle from QTTSSessionBegin, or null for global settings.</param>
    /// <param name="paramName">Parameter name; one query per call.</param>
    /// <param name="paramValue">In: buffer start address. Out: the retrieved value.</param>
    /// <param name="valueLen">In: buffer size. Out: actual value length (excluding '\0').</param>
    /// <returns>0 (MSP_SUCCESS) on success, otherwise an error code.</returns>
    [DllImport("msc_x64", CallingConvention = CallingConvention.StdCall)]
    public static extern int QTTSGetParam(IntPtr sessionID, string paramName, ref byte[] paramValue, ref uint valueLen);
    #endregion

}

#region QISR
/// <summary>
/// Audio upload status passed to QISRAudioWrite, telling MSC whether more
/// audio blocks are coming.
/// </summary>
public enum AudioStatus
{
    MSP_AUDIO_SAMPLE_INIT = 0x00,
    MSP_AUDIO_SAMPLE_FIRST = 0x01, // first block of audio
    MSP_AUDIO_SAMPLE_CONTINUE = 0x02, // more audio follows
    MSP_AUDIO_SAMPLE_LAST = 0x04, // last block of audio
}
/// <summary>
/// State of the end-point detector (End-point detected), reported by QISRAudioWrite.
/// </summary>
public enum EpStatus
{
    MSP_EP_LOOKING_FOR_SPEECH = 0,    // front end-point of the audio not yet detected
    MSP_EP_IN_SPEECH = 1,    // front end-point detected; audio is being processed normally
    MSP_EP_AFTER_SPEECH = 3,    // rear end-point detected; subsequent audio is ignored by MSC
    MSP_EP_TIMEOUT = 4,    // timed out
    MSP_EP_ERROR = 5,    // an error occurred
    MSP_EP_MAX_SPEECH = 6,    // audio too long
    MSP_EP_IDLE = 7,    // internal state after stop and before start
}
/// <summary>
/// Recognizer status, prompting the caller to start/stop fetching results.
/// </summary>
public enum RecogStatus
{
    MSP_REC_STATUS_SUCCESS = 0,    // recognition succeeded; call QISRGetResult for (partial) results
    MSP_REC_STATUS_NO_MATCH = 1,    // recognition finished with no result
    MSP_REC_STATUS_INCOMPLETE = 2,    // not finished; recognition still in progress
    MSP_REC_STATUS_NON_SPEECH_DETECTED = 3,
    MSP_REC_STATUS_SPEECH_DETECTED = 4,
    MSP_REC_STATUS_COMPLETE = 5,    // recognition finished
    MSP_REC_STATUS_MAX_CPU_TIME = 6,
    MSP_REC_STATUS_MAX_SPEECH = 7,
    MSP_REC_STATUS_STOPPED = 8,
    MSP_REC_STATUS_REJECTED = 9,
    MSP_REC_STATUS_NO_SPEECH_FOUND = 10,
    MSP_REC_STATUS_FAILURE = MSP_REC_STATUS_NO_MATCH,
}

#endregion

#region QTTS
/// <summary>
/// Synthesis status reported by QTTSAudioGet.
/// NOTE(review): the "CANCLEED" spelling is kept as-is; SynthesizingStatus in
/// this file defines MSP_TTS_FLAG_CMD_CANCELED = 4 instead of 0 — confirm the
/// correct value against the SDK header before relying on the canceled flag.
/// </summary>
public enum SynthStatus
{
    MSP_TTS_FLAG_CMD_CANCLEED = 0,
    MSP_TTS_FLAG_STILL_HAVE_DATA = 1,   // audio not fully fetched yet; more blocks follow
    MSP_TTS_FLAG_DATA_END = 2,   // all audio has been fetched
}

/// <summary>
/// Canonical 44-byte RIFF/WAVE file header. Field order matches the on-disk
/// layout; GetWaveHeader in this file fills it for 16 kHz / 16-bit / mono PCM.
/// </summary>
public struct WaveHeader
{
    public int RIFFID;           // "RIFF" magic
    public int FileSize;         // total file size - 8
    public int RIFFType;         // "WAVE" magic
    public int FMTID;            // "fmt " chunk id
    public int FMTSize;          // fmt chunk payload size (16 for PCM)
    public short FMTTag;         // audio format tag (1 = PCM)
    public ushort FMTChannel;    // channel count
    public int FMTSamplesPerSec; // sample rate
    public int AvgBytesPerSec;   // sample rate * block align
    public ushort BlockAlign;    // bytes per sample frame
    public ushort BitsPerSample; // bits per sample
    public int DataID;           // "data" chunk id
    public int DataSize;         // audio payload size: total file size - 44
}

/// <summary>
/// Built-in iFlytek TTS speaker presets (voice_name values).
/// </summary>
public enum TtsVoice
{
    XiaoYan = 0,    // iFlytek XiaoYan
    XuJiu = 1,    // iFlytek XuJiu
    XiaoPing = 2,    // iFlytek XiaoPing
    XiaoJing = 3,    // iFlytek XiaoJing
    XuXiaoBao = 4,    // iFlytek XuXiaoBao
}
/// <summary>
/// Synthesis process flags.
/// NOTE(review): overlaps with SynthStatus above but defines CANCELED = 4
/// rather than 0 — confirm against the SDK header which enum is authoritative.
/// </summary>
public enum SynthesizingStatus
{
    MSP_TTS_FLAG_STILL_HAVE_DATA = 1,
    MSP_TTS_FLAG_DATA_END = 2,
    MSP_TTS_FLAG_CMD_CANCELED = 4,
};

/* Handwriting process flags */
public enum HandwritingStatus
{
    MSP_HCR_DATA_FIRST = 1,    // first data block
    MSP_HCR_DATA_CONTINUE = 2, // intermediate data block
    MSP_HCR_DATA_END = 4,      // last data block
};


/* Upload data process flags */
public enum UploadStatus
{
    MSP_DATA_SAMPLE_INIT = 0x00,
    MSP_DATA_SAMPLE_FIRST = 0x01,    // first data block
    MSP_DATA_SAMPLE_CONTINUE = 0x02, // more data follows
    MSP_DATA_SAMPLE_LAST = 0x04,     // last data block
};
#endregion

/// <summary>
/// Parses a 16-bit PCM WAV byte stream into normalized float samples.
/// Mono fills LeftChannel only (RightChannel stays null); stereo fills both.
/// </summary>
public class WAV
{
    // Convert two little-endian bytes to one float in the range [-1, 1).
    static float BytesToFloat(byte firstByte, byte secondByte)
    {
        // combine the two bytes into one signed 16-bit sample (little endian)
        short s = (short)((secondByte << 8) | firstByte);
        // scale to just below +/-1
        return s / 32768.0F;
    }

    // Read a 32-bit little-endian integer starting at offset.
    static int BytesToInt(byte[] bytes, int offset = 0)
    {
        int value = 0;
        for (int i = 0; i < 4; i++)
        {
            value |= ((int)bytes[offset + i]) << (i * 8);
        }
        return value;
    }

    // properties
    public float[] LeftChannel { get; internal set; }
    public float[] RightChannel { get; internal set; }
    public int ChannelCount { get; internal set; }
    public int SampleCount { get; internal set; }
    public int Frequency { get; internal set; }

    /// <summary>
    /// Parses the given WAV file bytes. Assumes a standard RIFF layout with a
    /// "fmt " chunk starting at byte 12 (ChannelCount at 22, Frequency at 24).
    /// </summary>
    /// <param name="wav">Raw bytes of a 16-bit PCM WAV file.</param>
    public WAV(byte[] wav)
    {
        // Determine if mono or stereo (byte 23 ignored; 1 or 2 channels assumed).
        ChannelCount = wav[22];
        // Get the sample frequency.
        Frequency = BytesToInt(wav, 24);

        // Walk sub-chunks after "WAVE" until the "data" chunk
        // (ASCII "data" = 100 97 116 97).
        int pos = 12;
        while (!(wav[pos] == 100 && wav[pos + 1] == 97 && wav[pos + 2] == 116 && wav[pos + 3] == 97))
        {
            int chunkSize = BytesToInt(wav, pos + 4);
            // RIFF chunks are word-aligned: an odd-sized chunk carries one pad byte.
            // (Fix: the original skipped chunks without the pad byte.)
            pos += 8 + chunkSize + (chunkSize & 1);
        }

        // Fix: use the data chunk's declared size instead of assuming the audio
        // extends to the end of the buffer — files with trailing chunks (e.g.
        // LIST/INFO) would otherwise be misparsed. Fall back to the remaining
        // buffer length if the declared size is missing or out of range.
        int dataSize = BytesToInt(wav, pos + 4);
        pos += 8;
        if (dataSize <= 0 || dataSize > wav.Length - pos)
            dataSize = wav.Length - pos;

        SampleCount = dataSize / 2;                  // 2 bytes per sample (16-bit mono)
        if (ChannelCount == 2) SampleCount /= 2;     // 4 bytes per frame (16-bit stereo)

        // Allocate channel arrays (RightChannel stays null for mono).
        LeftChannel = new float[SampleCount];
        RightChannel = ChannelCount == 2 ? new float[SampleCount] : null;

        // Decode the interleaved samples. Bounding by SampleCount (not buffer
        // length) also prevents the stereo overrun the original's
        // "pos < wav.Length" loop allowed on odd-length data.
        for (int i = 0; i < SampleCount; i++)
        {
            LeftChannel[i] = BytesToFloat(wav[pos], wav[pos + 1]);
            pos += 2;
            if (ChannelCount == 2)
            {
                RightChannel[i] = BytesToFloat(wav[pos], wav[pos + 1]);
                pos += 2;
            }
        }
    }

    public override string ToString()
    {
        return string.Format("[WAV: LeftChannel={0}, RightChannel={1}, ChannelCount={2}, SampleCount={3}, Frequency={4}]", LeftChannel, RightChannel, ChannelCount, SampleCount, Frequency);
    }
}

总体来说就这样实现 自己再加上UI测试一下 下面是我的测试图

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值