【Unity学习】接入腾讯语音识别API实现实时语音识别

接入腾讯语音识别API实现实时语音识别

一、演示效果

演示效果

二、主要代码

using Cysharp.Threading.Tasks;
using System;
using System.Collections.Generic;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
using UnityEngine;
using UnityWebSocket;

namespace AIChat.Chat
{
    // API 响应数据结构
    // One JSON message received from the Tencent real-time ASR WebSocket API.
    // Field names must match the wire JSON keys exactly — JsonUtility maps by name.
    [Serializable]
    public class ApiResponse
    {
        public int code;          // 0 on success; non-zero indicates an API error
        public string message;    // human-readable status or error description
        public string voice_id;   // echo of the voice_id sent in the signed URL
        public string message_id; // server-assigned id of this message
        public Result result;     // recognition payload; may be absent (null) on error/final frames
        public int final;         // 1 when this is the last message of the session
    }

    // 结果数据结构
    // Recognition payload of one ASR message. Field names must match the wire
    // JSON keys exactly — JsonUtility maps by name.
    [Serializable]
    public class Result
    {
        public int slice_type;       // segment state; 2 marks a completed segment (see Socket_OnMessage)
        public int index;            // segment index within the session
        public int start_time;       // segment start, per API — presumably milliseconds; confirm against docs
        public int end_time;         // segment end, same unit as start_time
        public string voice_text_str; // recognized text for this segment
        public int word_size;        // number of entries in word_list
        public string[] word_list;   // per-word results
        public object emotion_type;  // TODO: consider replacing `object` with the concrete type once the schema is confirmed
    }

    // 表示结束标记的数据结构
    // End-of-stream marker sent to the server as {"type":"end"} when the
    // client has no more audio to deliver.
    [Serializable]
    public class EndText
    {
        // Initialized inline instead of via an explicit constructor;
        // serialized output is identical.
        public string type = "end";
    }

    // Streams microphone audio to the Tencent Cloud real-time speech
    // recognition WebSocket API and accumulates the recognized text.
    //
    // NOTE(review): the class name keeps the original "Speach" spelling so that
    // scene bindings and external callers keep working; rename to
    // "TencentSpeechToText" in a coordinated refactor.
    public class TencentSpeachToText : MonoBehaviour
    {
        [SerializeField] private string _SpeechRecognizeURL = "asr.cloud.tencent.com"; // host at edit time; replaced by the full signed wss:// URL in Awake
        [SerializeField] private string _appID = string.Empty;          // Tencent Cloud AppID
        [SerializeField] private string _secretid = string.Empty;       // Tencent Cloud SecretId
        [SerializeField] private string _secretkey = string.Empty;      // Tencent Cloud SecretKey
        [SerializeField] private string _engine_model_type = "16k_zh";  // ASR engine model (16 kHz Mandarin)

        private IWebSocket socket;
        private string _microphoneDevice;        // name of the capture device (first one found)
        private AudioClip _origionRecording;     // looping microphone clip: 200 s ring buffer at 16 kHz
        private CancellationTokenSource _cancelToken; // NOTE(review): created but never consumed — wire into the send loop or remove
        private float[] _sampleBuffer;           // reused scratch buffer for mic samples
        private int _lastSample = 0;             // read cursor into the mic ring buffer (in samples)
        private string _lastResult;              // accumulated text of completed (slice_type == 2) segments
        private string _recognitionText;         // latest full text: completed segments + in-progress segment

        // Mirrors the socket state each frame so it is visible in the Inspector.
        public WebSocketState state;

        private void Awake()
        {
            // Build the signed wss:// URL once; the field now holds the full URL,
            // not the bare host.
            _SpeechRecognizeURL = GenerateSpeechRecognizeURL();

            InitializeDevice();
            _cancelToken = new CancellationTokenSource();
        }

        // Picks the first available microphone device.
        private void InitializeDevice()
        {
            if (Microphone.devices.Length > 0)
            {
                _microphoneDevice = Microphone.devices[0];
            }
            else
            {
                Debug.LogError("没有找到麦克风设备。");
            }
        }

        private void Update()
        {
            state = socket == null ? WebSocketState.Closed : socket.ReadyState;
        }

        #region 签名生成

        // Builds the final signed wss:// URL: host/path + sorted query parameters
        // + HMAC-SHA1 signature of the plain text, per the Tencent ASR protocol.
        private string GenerateSpeechRecognizeURL()
        {
            string baseUrl = $"{_SpeechRecognizeURL}/asr/v2/{_appID}?";
            SortedDictionary<string, string> headers = SplicingHeaders();
            baseUrl = MakeSignPlainText(baseUrl, headers);
            string signature = GetSignature(_secretkey, baseUrl);
            return $"wss://{baseUrl}&signature={Uri.EscapeDataString(signature)}";
        }

        // HMAC-SHA1 over the sign plain text, Base64-encoded.
        private string GetSignature(string signKey, string plainText)
        {
            using (HMACSHA1 mac = new HMACSHA1(Encoding.UTF8.GetBytes(signKey)))
            {
                byte[] hash = mac.ComputeHash(Encoding.UTF8.GetBytes(plainText));
                return Convert.ToBase64String(hash);
            }
        }

        // Request parameters; SortedDictionary guarantees the lexicographic key
        // order the signature algorithm requires.
        private SortedDictionary<string, string> SplicingHeaders()
        {
            return new SortedDictionary<string, string>
            {
                { "engine_model_type", _engine_model_type },
                // Signature expiry, Unix seconds (10 days out).
                { "expired", ((int)DateTime.UtcNow.AddDays(10).Subtract(DateTime.UnixEpoch).TotalSeconds).ToString() },
                { "filter_dirty", "1" },   // filter profanity
                { "filter_modal", "2" },   // filter filler words
                { "filter_punc", "1" },    // filter punctuation
                { "needvad", "1" },        // enable voice activity detection
                { "nonce", new System.Random().Next(1000000000).ToString() },
                { "secretid", _secretid },
                { "timestamp", ((int)DateTime.UtcNow.Subtract(DateTime.UnixEpoch).TotalSeconds).ToString() },
                { "voice_format", "1" },   // 1 = PCM per the API — confirm against docs
                { "voice_id", Guid.NewGuid().ToString() }
            };
        }

        // url + "k1=v1&k2=v2..." with no trailing '&' — the string that gets signed.
        private string MakeSignPlainText(string url, SortedDictionary<string, string> requestParams)
        {
            StringBuilder builder = new StringBuilder(url);
            foreach (var kvp in requestParams)
            {
                builder.AppendFormat("{0}={1}&", kvp.Key, kvp.Value);
            }
            return builder.ToString().TrimEnd('&');
        }

        #endregion

        // Opens the WebSocket connection and subscribes the event handlers.
        public void ConnectToServer()
        {
            socket = new WebSocket(_SpeechRecognizeURL);
            socket.OnOpen += Socket_OnOpen;
            socket.OnMessage += Socket_OnMessage;
            socket.OnClose += Socket_OnClose;
            socket.OnError += Socket_OnError;
            Debug.Log("<color=yellow>[Tencent]</color> 正在连接...");
            socket.ConnectAsync();
        }

        // Closes the WebSocket connection (no-op if never connected).
        public void CloseClient()
        {
            Debug.Log("<color=yellow>[Tencent]</color> 正在关闭...");
            // FIX: guard against CloseClient being called before ConnectToServer.
            socket?.CloseAsync();
        }

        #region 语音识别

        // Starts microphone capture and streams audio chunks to the recognizer
        // every 40 ms until the recording is stopped. Fire-and-forget (UniTaskVoid).
        public async UniTaskVoid StartMicrophone()
        {
            if (state != WebSocketState.Connecting && state != WebSocketState.Open)
            {
                ConnectToServer();
            }

            if (!Microphone.IsRecording(_microphoneDevice))
            {
                _origionRecording = Microphone.Start(_microphoneDevice, true, 200, 16000);
                // FIX: reset the read cursor for the new recording; a stale cursor
                // from a previous session made the first diff wrong.
                _lastSample = 0;
            }

            while (Microphone.IsRecording(_microphoneDevice))
            {
                SendAudioDataToRecognizer();
                await UniTask.Delay(40); // send ~40 ms of audio per packet
            }
        }

        // Stops capture, sends the end marker, and hands the accumulated text
        // to the caller.
        public async UniTaskVoid StopMicrophone(Action<string> callback)
        {
            if (Microphone.IsRecording(_microphoneDevice))
            {
                // FIX: end the device we actually started (was Microphone.End(null),
                // which targets the default device and may not be ours).
                Microphone.End(_microphoneDevice);
            }

            await UniTask.WaitUntil(() => !Microphone.IsRecording(_microphoneDevice));

            // Tell the server the audio stream is finished: {"type":"end"}.
            var endText = JsonUtility.ToJson(new EndText());
            SendData(endText);

            callback?.Invoke(_recognitionText);
            // Reset accumulated text so the next session starts clean.
            _lastResult = string.Empty;
            _recognitionText = string.Empty;
        }

        // Reads the samples captured since the last call and sends them as
        // 16-bit little-endian PCM.
        private void SendAudioDataToRecognizer()
        {
            int pos = Microphone.GetPosition(_microphoneDevice);
            int diff = pos - _lastSample;

            // FIX: the clip is a looping ring buffer, so the write position wraps
            // past the end every 200 s; the old code silently dropped that chunk.
            if (diff < 0 && _origionRecording != null)
            {
                diff += _origionRecording.samples;
            }

            if (diff > 0)
            {
                if (_sampleBuffer == null || _sampleBuffer.Length != diff * _origionRecording.channels)
                {
                    _sampleBuffer = new float[diff * _origionRecording.channels];
                }

                // AudioClip.GetData wraps around the clip end, so a read that
                // spans the wrap point is still contiguous in _sampleBuffer.
                _origionRecording.GetData(_sampleBuffer, _lastSample);
                byte[] ba = ConvertAudioClipDataToInt16ByteArray(_sampleBuffer);
                if (ba.Length != 0)
                {
                    SendData(ba);
                }
            }
            _lastSample = pos;
        }

        // Converts normalized float samples [-1, 1] to 16-bit little-endian PCM.
        private byte[] ConvertAudioClipDataToInt16ByteArray(float[] data)
        {
            byte[] byteArray = new byte[data.Length * 2];
            for (int i = 0; i < data.Length; i++)
            {
                // FIX: clamp so out-of-range samples cannot overflow short.
                short value = (short)(Mathf.Clamp(data[i], -1f, 1f) * short.MaxValue);
                byteArray[i * 2] = (byte)(value & 0x00ff);            // low byte first
                byteArray[i * 2 + 1] = (byte)((value & 0xff00) >> 8); // little-endian
            }
            return byteArray;
        }

        #endregion

        // Sends a text frame; silently dropped unless the socket is open.
        private void SendData(string sendText)
        {
            if (state == WebSocketState.Connecting || state == WebSocketState.Closed || state == WebSocketState.Closing)
            {
                return;
            }
            socket?.SendAsync(sendText);
        }

        // Sends a binary (audio) frame; silently dropped unless the socket is open.
        public void SendData(byte[] bytes)
        {
            if (state == WebSocketState.Connecting || state == WebSocketState.Closed || state == WebSocketState.Closing)
            {
                return;
            }
            socket?.SendAsync(bytes);
        }

        private void Socket_OnOpen(object sender, OpenEventArgs e)
        {
            Debug.Log($"<color=yellow>[Tencent]</color> 已连接: {_SpeechRecognizeURL}");
        }

        // Parses each server message and accumulates recognized text.
        private void Socket_OnMessage(object sender, MessageEventArgs e)
        {
            if (e.IsBinary)
            {
                Debug.Log($"<color=yellow>[Tencent]</color> 接收到字节数据 ({e.RawData.Length}): {e.Data}");
            }
            else if (e.IsText)
            {
                Debug.Log($"<color=yellow>[Tencent]</color> 接收到文本: {e.Data}");

                var response = JsonUtility.FromJson<ApiResponse>(e.Data);
                // FIX: guard response.result — error frames and the final frame
                // may carry no result payload, which previously threw an NRE.
                if (response != null && response.final != 1 && response.result != null)
                {
                    // In-progress text = completed segments + current segment.
                    _recognitionText = _lastResult + response.result.voice_text_str;
                    if (response.result.slice_type == 2)
                    {
                        // Segment finished: fold it into the stable prefix.
                        _lastResult = _recognitionText;
                    }
                }
            }
        }

        private void Socket_OnClose(object sender, CloseEventArgs e)
        {
            Debug.Log($"<color=yellow>[Tencent]</color> 已关闭: 状态码: {e.StatusCode}, 原因: {e.Reason}");
        }

        private void Socket_OnError(object sender, UnityWebSocket.ErrorEventArgs e)
        {
            Debug.Log($"<color=yellow>[Tencent]</color> 错误: {e.Message}");
        }

        // Closes the WebSocket when the application quits.
        private void OnApplicationQuit()
        {
            if (socket != null && socket.ReadyState != WebSocketState.Closed)
            {
                CloseClient();
            }
        }
    }
}

三、参考内容

腾讯实时语音识别API文档
UnityWebSocket

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值