Unity 接入百度长文本合成语音

注意:需要 Unity 2020.2 及以上版本(代码使用了 C# 8 的 using 声明、switch 表达式,以及 UnityWebRequest.result)

1. 创建空物体,并挂载以下两个脚本(BaiduSettings 与 BaiduTextToSpeech,需分别保存为与类名同名的独立 .cs 文件)

using System.Collections;
using UnityEngine;
using UnityEngine.Networking;

namespace BaiDuAI
{
    /// <summary>
    /// Holds the Baidu application credentials and endpoint URLs and, when enabled,
    /// requests an access token from the authorization endpoint during <c>Awake</c>.
    /// The token is stored in <see cref="m_Token"/> once the request completes.
    /// </summary>
    public class BaiduSettings : MonoBehaviour
    {
        #region 参数定义
        /// <summary>
        /// API Key of the application.
        /// </summary>
        [Header("应用的API Key")] public string m_API_key = string.Empty;
        /// <summary>
        /// Secret Key of the application.
        /// </summary>
        [Header("应用的Secret Key")] public string m_Client_secret = string.Empty;
        
        /// <summary>
        /// Endpoint used to create a synthesis task.
        /// </summary>
        [Header("合成URL")] public string m_CompositeURL = "https://aip.baidubce.com/rpc/2.0/tts/v1/create";
        /// <summary>
        /// Endpoint used to query the state of a synthesis task.
        /// </summary>
        [Header("查询URL")] public string m_QueryURL = "https://aip.baidubce.com/rpc/2.0/tts/v1/query";
        /// <summary>
        /// Access token. Stays empty until the token request started in <c>Awake</c> succeeds.
        /// </summary>
        [HideInInspector]public string m_Token = string.Empty;
        /// <summary>
        /// Whether to fetch the token from the server in <c>Awake</c>.
        /// </summary>
        [Header("填写TokenURL")][SerializeField] private bool m_GetTokenFromServer = true;
        /// <summary>
        /// Authorization endpoint the token is requested from.
        /// </summary>
        [SerializeField] private string m_AuthorizeURL = "https://aip.baidubce.com/oauth/2.0/token";
        #endregion
    
        /// <summary>
        /// Starts the asynchronous token request when <c>m_GetTokenFromServer</c> is set.
        /// </summary>
        private void Awake()
        {
            if (m_GetTokenFromServer)
            {
                StartCoroutine(GetToken(GetTokenAction));
            }
        }


        /// <summary>
        /// Stores the token that was received.
        /// </summary>
        /// <param name="_token">Access token returned by the authorization endpoint.</param>
        private void GetTokenAction(string _token)
        {
            m_Token = _token;
        }

        /// <summary>
        /// Requests an access token with the client-credentials grant and hands it to
        /// <paramref name="_callback"/>. Failures are logged and the callback is not invoked.
        /// </summary>
        /// <param name="_callback">Receives the non-empty access token on success.</param>
        /// <returns>Coroutine enumerator.</returns>
        private IEnumerator GetToken(System.Action<string> _callback)
        {
            // Credentials are trimmed and percent-encoded so that stray whitespace or reserved
            // characters (&, =, +, #) cannot corrupt the query string.
            string _api_key = System.Uri.EscapeDataString((m_API_key ?? string.Empty).Trim());
            string _client_secret = System.Uri.EscapeDataString((m_Client_secret ?? string.Empty).Trim());
            string _token_url = $"{m_AuthorizeURL}?grant_type=client_credentials&client_id={_api_key}&client_secret={_client_secret}";

            using UnityWebRequest request = UnityWebRequest.Get(_token_url);
            yield return request.SendWebRequest();
            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError(request.error);
                yield break;
            }

            string _msg = request.downloadHandler.text;
            TokenInfo _textback = JsonUtility.FromJson<TokenInfo>(_msg);
            // FromJson yields null for an empty body; an error payload leaves access_token empty.
            string _token = _textback?.access_token;
            if (string.IsNullOrEmpty(_token))
            {
                Debug.LogError($"Token response did not contain an access_token: {_msg}");
                yield break;
            }

            _callback?.Invoke(_token);
        }
    }


    /// <summary>
    /// Token response payload. Deserialized with JsonUtility from the body returned by the
    /// authorization endpoint; only the <c>access_token</c> field is read.
    /// </summary>
    [System.Serializable]
    public class TokenInfo
    {
        // Field name must match the JSON key because JsonUtility maps by field name.
        public string access_token = string.Empty;
    }
}
using System;
using System.Collections;
using System.Text;
using UnityEngine;
using UnityEngine.Networking;
using Debug = UnityEngine.Debug;

namespace BaiDuAI
{
    [RequireComponent(typeof(BaiduSettings))]
    public class BaiduTextToSpeech : MonoBehaviour
    {
        #region 参数
        /// <summary>
        /// Settings component that provides the endpoint URLs and the access token.
        /// Assigned in Awake via GetComponent.
        /// </summary>
        [SerializeField] private BaiduSettings m_Settings;
        /// <summary>
        /// Speech synthesis settings (language, speed, pitch, volume, voice, format).
        /// </summary>
        [SerializeField] private PostDataSetting m_Post_Setting;
        #endregion

        /// <summary>
        /// Caches the <see cref="BaiduSettings"/> component that lives on the same GameObject.
        /// </summary>
        private void Awake() => m_Settings = GetComponent<BaiduSettings>();

        #region Public Method


        /// <summary>
        /// Starts speech synthesis for the given text and reports the resulting clip.
        /// </summary>
        /// <param name="msg">Text to synthesize.</param>
        /// <param name="callback">Invoked with the downloaded <see cref="AudioClip"/> once it is available.</param>
        public void Speak(string msg, Action<AudioClip> callback)
        {
            IEnumerator synthesis = GetSpeech(msg, callback);
            StartCoroutine(synthesis);
        }

        #endregion

        #region Private Method

        /// <summary>
        /// Creates a synthesis task for <paramref name="msg"/> and, once the service returns a
        /// task id, starts polling for the result.
        /// </summary>
        /// <param name="msg">Text to synthesize; sent as a single-element <c>text</c> array.</param>
        /// <param name="callback">Forwarded to the polling/download coroutines; receives the final clip.</param>
        /// <returns>Coroutine enumerator.</returns>
        private IEnumerator GetSpeech(string msg, Action<AudioClip> callback)
        {
            // The token is fetched asynchronously by BaiduSettings.Awake, so a call made from
            // another component's Start() can arrive before it exists. Wait (bounded) for it
            // instead of sending a request with an empty access_token.
            const float tokenTimeoutSeconds = 10f;
            float waited = 0f;
            while (string.IsNullOrEmpty(m_Settings.m_Token) && waited < tokenTimeoutSeconds)
            {
                yield return null;
                waited += Time.unscaledDeltaTime;
            }

            if (string.IsNullOrEmpty(m_Settings.m_Token))
            {
                Debug.LogError($"No access token available after {tokenTimeoutSeconds} seconds; speech synthesis aborted.");
                yield break;
            }

            string url = $"{m_Settings.m_CompositeURL}?access_token={m_Settings.m_Token}";
            
            Message data = new Message()
            {
                text = new []{msg},
                format=m_Post_Setting.format,
                voice= SetReader(m_Post_Setting.per),
                lang=m_Post_Setting.lang,
                speed=m_Post_Setting.speed,
                pitch=m_Post_Setting.pitch,
                volume=m_Post_Setting.volume
            };

            string jsonData = JsonUtility.ToJson(data);
       
            print(jsonData);
            // Send the request as a raw JSON POST.
            using UnityWebRequest request = new UnityWebRequest(url, "POST");
            byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonData);
            request.uploadHandler = new UploadHandlerRaw(bodyRaw);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "application/json");

            yield return request.SendWebRequest();

            if (request.result != UnityWebRequest.Result.Success)
            {
                Debug.LogError(request.error);
                yield break;
            }

            string responseText = request.downloadHandler.text;
            AudioSynthesisTaskInfo taskInfo = JsonUtility.FromJson<AudioSynthesisTaskInfo>(responseText);
            string audioID = taskInfo?.task_id;
            // A body without task_id means no task was created; polling with an empty id
            // could never succeed, so report the raw response and stop.
            if (string.IsNullOrEmpty(audioID))
            {
                Debug.LogError($"Speech synthesis task was not created: {responseText}");
                yield break;
            }

            StartCoroutine(QueryResultantResult(audioID,callback));
        }
    
    
        /// <summary>
        /// Polls the query endpoint once per second until the task identified by
        /// <paramref name="audioID"/> succeeds, then starts downloading the audio.
        /// Polling stops on a "Failure" status, on a response without task information,
        /// or after several consecutive transport errors.
        /// </summary>
        /// <param name="audioID">Task id returned when the synthesis task was created.</param>
        /// <param name="callback">Forwarded to the download coroutine; receives the final clip.</param>
        /// <returns>Coroutine enumerator.</returns>
        IEnumerator QueryResultantResult(string audioID,Action<AudioClip> callback)
        {
            // Upper bound on back-to-back failed requests so an unreachable service
            // does not keep this coroutine logging errors forever.
            const int maxConsecutiveRequestFailures = 5;

            string url = $"{m_Settings.m_QueryURL}?access_token={m_Settings.m_Token}";

            // The payload never changes between polls, so build it once.
            AudioSynthesisTasks synthesisTasks = new AudioSynthesisTasks
            {
                task_ids = new[] { audioID }
            };
            byte[] bodyRaw = Encoding.UTF8.GetBytes(JsonUtility.ToJson(synthesisTasks));

            int consecutiveRequestFailures = 0;
            while (true)
            {
                yield return new WaitForSeconds(1f);
                using UnityWebRequest request = new UnityWebRequest(url, "POST");
                request.uploadHandler = new UploadHandlerRaw(bodyRaw);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "application/json");
                yield return request.SendWebRequest();

                if (request.result !=UnityWebRequest.Result.Success)
                {
                    Debug.LogError(request.error);
                    consecutiveRequestFailures++;
                    if (consecutiveRequestFailures >= maxConsecutiveRequestFailures)
                    {
                        Debug.LogError($"Giving up on synthesis task {audioID} after {consecutiveRequestFailures} consecutive failed queries.");
                        yield break;
                    }
                    continue;
                }

                consecutiveRequestFailures = 0;
                string responseText = request.downloadHandler.text;
                ResultantResult result = JsonUtility.FromJson<ResultantResult>(responseText);
                if (result?.tasks_info == null || result.tasks_info.Length == 0)
                {
                    // Typically an error payload; there is nothing to poll for.
                    Debug.LogError($"Query response contained no task information: {responseText}");
                    yield break;
                }

                AudioTaskInfo taskInfo = result.tasks_info[0];
                string audioState = taskInfo.task_status;
                if (string.Equals(audioState, "Success"))
                {
                    string audioUrl = taskInfo.task_result?.speech_url;
                    if (string.IsNullOrEmpty(audioUrl))
                    {
                        Debug.LogError($"Synthesis task succeeded but returned no speech_url: {responseText}");
                        yield break;
                    }

                    StartCoroutine(PlayAudioClip(audioUrl, callback));
                    print(audioUrl);
                    yield break;
                }

                if (string.Equals(audioState, "Failure"))
                {
                    // A failed task never becomes successful; stop instead of polling forever.
                    Debug.LogError($"Synthesis task {audioID} failed: {responseText}");
                    yield break;
                }
            }
        }

        /// <summary>
        /// Downloads the synthesized audio from <paramref name="audioUrl"/> and passes the decoded
        /// clip to <paramref name="callback"/>. The decoder is chosen from the configured output
        /// format so that an mp3 setting is not decoded as WAV. On failure the error is logged and
        /// the callback is not invoked.
        /// </summary>
        /// <param name="audioUrl">Download URL reported by the query endpoint.</param>
        /// <param name="callback">Receives the decoded <see cref="AudioClip"/>; may be null.</param>
        /// <returns>Coroutine enumerator.</returns>
        IEnumerator PlayAudioClip(string audioUrl,Action<AudioClip> callback)
        {
            AudioType audioType = ResolveAudioType(m_Post_Setting?.format);
            using UnityWebRequest audioRequest = UnityWebRequestMultimedia.GetAudioClip(audioUrl, audioType);
            yield return audioRequest.SendWebRequest();

            if (audioRequest.result != UnityWebRequest.Result.Success)
            {
                // Logged as an error, consistent with the other request coroutines in this class.
                Debug.LogError("Failed to load audio: " + audioRequest.error);
            }
            else
            {
                AudioClip audioClip = DownloadHandlerAudioClip.GetContent(audioRequest);
                callback?.Invoke(audioClip);
            }
        }

        /// <summary>
        /// Maps the configured format string to the Unity decoder type: values starting with
        /// "mp3" use <see cref="AudioType.MPEG"/>, everything else keeps the previous
        /// <see cref="AudioType.WAV"/> behavior.
        /// </summary>
        /// <param name="format">Configured output format; may be null or empty.</param>
        /// <returns>The audio type to request from UnityWebRequestMultimedia.</returns>
        private static AudioType ResolveAudioType(string format)
        {
            if (!string.IsNullOrEmpty(format) &&
                format.Trim().StartsWith("mp3", StringComparison.OrdinalIgnoreCase))
            {
                return AudioType.MPEG;
            }

            return AudioType.WAV;
        }
    
        /// <summary>
        /// Translates a <see cref="SpeechRole"/> into the numeric voice id sent in the request.
        /// Unknown roles fall back to 0 (度小美).
        /// </summary>
        /// <param name="role">Voice selected in the settings.</param>
        /// <returns>Numeric voice id for the request's <c>voice</c> field.</returns>
        private int SetReader(SpeechRole role)
        {
            return role switch
            {
                SpeechRole.度小宇 => 1,
                SpeechRole.度小美 => 0,
                SpeechRole.度逍遥 => 3,
                SpeechRole.度丫丫 => 4,
                SpeechRole.Jp度小娇 => 5,
                SpeechRole.Jp度逍遥 => 5003,
                SpeechRole.Jp度小鹿 => 5118,
                SpeechRole.Jp度博文 => 106,
                SpeechRole.Jp度小童 => 110,
                SpeechRole.Jp度小萌 => 111,
                // Was 5, which duplicated 度小娇; the documented id for 度米朵 is 103.
                SpeechRole.Jp度米朵 => 103,
                _ => 0
            };
        }

        #endregion

        #region 数据格式定义
    
        /// <summary>
        /// Speech synthesis configuration. The values are copied into the request body
        /// when a synthesis task is created.
        /// </summary>
        [Serializable]
        public class PostDataSetting
        {
            /// <summary>
            /// Fixed value zh. Language selection; currently only a mixed Chinese/English mode
            /// exists, so use the fixed value zh.
            /// </summary>
            [Header("语言设置,固定值zh")] public string lang = "zh";
            /// <summary>
            /// Speech rate, 0-15; the default 5 is medium speed.
            /// </summary>
            [Header("语速,取值0-15,默认为5中语速")] public int speed = 5;
            /// <summary>
            /// Pitch, 0-15; the default 5 is medium pitch.
            /// </summary>
            [Header("音调,取值0-15,默认为5中语调")] public int pitch	 = 5;
            /// <summary>
            /// Volume, 0-15; the default 5 is medium volume (0 is the minimum volume, not silence).
            /// </summary>
            [Header("音量,取值0-15,默认为5中音量")] public int volume = 5;
            /// <summary>
            /// Basic voice library: 度小宇=1, 度小美=0, 度逍遥 (basic)=3, 度丫丫=4.
            /// Premium voice library: 度逍遥 (premium)=5003, 度小鹿=5118, 度博文=106, 度小童=110,
            /// 度小萌=111, 度米朵=103, 度小娇=5. Defaults to 度小美.
            /// </summary>
            [Header("设置朗读的声音")] public SpeechRole per = SpeechRole.度小美;
            /// <summary>
            /// Output audio format, sent as the request's <c>format</c> string; defaults to "wav".
            /// Original note (translated): 3 = mp3 (default); 4 = pcm-16k; 5 = pcm-8k;
            /// 6 = wav (same content as pcm-16k). aue=4 or 6 are the formats speech recognition
            /// requires, but the audio is not natural human speech, so recognition quality suffers.
            /// NOTE(review): those numeric codes do not match this string-typed field; they look
            /// like they describe a different API's aue parameter. Confirm the accepted values.
            /// </summary>
            [Header("设置音频格式")] public string format = "wav";
        }
    
        /// <summary>
        /// Selectable voices. The implicit enum values are NOT the API voice ids;
        /// SetReader maps each member to the numeric id sent in the request.
        /// Presumably the "Jp" prefix marks the premium (精品) voice library; confirm.
        /// </summary>
        public enum SpeechRole
        {
            度小宇,
            度小美,
            度逍遥,
            度丫丫,
            Jp度逍遥,
            Jp度小鹿,
            Jp度博文,
            Jp度小童,
            Jp度小萌,
            Jp度米朵,
            Jp度小娇
        }
    
    
        /// <summary>
        /// Request body for creating a synthesis task; serialized with JsonUtility,
        /// so field names are the JSON keys.
        /// </summary>
        [Serializable]
        public class Message
        {
            // Text segments to synthesize; GetSpeech sends a single-element array.
            public string[] text;
            // Output audio format string taken from PostDataSetting.format.
            public string format;
            // Numeric voice id produced by SetReader.
            public int voice;
            // Language code taken from PostDataSetting.lang.
            public string lang;
            // Speech rate taken from PostDataSetting.speed.
            public int speed;
            // Pitch taken from PostDataSetting.pitch.
            public int pitch;
            // Volume taken from PostDataSetting.volume.
            public int volume;
            // Never assigned by GetSpeech, so it is serialized with its default value 0.
            public int enable_subtitle;
        }
    
    
        /// <summary>
        /// Request body for the query endpoint: the ids of the synthesis tasks to look up.
        /// </summary>
        [System.Serializable]
        public class AudioSynthesisTasks
        {
            // The polling coroutine sends exactly one id per query.
            public string[] task_ids;
        }
    
        /// <summary>
        /// Response of the create endpoint. Only <c>task_id</c> is read by GetSpeech.
        /// </summary>
        [System.Serializable]
        public class AudioSynthesisTaskInfo
        {
            // NOTE(review): service log ids may exceed the Int32 range; confirm whether this should be long.
            public int log_id;
            public string task_status;
            // Id used afterwards to poll the query endpoint.
            public string task_id;
        }
        /// <summary>
        /// Response of the query endpoint: one entry per queried task.
        /// </summary>
        [System.Serializable]
        public class ResultantResult
        {
            // NOTE(review): service log ids may exceed the Int32 range; confirm whether this should be long.
            public int log_id;
            // The polling coroutine reads only the first element.
            public AudioTaskInfo[] tasks_info;

        }
        /// <summary>
        /// State of a single synthesis task as reported by the query endpoint.
        /// </summary>
        [System.Serializable]
        public class AudioTaskInfo
        {
            // Compared against "Success" by the polling coroutine.
            public string task_status;
            // Holds the download URL of the synthesized audio.
            public AudioTaskResult task_result;
            public string task_id;
        
        }
        /// <summary>
        /// Result part of a queried task.
        /// </summary>
        [System.Serializable]
        public class AudioTaskResult
        {
            // URL the synthesized audio is downloaded from.
            public string speech_url;
        }
    
        #endregion

    }
}

2.如何使用

using System.Collections;
using System.Collections.Generic;
using BaiDuAI;
using UnityEngine;

/// <summary>
/// Usage example: synthesizes a fixed sentence on start and plays the resulting clip.
/// </summary>
public class Test : MonoBehaviour
{
    public BaiduTextToSpeech TTS;

    public AudioSource audioSource;

    // Unity invokes Start once, before the first frame update.
    void Start()
    {
        TTS.Speak("这是一段测试音频", OnClipReady);
    }

    // Assigns the synthesized clip to the audio source and plays it.
    private void OnClipReady(AudioClip clip)
    {
        audioSource.clip = clip;
        audioSource.Play();
    }
}

3.运行结果

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 4
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

今天喝水了嘛.

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值