Integrating Microsoft Cognitive Services Speech Synthesis (Azure TTS) in Unity

In the previous article I covered how to integrate speech recognition from Microsoft Cognitive Services; in this one I will show how to integrate its speech synthesis (hereafter just "speech synthesis"). Speech synthesis also needs a YourSubscriptionKey and a YourServiceRegion, obtained the same way as for speech recognition. The two keys are shared with speech recognition, and the SDK is the same one as well. First, as before, create a new scene.

Then create a new script named MicroSoftTTSDemo, open it, and paste in the following code:

using UnityEngine;
using UnityEngine.UI;
using Microsoft.CognitiveServices.Speech;
using System;
using System.Collections;
using System.Threading.Tasks;

public class MicroSoftTTSDemo : MonoBehaviour
{
    // Hook up the Text, InputField, Button and AudioSource fields below with objects in your scene.
    public Text outputText;
    public InputField inputField;
    public Button speakButton;
    public AudioSource audioSource;

    private object threadLocker = new object();
    private bool waitingForSpeak;
    private string message;
    public DateTime startTime;
    public DateTime endTime;
    public Text timeText;

    void Start()
    {
        if (outputText == null)
        {
            UnityEngine.Debug.LogError("outputText property is null! Assign a UI Text element to it.");
        }
        else if (inputField == null)
        {
            message = "inputField property is null! Assign a UI InputField element to it.";
            UnityEngine.Debug.LogError(message);
        }
        else if (speakButton == null)
        {
            message = "speakButton property is null! Assign a UI Button to it.";
            UnityEngine.Debug.LogError(message);
        }
        else
        {
            // Continue with normal initialization, Text, InputField and Button objects are present.
            //inputField.text = "Enter text you wish spoken here.";
            //message = "Click button to synthesize speech";
            speakButton.onClick.AddListener(ButtonClick);
        }
    }

    void Update()
    {
        lock (threadLocker)
        {
            if (speakButton != null)
            {
                speakButton.interactable = !waitingForSpeak;
            }

            if (outputText != null)
            {
                outputText.text = message;
            }
        }
    }

    public void ButtonClick()
    {
        startTime = DateTime.Now;
        // Creates an instance of a speech config with specified subscription key and service region.
        // Replace with your own subscription key and service region (e.g., "westus").
        var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
        // Creates a speech synthesizer.
        // Make sure to dispose the synthesizer after use!
        using (var synthesizer = new SpeechSynthesizer(config, null))
        {
            lock (threadLocker)
            {
                waitingForSpeak = true;
            }
            // Starts speech synthesis, and returns after a single utterance is synthesized.
            // Note: blocking on .Result freezes the Unity main thread until the
            // service responds; see the coroutine sketch after this listing.
            var result = synthesizer.SpeakTextAsync(inputField.text).Result;

            // Checks result.
            string newMessage = string.Empty;

            if (result.Reason == ResultReason.SynthesizingAudioCompleted)
            {
                // Native audio playback is not yet supported in Unity (currently it is
                // only supported on Windows/Linux desktop), so use the Unity API to play
                // the audio here as a short-term solution. Native playback support may
                // be added in a future release.
                // The SDK returns 16-bit little-endian PCM; convert each byte pair
                // into a normalized float sample in [-1, 1] for the AudioClip.
                var sampleCount = result.AudioData.Length / 2;
                var audioData = new float[sampleCount];
                for (var i = 0; i < sampleCount; ++i)
                {
                    audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8 | result.AudioData[i * 2]) / 32768.0F;
                }
                // The default output audio format is 16 kHz, 16-bit, mono.
                var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount, 1, 16000, false);
                audioClip.SetData(audioData, 0);
                audioSource.clip = audioClip;
                audioSource.Play();
                endTime = DateTime.Now;
                timeText.text = "Synthesis started: " + startTime + "     Synthesis finished: " + endTime;

                newMessage = "Speech synthesis succeeded!";
            }

            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
                newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\nErrorDetails=[{cancellation.ErrorDetails}]\nDid you update the subscription info?";
            }

            lock (threadLocker)
            {
                message = newMessage;
                waitingForSpeak = false;
            }
        }
    }
}
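
One caveat about the listing above: blocking on SpeakTextAsync(...).Result freezes the Unity main thread until the service responds, so the UI stalls for the duration of the request. Below is a minimal sketch of a non-blocking alternative, entirely my own illustration rather than part of the official sample; SpeakRoutine is a hypothetical method you could add to MicroSoftTTSDemo, polling the SDK's Task from a coroutine.

// Hypothetical non-blocking variant (my own sketch, not from the official
// sample): poll the synthesis Task from a coroutine so the Unity main
// thread never blocks while the service is working.
private IEnumerator SpeakRoutine(string text)
{
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
    using (var synthesizer = new SpeechSynthesizer(config, null))
    {
        var task = synthesizer.SpeakTextAsync(text);
        while (!task.IsCompleted)
        {
            yield return null; // wait one frame, then check again
        }
        var result = task.Result; // the task has finished, so this no longer blocks
        // Handle result.Reason and result.AudioData exactly as in ButtonClick().
    }
}

You would start it from the button handler with StartCoroutine(SpeakRoutine(inputField.text)) instead of calling the blocking version directly.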

Save the script, click Play, type some English text into the InputField, and then click the Button; the text you entered will be played back. That completes the speech synthesis integration. Microsoft's synthesized voices sound remarkably close to a real human voice, without the robotic feel of older TTS engines.
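
The demo expects English input because, with nothing else configured, the service falls back to a default English (en-US) voice. If you want a different voice or language, the SDK exposes a SpeechSynthesisVoiceName property on SpeechConfig. The sketch below is my own example; zh-CN-XiaoxiaoNeural is an assumed voice name, so check the Azure voice list for what is actually available in your region.

var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");
// Select a specific voice before creating the synthesizer. The voice name
// here is only an example; consult the Azure voice list for valid names.
config.SpeechSynthesisVoiceName = "zh-CN-XiaoxiaoNeural";
using (var synthesizer = new SpeechSynthesizer(config, null))
{
    var result = synthesizer.SpeakTextAsync("你好，世界").Result;
}

Everything else, including the AudioClip conversion, works exactly as in the main listing.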

In the next article I will cover the pitfalls you may run into when building your project after integrating the Microsoft Speech SDK.
