Unity 科大讯飞离线语音合成

4 篇文章 0 订阅

好久没有更新文章了,今天我们继续更新科大讯飞的语音的文章。
之前在语音合成部分由于在线语音合成的处理时间太长,所以使用了C#自带的语音合成,处理是快了,但是合成的声音特别难听。
所以今天更新一篇离线语音合成的文章。
废话不多说,直接进入正题。

1.下载语音合成SDK,需要选择离线版本,否则无法使用离线合成。[图片]

2.解压SDK,这里我们需要用到 common.jet、xiaofeng.jet、xiaoyan.jet 这三个 .jet 文件,缺一不可,否则会报错(错误信息:文件缺失)。此外还需要 msc_x64.dll 库。[图片]

3.代码部分

using UnityEngine;
using msc;
using System;
using System.Text;
using System.IO;
using System.Runtime.InteropServices;
using System.Threading;
using UnityEngine.Networking;
using System.Collections;

/// <summary>
/// Demo component for iFlytek MSC text-to-speech in Unity.
/// Logs in to the SDK in <c>Start</c>, synthesizes a fixed sentence with the
/// offline engine on every left mouse click, writes the result as a WAV file
/// under StreamingAssets and plays it through the <see cref="AudioSource"/>
/// attached to the same GameObject. Logs out in <c>OnDestroy</c>.
/// </summary>
public class Main : MonoBehaviour
{
    // Sentence that is synthesized on every click.
    private const string speekText = "北京市今天全天晴,气温7℃ ~ 19℃,空气质量优,有北风4-5级,挺凉快的。";
    // Session parameters for the online (cloud) engine.
    private const string session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
    // Session parameters for the offline (local) engine; built in Awake because
    // they embed absolute resource paths.
    private string offline_session_begin_params;
    // Handle returned by QTTSSessionBegin for the session currently in progress.
    private IntPtr session_id;
    // Last error code reported by the SDK.
    private int err_code;

    // Most recently produced WAV file contents (header + PCM data).
    private byte[] bytes;

    /// <summary>
    /// Builds the offline session parameter string, pointing tts_res_path at the
    /// xiaoyan.jet and common.jet resources under Assets/TTS.
    /// </summary>
    private void Awake()
    {
        // Paths are converted to backslash form before being embedded in the
        // parameter string (the article states that the SDK reports a parameter
        // error otherwise).
        string xiaoyan_path = (Application.dataPath + "/TTS/xiaoyan.jet").Replace("/", "\\");
        string common_path = (Application.dataPath + "/TTS/common.jet").Replace("/", "\\");
        offline_session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = utf8, tts_res_path = fo|" + xiaoyan_path + ";fo|" + common_path + ", sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
    }

    /// <summary>
    /// Logs in to the MSC SDK and reports the outcome.
    /// </summary>
    private void Start()
    {
        // NOTE(review): the original passed "word_dir"; the key used by the MSC
        // SDK samples is "work_dir" - confirm against the SDK documentation.
        int message = MSCDLL.MSPLogin("", "", "appid=5f80198b,work_dir= . ");
        if (message != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("登录失败!错误信息:" + message);
            // Do not fall through to the success message after a failed login.
            return;
        }
        Debug.Log("登录成功");
    }

    /// <summary>
    /// Triggers an offline synthesis of <see cref="speekText"/> on left mouse click.
    /// </summary>
    private void Update()
    {
        if (Input.GetMouseButtonDown(0))
        {
            //Online_TTS(speekText);
            Offline_TTS(speekText);
        }
    }

    /// <summary>
    /// Synthesizes <paramref name="speekText"/> with the online engine and plays it.
    /// Polls for audio every 100 ms.
    /// </summary>
    /// <param name="speekText">Text to synthesize.</param>
    private void Online_TTS(string speekText)
    {
        Synthesize(speekText, session_begin_params, 100);
    }

    /// <summary>
    /// Synthesizes <paramref name="speekText"/> with the offline engine and plays it.
    /// Polls for audio every 1 ms.
    /// </summary>
    /// <param name="speekText">Text to synthesize.</param>
    private void Offline_TTS(string speekText)
    {
        Synthesize(speekText, offline_session_begin_params, 1);
    }

    /// <summary>
    /// Runs one complete synthesis session: begin session, submit text, collect
    /// audio, end session, write the WAV file and start playback.
    /// The session is always ended once it has been started, even when a later
    /// step fails. This method blocks the calling thread while polling.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="sessionParams">Parameter string passed to QTTSSessionBegin.</param>
    /// <param name="pollIntervalMs">Delay between two QTTSAudioGet calls, in milliseconds.</param>
    private void Synthesize(string text, string sessionParams, int pollIntervalMs)
    {
        // Start the synthesis session.
        session_id = MSCDLL.QTTSSessionBegin(sessionParams, ref err_code);
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
            return;
        }

        byte[] pcm;
        try
        {
            pcm = CollectAudio(text, pollIntervalMs);
        }
        finally
        {
            // Always close the session so a failed text/audio step does not leak it.
            err_code = MSCDLL.QTTSSessionEnd(session_id, "");
        }

        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("会话结束失败!错误信息: " + err_code);
            return;
        }
        if (pcm == null)
        {
            // CollectAudio already logged the reason.
            return;
        }

        // Prepend the WAV header to the raw PCM data.
        byte[] headerByte = StructToBytes(getWave_Header(pcm.Length));
        bytes = new byte[headerByte.Length + pcm.Length];
        Buffer.BlockCopy(headerByte, 0, bytes, 0, headerByte.Length);
        Buffer.BlockCopy(pcm, 0, bytes, headerByte.Length, pcm.Length);

        // File.WriteAllBytes overwrites an existing file, but the directory must exist.
        string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
        Directory.CreateDirectory(Application.streamingAssetsPath);
        File.WriteAllBytes(wavPath, bytes);

        AudioSource audioSource = gameObject.GetComponent<AudioSource>();
        if (audioSource == null)
        {
            Debug.LogError("未找到 AudioSource 组件,无法播放合成音频");
            return;
        }
        StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, audioSource));

        Debug.Log("合成结束成功");
    }

    /// <summary>
    /// Submits the text to the current session and polls QTTSAudioGet until the
    /// SDK signals the end of the data.
    /// </summary>
    /// <param name="text">Text to synthesize.</param>
    /// <param name="pollIntervalMs">Delay between two polls, in milliseconds.</param>
    /// <returns>The raw audio bytes, or <c>null</c> if the SDK reported an error (already logged).</returns>
    private byte[] CollectAudio(string text, int pollIntervalMs)
    {
        // The session declares text_encoding = utf8, so the length must be the
        // UTF-8 byte count rather than the platform default encoding's.
        err_code = MSCDLL.QTTSTextPut(session_id, text, (uint)Encoding.UTF8.GetByteCount(text), string.Empty);
        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
            return null;
        }

        using (MemoryStream pcm = new MemoryStream())
        {
            uint audio_len = 0;
            SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
            while (true)
            {
                IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
                if (err_code != (int)Errors.MSP_SUCCESS)
                {
                    // Report the failure instead of silently writing truncated audio.
                    Debug.LogError("获取合成音频失败,错误信息:" + err_code);
                    return null;
                }
                if (audio_len > 0 && source != IntPtr.Zero)
                {
                    byte[] chunk = new byte[audio_len];
                    Marshal.Copy(source, chunk, 0, (int)audio_len);
                    pcm.Write(chunk, 0, chunk.Length);
                }
                if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                {
                    break;
                }
                // Pause between polls; the article reports that synthesis fails
                // without a delay (reason unknown to the author).
                Thread.Sleep(pollIntervalMs);
            }
            return pcm.ToArray();
        }
    }

    /// <summary>
    /// Copies a blittable structure into a new byte array using its marshalled layout.
    /// </summary>
    /// <param name="structure">Boxed structure to serialize.</param>
    /// <returns>A byte array of length <c>Marshal.SizeOf(structure)</c>.</returns>
    private byte[] StructToBytes(object structure)
    {
        int size = Marshal.SizeOf(structure);
        IntPtr buffer = Marshal.AllocHGlobal(size);
        try
        {
            Marshal.StructureToPtr(structure, buffer, false);
            byte[] result = new byte[size];
            Marshal.Copy(buffer, result, 0, size);
            return result;
        }
        finally
        {
            // Release the unmanaged scratch buffer even if marshalling throws.
            Marshal.FreeHGlobal(buffer);
        }
    }

    /// <summary>
    /// Builds the WAV header for 16 kHz, mono, 16-bit PCM audio.
    /// </summary>
    /// <param name="data_len">Length of the PCM payload in bytes.</param>
    /// <returns>The populated header.</returns>
    private WAVE_Header getWave_Header(int data_len)
    {
        return new WAVE_Header
        {
            RIFF_ID = 1179011410,          // 0x46464952, bytes "RIFF"
            File_Size = data_len + 36,     // total file size minus the 8-byte RIFF chunk header
            RIFF_Type = 1163280727,        // 0x45564157, bytes "WAVE"
            FMT_ID = 544501094,            // 0x20746D66, bytes "fmt "
            FMT_Size = 16,
            FMT_Tag = 1,                   // PCM
            FMT_Channel = 1,
            FMT_SamplesPerSec = 16000,     // matches sample_rate in the session parameters
            AvgBytesPerSec = 32000,        // 16000 samples/s * 2 bytes per sample
            BlockAlign = 2,
            BitsPerSample = 16,
            DATA_ID = 1635017060,          // 0x61746164, bytes "data"
            DATA_Size = data_len
        };
    }

    /// <summary>
    /// WAV file header (RIFF chunk, fmt chunk and data chunk header).
    /// The layout is declared explicitly because the struct is serialized
    /// byte-for-byte by <see cref="StructToBytes"/>; the fields add up to 44 bytes.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, Pack = 1)]
    private struct WAVE_Header
    {
        public int RIFF_ID;
        public int File_Size;
        public int RIFF_Type;
        public int FMT_ID;
        public int FMT_Size;
        public short FMT_Tag;
        public ushort FMT_Channel;
        public int FMT_SamplesPerSec;
        public int AvgBytesPerSec;
        public ushort BlockAlign;
        public ushort BitsPerSample;
        public int DATA_ID;
        public int DATA_Size;
    }

    /// <summary>
    /// Loads an audio clip via UnityWebRequest and plays it on the given source.
    /// </summary>
    /// <param name="url">Location of the audio file.</param>
    /// <param name="type">Audio format.</param>
    /// <param name="audio">Audio source that receives and plays the clip.</param>
    /// <returns>Coroutine enumerator.</returns>
    public IEnumerator OnAudioLoadAndPaly(string url, AudioType type, AudioSource audio)
    {
        // Dispose the request once the clip has been extracted.
        using (UnityWebRequest www = UnityWebRequestMultimedia.GetAudioClip(url, type))
        {
            yield return www.SendWebRequest();
            if (www.isHttpError || www.isNetworkError)
            {
                Debug.LogError(www.error);
                yield break;
            }
            if (audio == null)
            {
                Debug.LogError("未找到 AudioSource 组件,无法播放合成音频");
                yield break;
            }
            audio.clip = DownloadHandlerAudioClip.GetContent(www);
            audio.Play();
        }
    }

    /// <summary>
    /// Logs out of the MSC SDK when the component is destroyed.
    /// </summary>
    private void OnDestroy()
    {
        MSCDLL.MSPLogout();
        Debug.Log("注销成功");
    }
}

4.在线参数和离线参数区别

/// Online session parameters
session_begin_params = "voice_name = xiaoyan, text_encoding = utf8, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";
/// Offline session parameters
string xiaoyan_path = (Application.dataPath + "/TTS/xiaoyan.jet").Replace("/", "\\");// Forward slashes are replaced with backslashes; the author explained this in an earlier article and does not repeat it here
string common_path = (Application.dataPath + "/TTS/common.jet").Replace("/", "\\");// Per the author, skipping the replacement makes the SDK report a parameter error
session_begin_params = "engine_type = local, voice_name = xiaoyan, text_encoding = utf8, tts_res_path = fo|" + xiaoyan_path + ";fo|" + common_path + ", sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0";

多了"engine_type"和"tts_res_path"两个参数。

5.在线合成方法和离线合成方法的区别

 /// <summary>
 /// Synthesizes <paramref name="speekText"/> with the online engine, writes the
 /// result as a WAV file under StreamingAssets and starts playback.
 /// Polls for audio every 100 ms and blocks the calling thread while doing so.
 /// Once the session has been started it is always ended, even on failure.
 /// </summary>
 /// <param name="speekText">Text to synthesize.</param>
 private void Online_TTS(string speekText)
    {
        // Start the synthesis session
        session_id = MSCDLL.QTTSSessionBegin(session_begin_params, ref err_code);

        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
            return;
        }

        byte[] wav = null;
        try
        {
            // Submit the text. The session declares text_encoding = utf8, so the
            // length must be the UTF-8 byte count, not the platform default's.
            err_code = MSCDLL.QTTSTextPut(session_id, speekText, (uint)Encoding.UTF8.GetByteCount(speekText), string.Empty);
            if (err_code != (int)Errors.MSP_SUCCESS)
            {
                Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
            }
            else
            {
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Reserve 44 bytes for the WAV header, filled in after synthesis.
                    memoryStream.Write(new byte[44], 0, 44);

                    uint audio_len = 0;
                    SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
                    while (true)
                    {
                        IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
                        if (err_code != (int)Errors.MSP_SUCCESS)
                            break;
                        if (audio_len > 0 && source != IntPtr.Zero)
                        {
                            byte[] array = new byte[audio_len];
                            Marshal.Copy(source, array, 0, (int)audio_len);
                            memoryStream.Write(array, 0, array.Length);
                        }
                        if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                            break;
                        // Online synthesis: wait 100 ms between polls
                        Thread.Sleep(100);
                    }

                    if (err_code != (int)Errors.MSP_SUCCESS)
                    {
                        // Report the failure instead of silently writing truncated audio
                        Debug.LogError("获取合成音频失败,错误信息:" + err_code);
                    }
                    else
                    {
                        WAVE_Header header = getWave_Header((int)memoryStream.Length - 44);// build the WAV header
                        byte[] headerByte = StructToBytes(header);// convert the header struct to bytes
                        memoryStream.Position = 0;// seek back to the reserved header area
                        memoryStream.Write(headerByte, 0, headerByte.Length);// write the header
                        wav = memoryStream.ToArray();
                    }
                }
            }
        }
        finally
        {
            // Always end the session so a failed step does not leak it
            err_code = MSCDLL.QTTSSessionEnd(session_id, "");
        }

        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("会话结束失败!错误信息: " + err_code);
            return;
        }
        if (wav == null)
        {
            // The failure was already logged above
            return;
        }

        bytes = wav;
        string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
        // File.WriteAllBytes overwrites an existing file, but the directory must exist
        Directory.CreateDirectory(Application.streamingAssetsPath);
        File.WriteAllBytes(wavPath, bytes);

        AudioSource audioSource = gameObject.GetComponent<AudioSource>();
        if (audioSource == null)
        {
            Debug.LogError("未找到 AudioSource 组件,无法播放合成音频");
            return;
        }
        StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, audioSource));

        Debug.Log("合成结束成功");
    }

    /// <summary>
    /// Synthesizes <paramref name="speekText"/> with the offline engine, writes the
    /// result as a WAV file under StreamingAssets and starts playback.
    /// Polls for audio every 1 ms and blocks the calling thread while doing so.
    /// Once the session has been started it is always ended, even on failure.
    /// </summary>
    /// <param name="speekText">Text to synthesize.</param>
    private void Offline_TTS(string speekText)
    {
        // Start the synthesis session
        session_id = MSCDLL.QTTSSessionBegin(offline_session_begin_params, ref err_code);

        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("初始化语音合成失败,错误信息:" + err_code);
            return;
        }

        byte[] wav = null;
        try
        {
            // Submit the text. The session declares text_encoding = utf8, so the
            // length must be the UTF-8 byte count, not the platform default's.
            err_code = MSCDLL.QTTSTextPut(session_id, speekText, (uint)Encoding.UTF8.GetByteCount(speekText), string.Empty);
            if (err_code != (int)Errors.MSP_SUCCESS)
            {
                Debug.LogError("向服务器发送数据失败,错误信息:" + err_code);
            }
            else
            {
                using (MemoryStream memoryStream = new MemoryStream())
                {
                    // Reserve 44 bytes for the WAV header, filled in after synthesis.
                    memoryStream.Write(new byte[44], 0, 44);

                    uint audio_len = 0;
                    SynthStatus synth_status = SynthStatus.MSP_TTS_FLAG_STILL_HAVE_DATA;
                    while (true)
                    {
                        IntPtr source = MSCDLL.QTTSAudioGet(session_id, ref audio_len, ref synth_status, ref err_code);
                        if (err_code != (int)Errors.MSP_SUCCESS)
                            break;
                        if (audio_len > 0 && source != IntPtr.Zero)
                        {
                            byte[] array = new byte[audio_len];
                            Marshal.Copy(source, array, 0, (int)audio_len);
                            memoryStream.Write(array, 0, array.Length);
                        }
                        if (synth_status == SynthStatus.MSP_TTS_FLAG_DATA_END)
                            break;
                        // Offline synthesis: wait 1 ms between polls
                        Thread.Sleep(1);
                    }

                    if (err_code != (int)Errors.MSP_SUCCESS)
                    {
                        // Report the failure instead of silently writing truncated audio
                        Debug.LogError("获取合成音频失败,错误信息:" + err_code);
                    }
                    else
                    {
                        WAVE_Header header = getWave_Header((int)memoryStream.Length - 44);// build the WAV header
                        byte[] headerByte = StructToBytes(header);// convert the header struct to bytes
                        memoryStream.Position = 0;// seek back to the reserved header area
                        memoryStream.Write(headerByte, 0, headerByte.Length);// write the header
                        wav = memoryStream.ToArray();
                    }
                }
            }
        }
        finally
        {
            // Always end the session so a failed step does not leak it
            err_code = MSCDLL.QTTSSessionEnd(session_id, "");
        }

        if (err_code != (int)Errors.MSP_SUCCESS)
        {
            Debug.LogError("会话结束失败!错误信息: " + err_code);
            return;
        }
        if (wav == null)
        {
            // The failure was already logged above
            return;
        }

        bytes = wav;
        string wavPath = Application.streamingAssetsPath + "/" + name + ".wav";
        // File.WriteAllBytes overwrites an existing file, but the directory must exist
        Directory.CreateDirectory(Application.streamingAssetsPath);
        File.WriteAllBytes(wavPath, bytes);

        AudioSource audioSource = gameObject.GetComponent<AudioSource>();
        if (audioSource == null)
        {
            Debug.LogError("未找到 AudioSource 组件,无法播放合成音频");
            return;
        }
        StartCoroutine(OnAudioLoadAndPaly(wavPath, AudioType.WAV, audioSource));

        Debug.Log("合成结束成功");
    }

两者的区别:离线合成时,线程休眠时间从100毫秒变成了1毫秒。
至于为什么在线合成需要较长的休眠时间(缩短休眠时长后无法正常合成),以及为什么不能完全去掉休眠,还望知道原因的大佬指点一下。

最后:我是一菜鸡,不停努力的菜鸡。

  • 6
    点赞
  • 19
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

VAIN_K

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值