python 阿里云平台合成语音(TTS)

10 篇文章 0 订阅
1 篇文章 0 订阅

1. 安装阿里云python SDK core:
 pip3 install aliyun-python-sdk-core-v3

2. 安装ali_speech python SDK, 从github上下载

https://github.com/aliyun/alibabacloud-nls-python-sdk

解压之后,安装

cd  alibabacloud-nls-python-sdk
sudo python3 setup.py install

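3. 增加阿里云speech配置文件,命名为 ali_wav_config,与脚本 ali_wav.py 放在同一目录下。文件每行为 key=value 形式,支持 speaker、volume、speech_rate、pitch_rate 四个配置项(后三项为整数),以 # 开头的行为注释。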

4. 生成语音

执行脚本(words.txt 中每一行是一条待合成的文本;生成的 wav 文件保存在 words.txt 所在目录,文件名为"三位序号-文本.wav"):

./ali_wav.py  ./words.txt

#!/usr/bin/env python3

# -*- coding: utf-8 -*-
import sys
import os
import threading
import ali_speech
import logging
import time
import json
import base64
from ali_speech.callbacks import SpeechSynthesizerCallback
from ali_speech.constant import TTSFormat
from ali_speech.constant import TTSSampleRate
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest

# Worker-thread bookkeeping: threads started for the current batch, and the
# number of started tasks after which main() waits for the batch to finish.
MAX_THREAD = 10
thread_list = []

# Input locations. config_file_name keeps its leading "/" because main()
# combines it with the directory the script was launched from.
words_file = ""
config_file_name = "/ali_wav_config"

# Synthesis defaults; main() overrides them from the config file.
speaker = "xiaoyun"
volume, speech_rate, pitch_rate = 50, 0, 0

class MyCallback(SpeechSynthesizerCallback):
    """Synthesizer callback that writes the received audio into a file.

    The output file is opened when the callback is created, filled as binary
    chunks arrive, and closed when the task completes or fails. After a
    successful task the file is passed through ffmpeg to strip silence; that
    step is best-effort and the untrimmed file is kept if ffmpeg is missing
    or fails.
    """

    # ffmpeg audio filter that removes leading/trailing silence.
    _SILENCE_FILTER = (
        "silenceremove=start_periods=1:"
        "start_duration=0:start_threshold=-100dB:"
        "stop_periods=1:stop_duration=2:stop_threshold=-100dB"
    )

    def __init__(self, name):
        """Open ``name`` (path of the audio file to create) for binary writing."""
        self._name = name
        self._fout = open(name, 'wb')

    def on_binary_data_received(self, raw):
        """Append one chunk of audio bytes to the output file."""
        self._fout.write(raw)

    def on_completed(self, message):
        """Close the file, trim silence with ffmpeg, and report completion."""
        self._fout.close()
        self._trim_silence()
        print(self._name + " Done!!!")

    def _trim_silence(self):
        """Re-encode the file through ffmpeg's silenceremove filter.

        ffmpeg cannot read and write the same path, so the result goes to a
        temporary sibling file that replaces the original only on success.
        The command is passed as an argument list (no shell), so file names
        containing spaces or shell metacharacters are handled safely.
        """
        import subprocess  # local import: only needed for this optional step

        trimmed = self._name + ".trim.wav"
        command = [
            "ffmpeg", "-y",
            "-i", self._name,
            "-af", self._SILENCE_FILTER,
            "-ac", "1",
            "-ar", "16000",
            trimmed,
        ]
        try:
            result = subprocess.run(
                command,
                # Detach stdin so ffmpeg does not alter the terminal state.
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            # ffmpeg is not installed or not executable: keep the raw audio.
            return

        if result.returncode == 0 and os.path.exists(trimmed):
            os.replace(trimmed, self._name)
        elif os.path.exists(trimmed):
            # Discard a partial result; the untrimmed file stays in place.
            os.remove(trimmed)

    def on_task_failed(self, message):
        """Close the file and report the failure instead of hiding it."""
        self._fout.close()
        print('%s failed: %s' % (self._name, message))

    def on_channel_closed(self):
        """Log that the channel was closed."""
        print('MyCallback.OnRecognitionChannelClosed')

    def on_metainfo(self, message):
        """Log the meta information message."""
        print('MyCallback.on_metainfo: %s' % message)


def process(client, appkey, token, text, audio_name):
    """Synthesize ``text`` and save the audio to ``audio_name``.

    A synthesizer is created from ``client`` with a ``MyCallback`` bound to
    ``audio_name``, configured with the credentials and the module-level
    voice settings, then started. The synthesizer is always closed.

    Returns the value of ``synthesizer.start()`` when it is negative;
    otherwise returns None. Exceptions raised while running are printed,
    not propagated.
    """
    audio_sink = MyCallback(audio_name)
    synthesizer = client.create_synthesizer(audio_sink)

    # Apply every setting in a fixed order: credentials, voice and text,
    # output format, then the prosody tunables.
    settings = (
        (synthesizer.set_appkey, appkey),
        (synthesizer.set_token, token),
        (synthesizer.set_voice, speaker),
        (synthesizer.set_text, text),
        (synthesizer.set_format, TTSFormat.WAV),
        (synthesizer.set_sample_rate, TTSSampleRate.SAMPLE_RATE_16K),
        (synthesizer.set_volume, volume),
        (synthesizer.set_speech_rate, speech_rate),
        (synthesizer.set_pitch_rate, pitch_rate),
    )
    for apply_setting, setting_value in settings:
        apply_setting(setting_value)

    try:
        status = synthesizer.start()
        if status >= 0:
            synthesizer.wait_completed()
            return None
        return status
    except Exception as err:
        print(err)
    finally:
        synthesizer.close()


def process_multithread(client, appkey, token, text, audio_name):
    """Run ``process`` for one text on a new thread.

    The thread is recorded in the module-level ``thread_list`` before it is
    started so the caller can join it later.
    """
    task_args = (client, appkey, token, text, audio_name)
    worker = threading.Thread(target=process, args=task_args)
    thread_list.append(worker)
    worker.start()

def get_token():
    """Request an access token through the ``CreateToken`` action.

    Sends a POST request to ``nls-meta.cn-shanghai.aliyuncs.com`` and
    returns the ``Token.Id`` field of the JSON response.
    """
    # The second client argument is kept base64-encoded in the source and
    # decoded just before use.
    decoded_secret = base64.b64decode(b'xxxxxxxxxxxxxxxxxxxxx').decode()
    acs = AcsClient("xxxxxxxxxxxxxxxx", decoded_secret, "cn-shanghai")

    # Build the request and set its parameters.
    request = CommonRequest()
    request.set_method('POST')
    request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    request.set_version('2019-02-28')
    request.set_action_name('CreateToken')

    raw_response = acs.do_action_with_exception(request)
    payload = json.loads(raw_response.decode())
    return payload['Token']['Id']

def main():
    """Synthesize one WAV file per non-empty line of the words file.

    The words file path is taken from ``sys.argv[1]``. Voice settings are
    read from the config file located next to the script, one ``key=value``
    per line (``#`` starts a comment); recognised keys are ``speaker``,
    ``volume``, ``speech_rate`` and ``pitch_rate``. Output files are written
    to the directory of the words file and named ``NNN-text.wav``.

    Work is dispatched through ``process_multithread`` in batches of
    ``MAX_THREAD`` tasks; each batch is joined before the next one starts.
    Problems with the arguments or missing files are printed and the
    function returns without doing any work.
    """
    global words_file, speaker, volume, speech_rate, pitch_rate

    # Guard the argument here as well, so a direct call cannot hit IndexError.
    if len(sys.argv) < 2 or len(sys.argv[1]) == 0:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        return
    words_file = sys.argv[1]

    if not os.path.exists(words_file):
        print("file " + words_file + " not exist")
        print(sys._getframe().f_lineno)
        return

    # Resolve the script directory absolutely: dirname() of a bare script
    # name is "", which would otherwise make the config path point at "/".
    # A local variable is used so the module-level name is left untouched
    # and main() can be called more than once.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    config_path = os.path.join(script_dir, config_file_name.lstrip("/"))

    if not os.path.exists(config_path):
        print("config file " + config_path + " not exist")
        print(sys._getframe().f_lineno)
        return

    with open(config_path, 'r', encoding='utf-8') as config_f:
        for line in config_f:
            value = line.strip().replace("\n", "").replace("\r", "").replace("\t", "").replace('"', "")
            if not value or value.startswith("#"):
                continue
            fields = value.split("=")
            if len(fields) < 2:
                # Not a key=value line; nothing to read from it.
                continue
            key, raw = fields[0], fields[-1].strip()
            if "speaker" in key:
                speaker = raw.lower()
            elif "volume" in key or "speech_rate" in key or "pitch_rate" in key:
                try:
                    number = int(raw)
                except ValueError:
                    # Keep the default instead of aborting on a bad value.
                    print("config value for " + key.strip() + " is not an integer: " + raw)
                    continue
                if "volume" in key:
                    volume = number
                elif "speech_rate" in key:
                    speech_rate = number
                else:
                    pitch_rate = number

    client = ali_speech.NlsClient()
    # Log level of the SDK: DEBUG, INFO, WARNING or ERROR.
    client.set_log_level('ERROR')

    appkey = 'xxxxxxxxxxxx'
    token = get_token()

    # May be "" for a bare file name; os.path.join then yields a path
    # relative to the current directory rather than the filesystem root.
    wav_path = os.path.dirname(words_file)

    with open(words_file, 'r', encoding='utf-8') as r_file:
        word_idx = 0
        for line in r_file:
            # Collapse every run of whitespace (including the newline).
            text = ' '.join(line.split())
            if not text:
                continue

            # "/" would be read as a directory separator in the file name.
            label = text.replace(" ", "-").replace("/", "-")
            audio_name = os.path.join(wav_path, "%03d-%s.wav" % (word_idx, label))
            process_multithread(client, appkey, token, text, audio_name)
            word_idx += 1

            # Wait for the whole batch before starting more threads.
            if word_idx % MAX_THREAD == 0:
                for thread in thread_list:
                    thread.join()
                thread_list.clear()

    for thread in thread_list:
        thread.join()
    thread_list.clear()
    # Restore the terminal in case a child process changed its settings.
    os.system("stty sane")

if __name__ == "__main__":
    # The words file path is a required argument.
    if len(sys.argv) < 2:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        # sys.exit is always available (the bare exit() helper comes from
        # the site module), and a non-zero status signals the usage error.
        sys.exit(1)
    main()

 

  • 2
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 4
    评论
TTS即文本语音合成(Text-to-Speech),是一种将计算机生成的文本转换为自然语音的技术。而Python语言是一种非常流行的编程语言,因其易学易用的特点,被广泛应用于各种领域。

在Python中,我们可以使用第三方库来实现TTS功能。其中,最常用的库是pyttsx3。使用pyttsx3库,我们可以加载指定的语音引擎,并设置其属性,如语速、语调等。接下来,我们可以调用其say()方法,并传入文本内容,即可实现语音合成。

使用pyttsx3库进行TTS语音合成功能的实现,我们需要先安装该库。可以通过以下命令在终端中进行安装:

```
pip install pyttsx3
```

安装完成后,我们可以编写Python代码来实现TTS语音合成功能。例如,下面是一个简单的示例:

```python
import pyttsx3

def text_to_speech(text):
    engine = pyttsx3.init()
    engine.say(text)
    engine.runAndWait()

if __name__ == '__main__':
    text_to_speech('欢迎使用TTS语音合成!')
```

在上述示例中,我们导入了pyttsx3库,并定义了一个text_to_speech()函数用于接收文本内容并进行语音合成。在main函数中,我们调用text_to_speech()函数,并传入欢迎语句进行合成。

当我们运行上述代码时,语音合成引擎会将输入的文本内容转换成自然语音,并播放出来。同时,我们还可以通过使用更多的参数和方法,对语音引擎的属性进行设定,以实现更加个性化的语音合成效果。

总之,Python的TTS语音合成功能可以通过pyttsx3库来实现,为我们提供了便捷的文本转语音的方式,可以应用于语音导航、语音交互等各种场景中。
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值