1. 安装阿里云python SDK core:
pip3 install aliyun-python-sdk-core-v3
2. 安装ali_speech python SDK, 从github上下载
https://github.com/aliyun/alibabacloud-nls-python-sdk
解压之后,安装
cd alibabacloud-nls-python-sdk
sudo python3 setup.py install
3. 增加阿里云speech配置文件,命名为 ali_wav_config,放在与 ali_wav.py 相同的目录下。每行格式为 key=value,可配置 speaker、volume、speech_rate、pitch_rate,以 # 开头的行为注释
4. 生成语音
执行脚本(words.txt 每行一条待合成的文本,生成的 wav 文件保存在 words.txt 所在的目录):
./ali_wav.py ./words.txt
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import os
import threading
import ali_speech
import logging
import time
import json
import base64
from ali_speech.callbacks import SpeechSynthesizerCallback
from ali_speech.constant import TTSFormat
from ali_speech.constant import TTSSampleRate
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.request import CommonRequest
# Path of the text file to synthesize; main() sets it from sys.argv[1].
words_file = ""
# Config file name (with a leading "/"); main() prefixes it with the
# directory of the running script.
config_file_name = "/ali_wav_config"
# Voice settings handed to the synthesizer in process(); main() overrides
# them with the values found in the config file.
speaker = "xiaoyun"
volume = 50
speech_rate = 0
pitch_rate = 0
# Threads started by process_multithread() and joined by main().
thread_list = []
# main() joins all outstanding threads after every MAX_THREAD submitted lines.
MAX_THREAD = 10
class MyCallback(SpeechSynthesizerCallback):
    """Synthesizer callback that writes the received audio into a file.

    The constructor argument ``name`` is the path of the audio file to
    create. Received binary chunks are appended to it; on completion the
    file is closed and ffmpeg is asked to cut the silent parts.
    """

    # silenceremove filter settings used when trimming the finished file.
    _SILENCE_FILTER = (
        "silenceremove=start_periods=1:"
        "start_duration=0:start_threshold=-100dB:"
        "stop_periods=1:stop_duration=2:stop_threshold=-100dB"
    )

    def __init__(self, name):
        """Remember ``name`` and open it for binary writing."""
        self._name = name
        self._fout = open(name, 'wb')

    def on_binary_data_received(self, raw):
        """Append one chunk of audio bytes to the output file."""
        self._fout.write(raw)

    def on_completed(self, message):
        """Close the output file, trim its silence and report completion."""
        self._fout.close()
        # Trimming is optional: the file is usable even when it is skipped.
        self._trim_silence()
        print(self._name + " Done!!!")

    def on_task_failed(self, message):
        """Close the output file and report the failure."""
        self._fout.close()
        # Report instead of failing silently; the partial file is left on disk.
        print(self._name + " Failed: %s" % (message,))

    def on_channel_closed(self):
        """Log that the channel has been closed."""
        print('MyCallback.OnRecognitionChannelClosed')

    def on_metainfo(self, message):
        """Log the meta information message."""
        print('MyCallback.on_metainfo: %s' % message)

    def _trim_silence(self):
        """Best effort: rewrite the audio file with its silence cut by ffmpeg.

        ffmpeg refuses to use the same path as input and output, so the
        result goes to a temporary sibling file that replaces the original
        only when ffmpeg succeeds. The command is passed as an argument
        list (no shell), so spaces or shell metacharacters in the file
        name cannot break or alter it.
        """
        import subprocess

        # Keep a ".wav" suffix so ffmpeg still picks the WAV muxer.
        trimmed = self._name + ".trim.wav"
        command = [
            "ffmpeg", "-i", self._name,
            "-af", self._SILENCE_FILTER,
            "-y", "-ac", "1", "-ar", "16000",
            trimmed,
        ]
        try:
            result = subprocess.run(
                command,
                stdin=subprocess.DEVNULL,  # keep ffmpeg away from the terminal
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError:
            # ffmpeg is not installed / not executable: keep the untrimmed file.
            return
        if result.returncode == 0:
            os.replace(trimmed, self._name)
        elif os.path.exists(trimmed):
            # Drop a partial result and keep the untrimmed original.
            os.remove(trimmed)
def process(client, appkey, token, text, audio_name):
    """Synthesize ``text`` and let a MyCallback store it in ``audio_name``.

    The voice, volume, speech rate and pitch rate come from the
    module-level settings. Returns the value of ``synthesizer.start()``
    when it is negative; otherwise returns None. Exceptions raised while
    starting or waiting are printed, and the synthesizer is always closed.
    """
    tts = client.create_synthesizer(MyCallback(audio_name))

    # Credentials.
    tts.set_appkey(appkey)
    tts.set_token(token)

    # What to say and how to say it.
    tts.set_voice(speaker)
    tts.set_text(text)
    tts.set_format(TTSFormat.WAV)
    tts.set_sample_rate(TTSSampleRate.SAMPLE_RATE_16K)
    tts.set_volume(volume)
    tts.set_speech_rate(speech_rate)
    tts.set_pitch_rate(pitch_rate)

    try:
        status = tts.start()
        if status < 0:
            # Start failed: hand the code back (the finally clause still runs).
            return status
        tts.wait_completed()
    except Exception as err:
        print(err)
    finally:
        tts.close()
def process_multithread(client, appkey, token, text, audio_name):
    """Run process() with the given arguments on a new thread.

    The thread is appended to the module-level ``thread_list`` before it is
    started, so the caller can join it later.
    """
    worker = threading.Thread(
        target=process,
        args=(client, appkey, token, text, audio_name),
    )
    thread_list.append(worker)
    worker.start()
def get_token():
    """Request a token via the CreateToken action and return its id.

    Sends a POST CommonRequest (version 2019-02-28) to
    nls-meta.cn-shanghai.aliyuncs.com and returns ``['Token']['Id']`` of
    the decoded JSON response.
    """
    # NOTE(review): the key id and the base64-encoded secret look like
    # placeholders that must be filled in before use - confirm.
    acs_client = AcsClient(
        "xxxxxxxxxxxxxxxx",
        base64.b64decode(b'xxxxxxxxxxxxxxxxxxxxx').decode(),
        "cn-shanghai"
    )

    # Build the request and set its parameters.
    token_request = CommonRequest()
    token_request.set_method('POST')
    token_request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
    token_request.set_version('2019-02-28')
    token_request.set_action_name('CreateToken')

    raw_response = acs_client.do_action_with_exception(token_request)
    return json.loads(raw_response.decode())['Token']['Id']
def _load_config(path):
    """Apply the ``key=value`` lines of the config file to the voice settings.

    Blank lines and lines starting with ``#`` are ignored, and so are lines
    without ``=``. A key is recognized when it contains ``speaker``,
    ``volume``, ``speech_rate`` or ``pitch_rate``.

    Raises:
        ValueError: if a numeric setting is not a valid integer.
    """
    global speaker, volume, speech_rate, pitch_rate
    with open(path, 'r', encoding='utf-8') as config_f:
        for line in config_f:
            entry = (line.strip().replace("\n", "").replace("\r", "")
                     .replace("\t", "").replace('"', ""))
            if not entry or entry[0] == "#":
                continue
            parts = entry.split("=")
            if len(parts) < 2:
                # No "=": there is no value to read, so skip the line
                # instead of treating the key itself as the value.
                continue
            key = parts[0]
            raw = parts[-1].strip()
            if "speaker" in key:
                speaker = raw.lower()
            elif "volume" in key:
                volume = int(raw)
            elif "speech_rate" in key:
                speech_rate = int(raw)
            elif "pitch_rate" in key:
                pitch_rate = int(raw)


def _audio_path(directory, index, text):
    """Return ``<directory>/<index, 3 digits>-<text with dashes>.wav``."""
    # "/" would be read as a directory separator, so it is replaced as well.
    stem = text.replace(" ", "-").replace("/", "-")
    return os.path.join(directory, "%03d-%s.wav" % (index, stem))


def _join_pending_threads():
    """Wait for every thread in ``thread_list`` and then empty the list."""
    for worker in thread_list:
        worker.join()
    del thread_list[:]


def main():
    """Synthesize every non-empty line of the words file into a wav file.

    The words file is ``sys.argv[1]``. The config file is looked up next to
    the running script, and the wav files are written next to the words
    file. One thread is started per line; all outstanding threads are
    joined after every ``MAX_THREAD`` lines and once more at the end.
    """
    global words_file, config_file_name
    words_file = sys.argv[1]
    if not words_file:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        return
    if not os.path.exists(words_file):
        print("file " + words_file + " not exist")
        print(sys._getframe().f_lineno)
        return

    # abspath() first: dirname() of a bare script name is "", which would
    # point the lookup at "/ali_wav_config" in the filesystem root.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    # basename() keeps the result stable when main() runs more than once.
    config_file_name = os.path.join(script_dir,
                                    os.path.basename(config_file_name))
    if not os.path.exists(config_file_name):
        print("config file " + config_file_name + " not exist")
        print(sys._getframe().f_lineno)
        return
    _load_config(config_file_name)

    client = ali_speech.NlsClient()
    # Log level of the client: DEBUG, INFO, WARNING or ERROR.
    client.set_log_level('ERROR')
    appkey = 'xxxxxxxxxxxx'
    token = get_token()

    # A bare file name has an empty dirname; use the current directory
    # rather than writing the wav files to "/".
    wav_path = os.path.dirname(words_file) or "."
    with open(words_file, 'r', encoding='utf-8') as r_file:
        r_lines = r_file.readlines()

    word_idx = 0
    for line in r_lines:
        # split() treats tabs and line ends as separators, so this also
        # collapses runs of whitespace into single spaces.
        text = ' '.join(line.split())
        if not text:
            continue
        audio_name = _audio_path(wav_path, word_idx, text)
        process_multithread(client, appkey, token, text, audio_name)
        word_idx += 1
        if word_idx % MAX_THREAD == 0:
            # Limit the number of simultaneously running threads.
            _join_pending_threads()
    _join_pending_threads()

    # Reset the terminal line settings before returning.
    os.system("stty sane")
if __name__ == "__main__":
    # The path of the words file is the only, required, argument.
    if len(sys.argv) < 2:
        print("ali tts arg error")
        print(sys._getframe().f_lineno)
        # sys.exit() rather than the site-provided exit() helper, with a
        # non-zero status so callers can detect the usage error.
        sys.exit(1)
    main()