(二十二) 文本转语音、TTS、长文本、Edge-TTS
本文代码使用了Edge-TTS来进行文本转语音的操作,可以存储为mp3或wav文件。文本不限长度。
调用的是云端Edge-TTS接口,本人只是做了简单封装、编了个UI而已。
可直接运行的文件可在百度网盘下载:
https://pan.baidu.com/s/1ntMnDWFvnS7tLUd9jku8Ew?pwd=hims
代码如下:
#文本转语音工具V1.0
import asyncio
import traceback
# import librosa
import edge_tts
import os, sys, time
import cv2
import yaml
import hbt_funcs as hbt
from playsound import playsound
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QWidget, QMessageBox, QFileDialog, QApplication, QSlider
from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal, pyqtSlot
from txt2audio_UI import Ui_txt2voice
# Module-level configuration shared by the worker threads and the main window.

# WindowsSelectorEventLoopPolicy is only defined on Windows builds of Python;
# guard the lookup so that importing this module elsewhere does not raise.
if hasattr(asyncio, 'WindowsSelectorEventLoopPolicy'):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

# Original note: check whether the working-directory path contains Chinese
# characters (helper lives in hbt_funcs; its return value is not used here).
hbt.is_contains_chinese(os.getcwd())

rates = '+0%'        # speech-rate string handed to edge_tts.Communicate
run_flag = 0         # set to 1 by Winshot.__init__ once the window is shown
stop_flag = False
bar = 0              # progress-bar value advanced by Winshot.running
voices_list = ['XiaoxiaoNeural', 'XiaoyiNeural', 'YunxiaNeural', 'liaoning-XiaobeiNeural', 'shaanxi-XiaoniNeural',
               'YunjianNeural', 'YunxiNeural', 'YunyangNeural']
my_title = "iCANX文字转语音工具"

# Persisted user settings are read from settings.yaml in the working directory.
settings_file = "settings.yaml"
settings = {}
if os.path.exists(settings_file):
    try:
        with open(settings_file, 'r') as f:
            settings = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError):
        # An unreadable or corrupt settings file must not prevent start-up.
        traceback.print_exc()
        settings = {}
    # safe_load returns None for an empty file and may return a non-mapping.
    if not isinstance(settings, dict):
        settings = {}

out_dir = settings.get('out_dir', os.getcwd())
voices_select = settings.get('voices_select', 0)
mp3_wav = settings.get('mp3_wav', 0)

# Fall back to defaults when the stored values are unusable.
if not isinstance(out_dir, str) or not os.path.isdir(out_dir):
    out_dir = os.getcwd()
if not isinstance(voices_select, int) or not 0 <= voices_select < len(voices_list):
    voices_select = 0
if mp3_wav not in (0, 1):
    mp3_wav = 0

voices = 'zh-CN-' + voices_list[voices_select]
# from subprocess import run, PIPE, STDOUT
# def get_media_length(file_path):
# cmdline = f'ffprobe -i "{file_path}" -show_entries format=duration -v quiet -of csv="p=0"'
# # print(cmdline)
# result = run(cmdline, stdout=PIPE, stderr=STDOUT)
# try: lenth = int(float(result.stdout.decode('utf-8').strip()))
# except: lenth = 0; print('ffprobe检测长度发生错误...')
# return lenth
from mutagen.mp3 import MP3
def get_media_length(file_path):
    """Return the ``info.length`` value mutagen reports for the MP3 at ``file_path``."""
    return MP3(file_path).info.length
class EdgeTTSTrans(QThread):
    """Worker thread that converts text to an MP3 file with edge-tts.

    Emits ``sinout`` with ``'OK'`` after the file has been saved, or with
    ``'ERROR'`` when the conversion raised an exception.
    """

    sinout = pyqtSignal(str)

    def __init__(self, winshot, texts, filename):
        """Store the job parameters.

        Args:
            winshot: the main window that started the job.
            texts: the text to synthesize.
            filename: output path without extension; ``'.mp3'`` is appended.
        """
        super(EdgeTTSTrans, self).__init__()
        self.main_win = winshot
        self.rates = rates  # snapshot of the module-level rate at creation time
        self.texts = texts
        self.filename = filename + '.mp3'

    def run(self):
        """Run the conversion on this thread and report the outcome via ``sinout``."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
        except Exception:
            # Keep the cause visible on the console; the UI only gets 'ERROR'.
            traceback.print_exc()
            self.sinout.emit('ERROR')
        else:
            self.sinout.emit('OK')

    async def edge_tts_trans(self, text):
        """Synthesize ``text`` with the module-level voice and save it to ``self.filename``."""
        communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        await communicate.save(self.filename)
class PlayAudioWav(QThread):
    """Worker thread that synthesizes a short sample to ``temp.mp3`` and plays it.

    When the thread finishes, the window's ``try_lisson`` button is enabled
    again.
    """

    def __init__(self, winshot, texts):
        """Store the job parameters and remove any stale ``temp.mp3``.

        Args:
            winshot: the main window owning the ``try_lisson`` button.
            texts: the sample text to synthesize and play.
        """
        super(PlayAudioWav, self).__init__()
        self.winshot = winshot
        self.rates = rates  # snapshot of the module-level rate at creation time
        self.texts = texts
        if os.path.exists("temp.mp3"):
            os.remove("temp.mp3")
        # Widgets must not be touched from run(): re-enable the button from a
        # slot on this object, triggered by QThread's finished signal.
        self.finished.connect(self._restore_button)

    @pyqtSlot()
    def _restore_button(self):
        """Re-enable the preview button once playback is over."""
        self.winshot.try_lisson.setEnabled(True)

    def run(self):
        """Synthesize the sample, play it, and always clean up the temp file."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
            # Nothing to play if synthesis failed (already reported there).
            if os.path.exists("temp.mp3"):
                playsound("temp.mp3")
        except Exception:
            traceback.print_exc()
        finally:
            try:
                if os.path.exists("temp.mp3"):
                    os.remove("temp.mp3")
            except OSError:
                traceback.print_exc()

    async def edge_tts_trans(self, text):
        """Synthesize ``text`` to ``temp.mp3``; failures are printed, not raised."""
        self.communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        try:
            await self.communicate.save('temp.mp3')
        except Exception:
            print('Error in Async...;')
            traceback.print_exc()
class Winshot(QWidget, Ui_txt2voice):
    """Main window of the text-to-speech tool.

    Collects the text, voice, rate, output directory and output format from
    the UI, runs the conversion on an ``EdgeTTSTrans`` thread and shows
    progress and statistics while it runs.
    """

    def __init__(self):
        """Build the UI, wire the signals and show the window."""
        global run_flag
        super(Winshot, self).__init__()
        self.start_time = 0
        self.voice_len = 0
        self.text_len = 0
        self.filename = ''
        self.setupUi(self)
        self.createLayout()
        self.setWindowTitle(my_title)
        self.setWindowIcon(hbt.GetIco('ican'))
        self.setFixedSize(self.size())
        self.setWindowFlags(Qt.WindowMinimizeButtonHint)
        self.my_timer = QTimer(self)
        # Connect exactly once; connecting on every run would call running()
        # several times per tick.
        self.my_timer.timeout.connect(self.running)
        self.show()
        run_flag = 1

    def show_error(self, str):
        """Show ``str`` in a modal message box with a single Ok button."""
        QMessageBox.question(self, my_title, '\n\n' + str + '\n\n', QMessageBox.Ok)

    def set_False_Btn(self):
        """Disable the controls that must not be used during a conversion."""
        self.outButton.setEnabled(False)
        self.startButton.setEnabled(False)
        self.quitButton.setEnabled(False)
        self.out_path.setEnabled(False)

    def set_True_Btn(self):
        """Re-enable the controls disabled by ``set_False_Btn``."""
        self.outButton.setEnabled(True)
        self.startButton.setEnabled(True)
        self.quitButton.setEnabled(True)
        self.out_path.setEnabled(True)

    def start_run(self):
        """Validate the input text and start a conversion thread."""
        global stop_flag, bar
        self.save_yaml()
        text = self.textEdit.toPlainText()
        self.text_len = len(text)
        # Whitespace-only input has nothing to synthesize either.
        if not text.strip():
            self.show_error('文本框里的文字不能为空... ')
            stop_flag = True
            return
        stop_flag = False
        # Start every run from a clean progress/statistics state.
        bar = 0
        self.voice_len = 0
        self.set_False_Btn()
        self.start_time = time.time()
        self.filename = out_dir + '/' + time.strftime("%Y_%m_%d_%H.%M.%S")
        self.my_thread = EdgeTTSTrans(self, text, self.filename)
        self.my_thread.sinout.connect(self.signal_coming)
        self.my_thread.start()
        self.my_timer.start(500)

    def signal_coming(self, str):
        """Handle the worker result: ``'OK'`` on success, anything else is an error."""
        # Stop the progress animation on success and on failure alike.
        self.my_timer.stop()
        if str == 'OK':
            try:
                self.voice_len = get_media_length(self.filename + '.mp3')
            except Exception:
                # The statistics line is informational; do not abort on it.
                traceback.print_exc()
                self.voice_len = 0
            total_time = time.time() - self.start_time
            run_stat_text = f"统计信息:文本长度({self.text_len}字) | 音频长度({self.voice_len:.1f}秒) | 消耗时间({total_time:.1f}秒)"
            self.run_state.setText(run_stat_text)
            self.progressBar.setValue(100)
            # Let the UI repaint before the modal dialog opens.
            QApplication.processEvents()
            r_button = QMessageBox.question(self, my_title, "\n\n\n完成本次文字转语音换过程...\n\n需要播放吗?\n\n\n",
                                            QMessageBox.Yes | QMessageBox.No)
            if r_button == QMessageBox.Yes:
                try:
                    os.startfile(self.filename + '.mp3')
                except (OSError, AttributeError):
                    print("无法播放文件......")
            if mp3_wav == 1:
                self._convert_to_wav()
        else:
            self.show_error('转换过程中发生错误...\n可能原因:\n文件或目录不能包含中文...\n网络不通...\n网络不能使用代理...')
        self.set_True_Btn()
        self.progressBar.setValue(0)

    def _convert_to_wav(self):
        """Convert the generated MP3 to WAV with the bundled ffmpeg."""
        import subprocess
        # An argument list (no shell) keeps paths with spaces intact.
        command = ['sysenv\\ffmpeg', '-i', self.filename + '.mp3', self.filename + '.wav']
        try:
            subprocess.run(command, check=False)
        except OSError:
            traceback.print_exc()

    def running(self):
        """Timer tick: advance the progress bar and refresh the statistics line."""
        global bar
        bar += 2
        total_time = time.time() - self.start_time
        self.progressBar.setValue(bar)
        if bar >= 100:
            bar = 0  # wrap around; the real duration is not known in advance
        run_stat_text = f"统计信息:文本长度({self.text_len}字) | 音频长度({self.voice_len:.1f}秒) | 消耗时间({total_time:.1f}秒)"
        self.run_state.setText(run_stat_text)

    def helpWin(self):
        """Show the about/copyright dialog."""
        about_text = "\n\n\n 本软件著作权归属:XXX 网址:www.xxx.com \n\n\n"
        QMessageBox.question(self, my_title, about_text, QMessageBox.Ok)

    def quitWin(self):
        """Ask for confirmation, persist the settings and exit on Yes."""
        r_button = QMessageBox.question(self, my_title,
                                        "\n\n\n退出将终止本程序......\n\n确认退出吗?\n\n\n",
                                        QMessageBox.Yes | QMessageBox.No)
        self.save_yaml()
        if r_button == QMessageBox.Yes:
            sys.exit()

    def outButton_fuc(self):
        """Let the user pick the output directory; keep the old one on cancel."""
        global out_dir
        chosen = QFileDialog.getExistingDirectory(self, '选择转换后的输出文件夹', out_dir)
        if chosen:
            out_dir = chosen
        self.out_path.setText(out_dir)

    def open_fold_fuc(self):
        """Open the output directory with the system file manager (best effort)."""
        try:
            os.startfile(out_dir)
        except (OSError, AttributeError):
            # os.startfile is unavailable off Windows; report instead of hiding.
            traceback.print_exc()

    def rates_slider_fuc(self):
        """Mirror the slider value into the label and the module-level ``rates``."""
        global rates
        value = self.rates_slider.value()
        self.audio_rates.setText(f'{value}%')
        rates = f'{value:+d}%'  # always signed, e.g. '+0%', '-20%'

    def click_audio_select(self, str1):
        """Update the module-level voice from the combo box selection."""
        global voices, voices_select
        voices_select = self.audio_select.currentIndex()
        voices = 'zh-CN-' + voices_list[voices_select]
        print('选择的声音:', voices)

    def click_try_lisson(self, str1):
        """Play a short sample with the current voice on a background thread."""
        self.try_lisson.setEnabled(False)
        text = "感谢您选择我的声音"
        self.play_thread = PlayAudioWav(self, text)  # start the playback thread
        self.play_thread.start()

    def click_checkBox_mp3(self, state=Qt.Checked):
        """Select MP3 output when the MP3 box becomes checked."""
        global mp3_wav
        # stateChanged also fires when the box is unchecked by the exclusive
        # group; only a check may change the selected format.
        if state == Qt.Checked:
            mp3_wav = 0

    def click_checkBox_wav(self, state=Qt.Checked):
        """Select WAV output when the WAV box becomes checked."""
        global mp3_wav
        if state == Qt.Checked:
            mp3_wav = 1

    def click_textEdit(self):
        """Show the current text length in the status line."""
        txt_len = len(self.textEdit.toPlainText())
        self.run_state.setText(f"统计信息:文本长度({txt_len}字)")

    def save_yaml(self):
        """Write the current output directory, voice and format to the settings file."""
        current = {'out_dir': out_dir, 'voices_select': voices_select, 'mp3_wav': mp3_wav}
        with open(settings_file, 'w') as f:
            yaml.dump(current, f)

    def createLayout(self):
        """Initialise widget state from the loaded settings and connect all signals."""
        self.out_path.setText(out_dir)
        if mp3_wav == 0:
            self.checkBox_mp3.setChecked(True)
        else:
            self.checkBox_wav.setChecked(True)
        self.checkBox_mp3.stateChanged.connect(self.click_checkBox_mp3)
        self.checkBox_wav.stateChanged.connect(self.click_checkBox_wav)
        self.outButton.clicked.connect(self.outButton_fuc)
        self.chk_outputfile.clicked.connect(self.open_fold_fuc)
        self.try_lisson.clicked.connect(self.click_try_lisson)
        self.textEdit.textChanged.connect(self.click_textEdit)
        self.textEdit.setPlainText("本软件使用微软Edge-TTS,快速把文字转换成语音。")
        self.startButton.clicked.connect(self.start_run)
        self.helpButton.clicked.connect(self.helpWin)
        self.quitButton.clicked.connect(self.quitWin)
        self.rates_slider.setTickPosition(QSlider.TicksAbove)
        self.rates_slider.valueChanged.connect(self.rates_slider_fuc)
        self.audio_select.addItems(['晓晓:女', '晓依:女', '云霞:女', '东北:女', '陕西:女', '云剑:男', '云溪:男','云阳:男'])
        self.audio_select.setCurrentIndex(voices_select)
        self.audio_select.activated[str].connect(self.click_audio_select)
# if __name__ == '__main__':   (guard is commented out, so this also runs on import)
# High-DPI scaling is requested before the application object is created.
QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
app = QtWidgets.QApplication(sys.argv)
winshot = Winshot()
exit_code = app.exec_()
sys.exit(exit_code)
UI代码如下:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'txt2audio_UI.ui'
#
# Created by: PyQt5 UI code generator 5.15.2
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_txt2voice(object):
    """Form class that builds the fixed-geometry widgets of the text-to-speech window."""

    def setupUi(self, txt2voice):
        """Create all child widgets of ``txt2voice``, then apply texts and auto-connect slots."""

        def caption_font():
            # 9pt "宋体" font applied to every captioned widget.
            face = QtGui.QFont()
            face.setFamily("宋体")
            face.setPointSize(9)
            return face

        def place(widget, box, name, styled=True):
            # Position the widget, optionally apply the caption font, name it.
            widget.setGeometry(QtCore.QRect(*box))
            if styled:
                widget.setFont(caption_font())
            widget.setObjectName(name)
            return widget

        def separator(box, name):
            # Sunken horizontal rule spanning the window.
            rule = QtWidgets.QFrame(txt2voice)
            rule.setGeometry(QtCore.QRect(*box))
            rule.setFrameShape(QtWidgets.QFrame.HLine)
            rule.setFrameShadow(QtWidgets.QFrame.Sunken)
            rule.setObjectName(name)
            return rule

        txt2voice.setObjectName("txt2voice")
        txt2voice.resize(435, 431)

        # Widgets are created in the same order as before so child order is kept.
        self.startButton = place(QtWidgets.QPushButton(txt2voice), (160, 371, 91, 23), "startButton")
        self.helpButton = place(QtWidgets.QPushButton(txt2voice), (270, 371, 61, 23), "helpButton")
        self.quitButton = place(QtWidgets.QPushButton(txt2voice), (350, 371, 61, 23), "quitButton")
        self.textEdit = place(QtWidgets.QPlainTextEdit(txt2voice), (20, 30, 391, 175), "textEdit", styled=False)
        self.chk_outputfile = place(QtWidgets.QPushButton(txt2voice), (20, 371, 61, 23), "chk_outputfile")
        self.outButton = place(QtWidgets.QPushButton(txt2voice), (20, 280, 61, 21), "outButton")
        self.out_path = place(QtWidgets.QLabel(txt2voice), (90, 280, 311, 20), "out_path", styled=False)
        self.lbl_3 = place(QtWidgets.QLabel(txt2voice), (26, 222, 51, 16), "lbl_3")
        self.audio_select = place(QtWidgets.QComboBox(txt2voice), (86, 221, 71, 18), "audio_select")

        # Speech-rate slider, lower bound -99.
        slider = QtWidgets.QSlider(txt2voice)
        slider.setGeometry(QtCore.QRect(271, 219, 111, 20))
        slider.setMinimum(-99)
        slider.setTracking(True)
        slider.setOrientation(QtCore.Qt.Horizontal)
        slider.setInvertedAppearance(False)
        slider.setInvertedControls(False)
        slider.setObjectName("rates_slider")
        self.rates_slider = slider

        self.aud = place(QtWidgets.QLabel(txt2voice), (211, 220, 61, 20), "aud")
        self.lbl_4 = place(QtWidgets.QLabel(txt2voice), (27, 336, 61, 16), "lbl_4")

        # Thin progress bar without a percentage text.
        progress = QtWidgets.QProgressBar(txt2voice)
        progress.setGeometry(QtCore.QRect(87, 340, 321, 8))
        progress.setProperty("value", 0)
        progress.setTextVisible(False)
        progress.setInvertedAppearance(False)
        progress.setObjectName("progressBar")
        self.progressBar = progress

        self.try_lisson = place(QtWidgets.QPushButton(txt2voice), (160, 220, 31, 21), "try_lisson")
        self.line = separator((0, 401, 441, 16), "line")
        self.run_state = place(QtWidgets.QLabel(txt2voice), (10, 410, 381, 20), "run_state", styled=False)
        self.audio_file_path_txt_2 = place(QtWidgets.QLabel(txt2voice), (23, 10, 241, 16), "audio_file_path_txt_2")
        self.lbl_5 = place(QtWidgets.QLabel(txt2voice), (26, 252, 61, 16), "lbl_5")

        # The two format check boxes share one button group.
        self.checkBox_mp3 = place(QtWidgets.QCheckBox(txt2voice), (90, 252, 41, 16), "checkBox_mp3", styled=False)
        self.buttonGroup = QtWidgets.QButtonGroup(txt2voice)
        self.buttonGroup.setObjectName("buttonGroup")
        self.buttonGroup.addButton(self.checkBox_mp3)
        self.checkBox_wav = place(QtWidgets.QCheckBox(txt2voice), (140, 252, 68, 16), "checkBox_wav", styled=False)
        self.buttonGroup.addButton(self.checkBox_wav)

        self.line_2 = separator((0, 310, 441, 16), "line_2")
        self.audio_rates = place(QtWidgets.QLabel(txt2voice), (388, 220, 31, 20), "audio_rates")

        self.retranslateUi(txt2voice)
        QtCore.QMetaObject.connectSlotsByName(txt2voice)

    def retranslateUi(self, txt2voice):
        """Apply the (translatable) window title and widget captions."""
        tr = QtCore.QCoreApplication.translate
        txt2voice.setWindowTitle(tr("txt2voice", "AI"))
        captions = (
            (self.startButton, "开始转换"),
            (self.helpButton, "帮助"),
            (self.quitButton, "退出"),
            (self.chk_outputfile, "查看结果"),
            (self.outButton, "输出目录"),
            (self.out_path, "生成完成的视频输出目录"),
            (self.lbl_3, "语音选择:"),
            (self.aud, "语速选择:"),
            (self.lbl_4, "转换进度:"),
            (self.try_lisson, "试听"),
            (self.run_state, "统计信息:"),
            (self.audio_file_path_txt_2, "请输入文本:"),
            (self.lbl_5, "输出格式:"),
            (self.checkBox_mp3, "MP3"),
            (self.checkBox_wav, "WAV"),
            (self.audio_rates, "0%"),
        )
        for widget, caption in captions:
            widget.setText(tr("txt2voice", caption))