(二十二) 文本转语音、TTS、长文本、Edge-TTS

最新推荐文章于 2025-03-08 09:56:28 发布

置顶李贺～

最新推荐文章于 2025-03-08 09:56:28 发布

阅读量1.8k

点赞数

文章标签： edge 前端

本文链接：https://blog.csdn.net/weixin_42398606/article/details/134303150

版权

(二十二) 文本转语音、TTS、长文本、Edge-TTS

本文代码使用 Edge-TTS 将文本转换为语音，可保存为 mp3 或 wav 文件（wav 由程序调用 sysenv 目录下的 ffmpeg 从 mp3 转换得到），文本长度不限。
程序调用的是云端 Edge-TTS 接口，因此运行时需要联网；本人只是做了简单封装并编写了一个 UI。
可直接运行的文件可在百度网盘下载：
https://pan.baidu.com/s/1ntMnDWFvnS7tLUd9jku8Ew?pwd=hims

（此处原为程序界面截图）
代码如下：

#文本转语音工具V1.0
import asyncio
import traceback
# import librosa
import edge_tts
import os, sys, time
import cv2
import yaml

import hbt_funcs as hbt

from playsound import playsound
from PyQt5 import QtWidgets
from PyQt5.QtWidgets import QWidget, QMessageBox, QFileDialog, QApplication, QSlider
from PyQt5.QtCore import Qt, QTimer, QThread, pyqtSignal, pyqtSlot
from txt2audio_UI import Ui_txt2voice

# Module-level configuration: event-loop policy, shared globals, and the
# persisted user settings (output directory, voice index, output format).
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

hbt.is_contains_chinese(os.getcwd())   # Check whether the working directory path contains Chinese characters

rates = '+0%'        # Speech-rate string handed to edge_tts (updated by the rate slider)
run_flag = 0
stop_flag = False
bar = 0              # Current value of the animated progress bar
voices_list = ['XiaoxiaoNeural', 'XiaoyiNeural', 'YunxiaNeural', 'liaoning-XiaobeiNeural', 'shaanxi-XiaoniNeural',
              'YunjianNeural', 'YunxiNeural', 'YunyangNeural']

my_title = "iCANX文字转语音工具"

settings_file = "settings.yaml"   # YAML file holding the persisted settings
settings = {}
if os.path.exists(settings_file):
    try:
        with open(settings_file, 'r') as f:
            settings = yaml.safe_load(f)
    except (OSError, yaml.YAMLError):
        # An unreadable or corrupt settings file must not prevent start-up;
        # fall back to the defaults below.
        traceback.print_exc()
        settings = {}
# safe_load returns None for an empty file and may return a list/scalar for a
# hand-edited one; only a mapping supports the .get() lookups below.
if not isinstance(settings, dict):
    settings = {}

out_dir = settings.get('out_dir', os.getcwd())
if not isinstance(out_dir, str) or not os.path.isdir(out_dir):
    # The saved directory may have been moved or deleted since the last run.
    out_dir = os.getcwd()

voices_select = settings.get('voices_select', 0)
if not isinstance(voices_select, int) or not 0 <= voices_select < len(voices_list):
    # Guard the index so a stale or edited value cannot raise IndexError.
    voices_select = 0

mp3_wav = settings.get('mp3_wav', 0)   # 0 = MP3 only, 1 = additionally convert to WAV
voices = 'zh-CN-' + voices_list[voices_select]

# from subprocess import run, PIPE, STDOUT
# def get_media_length(file_path):
#     cmdline = f'ffprobe -i "{file_path}" -show_entries format=duration -v quiet -of csv="p=0"'
#     # print(cmdline)
#     result = run(cmdline, stdout=PIPE, stderr=STDOUT)
#     try: lenth = int(float(result.stdout.decode('utf-8').strip()))
#     except: lenth = 0; print('ffprobe检测长度发生错误...')
#     return lenth

from mutagen.mp3 import MP3
def get_media_length(file_path):
    """Return the ``info.length`` value mutagen reports for the MP3 at *file_path*."""
    return MP3(file_path).info.length

class EdgeTTSTrans(QThread):
    """Worker thread that saves the Edge-TTS rendering of a text as ``<filename>.mp3``.

    The ``sinout`` signal carries ``'OK'`` when the file was written and
    ``'ERROR'`` when the conversion raised.
    """

    sinout = pyqtSignal(str)

    def __init__(self, winshot, texts, filename):
        """Store the job parameters.

        Args:
            winshot: The main window that started the job (kept as ``main_win``).
            texts: The text to synthesise.
            filename: Output path without extension; ``.mp3`` is appended.
        """
        super(EdgeTTSTrans, self).__init__()
        self.main_win = winshot
        self.rates = rates          # snapshot of the module-level rate at creation time
        self.texts = texts
        self.filename = filename + '.mp3'

    def run(self):
        """Run the conversion and report the outcome through ``sinout``."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
        except Exception:
            # Keep the cause visible on the console; the GUI only shows a
            # generic list of possible reasons.
            traceback.print_exc()
            self.sinout.emit('ERROR')
        else:
            self.sinout.emit('OK')

    async def edge_tts_trans(self, text):
        """Synthesise *text* with the current module-level voice and save it."""
        communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        await communicate.save(self.filename)

class PlayAudioWav(QThread):
    """Worker thread for the voice preview: synthesise a sample, play it, clean up.

    The sample is written to ``temp.mp3`` in the working directory and removed
    again after playback.
    """

    TEMP_FILE = "temp.mp3"

    def __init__(self, winshot, texts):
        """Store the job parameters and clear any leftover sample file.

        Args:
            winshot: The main window; its ``try_lisson`` button is re-enabled
                when the thread finishes.
            texts: The sample text to synthesise.
        """
        super(PlayAudioWav, self).__init__()
        self.winshot = winshot
        self.rates = rates          # snapshot of the module-level rate at creation time
        self.texts = texts
        self._remove_temp_file()
        # Widgets must only be touched from the GUI thread. This QThread object
        # lives in the thread that created it, so a slot on it that is
        # triggered by ``finished`` is delivered there instead of running
        # inside ``run``.
        self.finished.connect(self._enable_preview_button)

    def run(self):
        """Synthesise the sample, play it, and always remove the temp file."""
        try:
            asyncio.run(self.edge_tts_trans(self.texts))
            playsound(self.TEMP_FILE)
        except Exception:
            traceback.print_exc()
        finally:
            self._remove_temp_file()

    async def edge_tts_trans(self, text):
        """Save the Edge-TTS rendering of *text* to the temp file (errors are printed)."""
        self.communicate = edge_tts.Communicate(text=text, rate=self.rates, voice=voices)
        try:
            await self.communicate.save(self.TEMP_FILE)
        except Exception:
            print('Error in Async...;')
            traceback.print_exc()

    @pyqtSlot()
    def _enable_preview_button(self):
        """Re-enable the preview button once the thread has finished."""
        self.winshot.try_lisson.setEnabled(True)

    def _remove_temp_file(self):
        """Delete the temp file if present; a failed delete is reported, not raised."""
        if os.path.exists(self.TEMP_FILE):
            try:
                os.remove(self.TEMP_FILE)
            except OSError:
                traceback.print_exc()

class Winshot(QWidget, Ui_txt2voice):
    """Main window of the text-to-speech tool.

    Builds the ``Ui_txt2voice`` layout, wires its widgets to the handlers
    below, and runs each conversion in an ``EdgeTTSTrans`` worker thread while
    a ``QTimer`` animates the progress bar and the statistics line.
    """

    def __init__(self):
        """Create the widgets, connect the signals and show the window."""
        global run_flag
        super(Winshot, self).__init__()
        self.start_time = 0
        self.voice_len = 0
        self.text_len = 0
        self.setupUi(self)

        self.createLayout()
        self.setWindowTitle(my_title)
        self.setWindowIcon(hbt.GetIco('ican'))
        self.setFixedSize(self.size())
        self.setWindowFlags(Qt.WindowMinimizeButtonHint)
        self.my_timer = QTimer(self)
        # Connect exactly once. Connecting inside start_run would add another
        # connection per conversion and call running() several times per tick.
        self.my_timer.timeout.connect(self.running)
        self.show()
        run_flag = 1

    def show_error(self, str):
        """Show *str* in a message box with a single OK button."""
        QMessageBox.question(self, my_title, '\n\n' + str + '\n\n', QMessageBox.Ok)

    def set_False_Btn(self):
        """Disable the controls that must not be used while a conversion runs."""
        self.outButton.setEnabled(False)
        self.startButton.setEnabled(False)
        self.quitButton.setEnabled(False)
        self.out_path.setEnabled(False)

    def set_True_Btn(self):
        """Re-enable the controls disabled by ``set_False_Btn``."""
        self.outButton.setEnabled(True)
        self.startButton.setEnabled(True)
        self.quitButton.setEnabled(True)
        self.out_path.setEnabled(True)

    def start_run(self):
        """Start converting the text in the editor to an MP3 in ``out_dir``."""
        global stop_flag, bar
        self.save_yaml()
        stop_flag = False
        self.set_False_Btn()
        self.start_time = time.time()

        text = self.textEdit.toPlainText()
        self.text_len = len(text)
        if text == "":
            self.show_error('文本框里的文字不能为空... ')
            self.set_True_Btn()
            stop_flag = True
            return

        # Start every run from a clean progress bar and without the audio
        # length left over from the previous conversion.
        bar = 0
        self.voice_len = 0

        # The output name is the current timestamp; the worker appends ".mp3".
        self.filename = out_dir + '/' + time.strftime("%Y_%m_%d_%H.%M.%S")
        self.my_thread = EdgeTTSTrans(self, text, self.filename)
        self.my_thread.sinout.connect(self.signal_coming)
        self.my_thread.start()

        self.my_timer.start(500)

    def signal_coming(self, str):
        """Handle the worker result: ``'OK'`` on success, anything else is an error."""
        # Stop the animation for every outcome, otherwise a failed conversion
        # leaves the progress bar running.
        self.my_timer.stop()
        if str == 'OK':
            mp3_path = self.filename + '.mp3'
            self.voice_len = get_media_length(mp3_path)
            total_time = time.time() - self.start_time
            run_stat_text = f"统计信息：文本长度({self.text_len}字) | 音频长度({self.voice_len:.1f}秒) | 消耗时间({total_time:.1f}秒)"
            self.run_state.setText(run_stat_text)
            self.progressBar.setValue(100)
            cv2.waitKey(10)   # NOTE(review): presumably a short pause before the dialog; kept from the original
            r_button = QMessageBox.question(self, my_title, "\n\n\n完成本次文字转语音换过程...\n\n需要播放吗？\n\n\n",
                                            QMessageBox.Yes | QMessageBox.No)
            if r_button == QMessageBox.Yes:
                try:
                    os.startfile(mp3_path)
                except Exception:
                    print("无法播放文件......")
            if mp3_wav == 1:
                # Quote both paths so an output directory containing spaces
                # does not split the ffmpeg arguments.
                os.system(f'sysenv\\ffmpeg -i "{self.filename}.mp3" "{self.filename}.wav"')
        else:
            self.show_error('转换过程中发生错误...\n可能原因：\n文件或目录不能包含中文...\n网络不通...\n网络不能使用代理...')

        self.set_True_Btn()
        self.progressBar.setValue(0)

    def running(self):
        """Timer tick: advance the cyclic progress bar and refresh the statistics."""
        global bar
        bar += 2
        total_time = time.time() - self.start_time
        self.progressBar.setValue(bar)
        if bar >= 100:
            bar = 0
        run_stat_text = f"统计信息：文本长度({self.text_len}字) | 音频长度({self.voice_len:.1f}秒) | 消耗时间({total_time:.1f}秒)"
        self.run_state.setText(run_stat_text)

    def helpWin(self):
        """Show the copyright / help dialog."""
        str = "\n\n\n      本软件著作权归属：XXX        网址：www.xxx.com  \n\n\n"
        QMessageBox.question(self, my_title, str, QMessageBox.Ok)

    def quitWin(self):
        """Ask for confirmation, save the settings, and exit when confirmed."""
        # Use the my_title variable; the quoted literal "my_title" showed the
        # variable name as the dialog caption.
        r_button = QMessageBox.question(self, my_title,
                                        "\n\n\n退出将终止本程序......\n\n确认退出吗？\n\n\n", QMessageBox.Yes | QMessageBox.No)
        self.save_yaml()
        if r_button == QMessageBox.Yes:
            sys.exit()

    def outButton_fuc(self):
        """Let the user pick the output directory; cancelling keeps the old one."""
        global out_dir
        tmp_path = out_dir
        out_dir = QFileDialog.getExistingDirectory(self, '选择转换后的输出文件夹', out_dir)
        if out_dir == '':
            out_dir = tmp_path
        self.out_path.setText(out_dir)

    def open_fold_fuc(self):
        """Open the output directory with the system file manager (best effort)."""
        try:
            os.startfile(out_dir)
        except Exception:
            # Still best effort, but leave a trace instead of failing silently.
            traceback.print_exc()

    def rates_slider_fuc(self):
        """Mirror the slider value into the label and the signed global ``rates`` string."""
        global rates
        self.audio_rates.setText(f'{self.rates_slider.value()}%')
        _rates = self.rates_slider.value()
        # The rate string always carries an explicit sign, e.g. "+10%" or "-5%".
        if _rates >= 0:
            rates = f'+{_rates}%'
        else:
            rates = f'{_rates}%'

    def click_audio_select(self, str1):
        """Update the global voice from the combo box selection."""
        global voices, voices_select
        voices_select = self.audio_select.currentIndex()
        voices = 'zh-CN-' + voices_list[voices_select]
        print('选择的声音：', voices)

    def click_try_lisson(self, str1):
        """Play a short sample with the selected voice in a ``PlayAudioWav`` thread."""
        self.try_lisson.setEnabled(False)
        text = "感谢您选择我的声音"
        self.play_thread = PlayAudioWav(self, text)   # start the playback thread
        self.play_thread.start()

    def click_checkBox_mp3(self):
        """Select MP3-only output."""
        global mp3_wav
        mp3_wav = 0

    def click_checkBox_wav(self):
        """Select additional WAV output."""
        global mp3_wav
        mp3_wav = 1

    def click_textEdit(self):
        """Refresh the character count whenever the text changes."""
        txt_len = len(self.textEdit.toPlainText())
        print(self.textEdit.toPlainText())
        self.run_state.setText(f"统计信息：文本长度({txt_len}字)")

    def save_yaml(self):
        """Persist the output directory, voice index and output format to the settings file."""
        settings = {'out_dir': out_dir, 'voices_select': voices_select, 'mp3_wav': mp3_wav}
        with open(settings_file, 'w+') as f:
            yaml.dump(settings, f)

    def createLayout(self):
        """Initialise widget state from the loaded settings and connect all signals."""
        self.out_path.setText(out_dir)

        if mp3_wav == 0:
            self.checkBox_mp3.setChecked(True)
        else:
            self.checkBox_wav.setChecked(True)
        self.checkBox_mp3.stateChanged.connect(self.click_checkBox_mp3)
        self.checkBox_wav.stateChanged.connect(self.click_checkBox_wav)

        self.outButton.clicked.connect(self.outButton_fuc)
        self.chk_outputfile.clicked.connect(self.open_fold_fuc)
        self.try_lisson.clicked.connect(self.click_try_lisson)

        self.textEdit.textChanged.connect(self.click_textEdit)
        self.textEdit.setPlainText("本软件使用微软Edge-TTS，快速把文字转换成语音。")

        self.startButton.clicked.connect(self.start_run)
        self.helpButton.clicked.connect(self.helpWin)
        self.quitButton.clicked.connect(self.quitWin)

        self.rates_slider.setTickPosition(QSlider.TicksAbove)
        self.rates_slider.valueChanged.connect(self.rates_slider_fuc)

        # The item order must match the order of the module-level voices_list.
        self.audio_select.addItems(['晓晓:女', '晓依:女', '云霞:女', '东北:女', '陕西:女', '云剑:男', '云溪:男','云阳:男'])
        self.audio_select.setCurrentIndex(voices_select)
        self.audio_select.activated[str].connect(self.click_audio_select)

#if __name__ == '__main__':
# Script entry point; the guard above is left disabled, so these statements run
# whenever the module is executed or imported.
# High-DPI scaling is requested before the application object is created.
QtWidgets.QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
app = QApplication(sys.argv)
winshot = Winshot()
exit_code = app.exec_()
sys.exit(exit_code)

UI代码如下：

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'txt2audio_UI.ui'
#
# Created by: PyQt5 UI code generator 5.15.2
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_txt2voice(object):
    """Widget layout of the text-to-speech window (fixed geometry, 435 x 431)."""

    def setupUi(self, txt2voice):
        """Create every child widget of *txt2voice* and store it as an attribute of ``self``.

        Widgets are created in a fixed order, each with an absolute geometry
        and an object name equal to its attribute name.
        """
        widgets = QtWidgets

        def small_font():
            # A fresh 9-point "宋体" font for each widget that uses one.
            made = QtGui.QFont()
            made.setFamily("宋体")
            made.setPointSize(9)
            return made

        def add(name, widget, geometry, styled=False):
            # Place the widget, optionally apply the shared font style, then
            # name it and expose it as self.<name>.
            widget.setGeometry(QtCore.QRect(*geometry))
            if styled:
                widget.setFont(small_font())
            widget.setObjectName(name)
            setattr(self, name, widget)
            return widget

        def add_separator(name, geometry):
            # Sunken horizontal rule spanning the window width.
            rule = add(name, widgets.QFrame(txt2voice), geometry)
            rule.setFrameShape(widgets.QFrame.HLine)
            rule.setFrameShadow(widgets.QFrame.Sunken)
            return rule

        txt2voice.setObjectName("txt2voice")
        txt2voice.resize(435, 431)

        add("startButton", widgets.QPushButton(txt2voice), (160, 371, 91, 23), styled=True)
        add("helpButton", widgets.QPushButton(txt2voice), (270, 371, 61, 23), styled=True)
        add("quitButton", widgets.QPushButton(txt2voice), (350, 371, 61, 23), styled=True)
        add("textEdit", widgets.QPlainTextEdit(txt2voice), (20, 30, 391, 175))
        add("chk_outputfile", widgets.QPushButton(txt2voice), (20, 371, 61, 23), styled=True)
        add("outButton", widgets.QPushButton(txt2voice), (20, 280, 61, 21), styled=True)
        add("out_path", widgets.QLabel(txt2voice), (90, 280, 311, 20))
        add("lbl_3", widgets.QLabel(txt2voice), (26, 222, 51, 16), styled=True)
        add("audio_select", widgets.QComboBox(txt2voice), (86, 221, 71, 18), styled=True)

        slider = add("rates_slider", widgets.QSlider(txt2voice), (271, 219, 111, 20))
        slider.setMinimum(-99)
        slider.setTracking(True)
        slider.setOrientation(QtCore.Qt.Horizontal)
        slider.setInvertedAppearance(False)
        slider.setInvertedControls(False)

        add("aud", widgets.QLabel(txt2voice), (211, 220, 61, 20), styled=True)
        add("lbl_4", widgets.QLabel(txt2voice), (27, 336, 61, 16), styled=True)

        progress = add("progressBar", widgets.QProgressBar(txt2voice), (87, 340, 321, 8))
        progress.setProperty("value", 0)
        progress.setTextVisible(False)
        progress.setInvertedAppearance(False)

        add("try_lisson", widgets.QPushButton(txt2voice), (160, 220, 31, 21), styled=True)
        add_separator("line", (0, 401, 441, 16))
        add("run_state", widgets.QLabel(txt2voice), (10, 410, 381, 20))
        add("audio_file_path_txt_2", widgets.QLabel(txt2voice), (23, 10, 241, 16), styled=True)
        add("lbl_5", widgets.QLabel(txt2voice), (26, 252, 61, 16), styled=True)

        # The two format check boxes share one button group.
        mp3_box = add("checkBox_mp3", widgets.QCheckBox(txt2voice), (90, 252, 41, 16))
        self.buttonGroup = widgets.QButtonGroup(txt2voice)
        self.buttonGroup.setObjectName("buttonGroup")
        self.buttonGroup.addButton(mp3_box)
        wav_box = add("checkBox_wav", widgets.QCheckBox(txt2voice), (140, 252, 68, 16))
        self.buttonGroup.addButton(wav_box)

        add_separator("line_2", (0, 310, 441, 16))
        add("audio_rates", widgets.QLabel(txt2voice), (388, 220, 31, 20), styled=True)

        self.retranslateUi(txt2voice)
        QtCore.QMetaObject.connectSlotsByName(txt2voice)

    def retranslateUi(self, txt2voice):
        """Set the window title and the caption of every labelled widget."""
        translate = QtCore.QCoreApplication.translate
        context = "txt2voice"
        txt2voice.setWindowTitle(translate(context, "AI"))

        captions = (
            (self.startButton, "开始转换"),
            (self.helpButton, "帮助"),
            (self.quitButton, "退出"),
            (self.chk_outputfile, "查看结果"),
            (self.outButton, "输出目录"),
            (self.out_path, "生成完成的视频输出目录"),
            (self.lbl_3, "语音选择："),
            (self.aud, "语速选择："),
            (self.lbl_4, "转换进度："),
            (self.try_lisson, "试听"),
            (self.run_state, "统计信息："),
            (self.audio_file_path_txt_2, "请输入文本："),
            (self.lbl_5, "输出格式："),
            (self.checkBox_mp3, "MP3"),
            (self.checkBox_wav, "WAV"),
            (self.audio_rates, "0%"),
        )
        for target, caption in captions:
            target.setText(translate(context, caption))