1.eSpeak
简介:
eSpeak是最为流行的开源跨平台的文本转语音程序.可以直接输出wav文件.优点:当然是跨平台.缺点:发音难听.
在python中的使用可参考:https://www.cnblogs.com/henjay724/p/9590032.html
下载:
http://espeak.sourceforge.net/
安装:
setup_espeak-1.48.04.exe
使用:
安装完成后在目录"C:\Program Files (x86)\eSpeak"中有一个TTSApp.exe执行程序,这个程序是Microsoft Speech SDK中的一个DEMO程序.
在目录"C:\Program Files (x86)\eSpeak\command_line"下有一个espeak.exe的执行程序.这才是eSpeak真正的执行程序.
使用方法如下:
# -vzh为中文发音.-f 1.txt指定输入文本文件.-w 1.wav为输出的wav文件.
# 要注意的是,在win10下使用 espeak.exe -x -vzh "下载" 直接播放中文时,播放的是乱七八糟的声音.
# 原因应该是字符集的问题.只能使用播放文件的方式.而且文本文件编码必须是utf-8
espeak.exe -vzh+f3 -f 1.txt #播放声音
espeak.exe -vzh+f3 -f 1.txt -w 1.wav #输出到文件不播放
2.Microsoft Speech SDK 5.1
简介:
Microsoft Speech SDK 5.1是Windows平台的文本转语音开发包.优点:使用win10默认的语音比较好听.缺点:不能跨平台,只能在windows平台上使用.
安装包:SpeechSDK51.exe
安装完成后在目录"C:\Program Files (x86)\Microsoft Speech SDK 5.1",有大量的例子程序
基础入门可学习"Microsoft Speech SDK 5.1 Help"下的内容.
可参考的例子程序:
自己整理的例子程序
// ConsoleApplication3.cpp: 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include <windows.h>
#include "sphelper.h"
#include <iostream>
#include <sapi.h> //导入语音头文件
#include <string>
#pragma comment(lib,"sapi.lib") //导入语音头文件库
// Speaks the given text through a SAPI voice (ISpVoice).
//
// speakContent: null-terminated text to speak. It is passed unchanged to
//               ISpVoice::Speak, so the project has to be built with UNICODE
//               for LPCTSTR to be the wide string that call expects.
//
// Speak is called with flags == 0, and the voice is released before the
// function returns.
void MSSSpeak(LPCTSTR speakContent)
{
    // Initialize COM for this thread. Remember whether the call succeeded:
    // CoUninitialize() must only be paired with a successful CoInitialize(),
    // otherwise the thread's COM reference count becomes unbalanced.
    const bool comInitialized = SUCCEEDED(::CoInitialize(NULL));
    if (!comInitialized)
    {
        // MessageBoxW is used explicitly because both texts are wide literals.
        MessageBoxW(NULL, L"COM接口初始化失败!", L"提示", MB_ICONWARNING | MB_CANCELTRYCONTINUE | MB_DEFBUTTON2);
    }

    // Obtain the SpVoice interface.
    ISpVoice *pVoice = NULL;
    HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice, (void**)&pVoice);
    if (SUCCEEDED(hr))
    {
        pVoice->SetVolume((USHORT)100); // volume, range 0 - 100
        pVoice->SetRate(0);             // speaking rate, range -10 - 10
        hr = pVoice->Speak(speakContent, 0, NULL);
        pVoice->Release();
        pVoice = NULL;
    }

    // Release COM only if this call initialized it.
    if (comInitialized)
    {
        ::CoUninitialize();
    }
}
// Renders the given text into a WAV file instead of the speakers.
//
// speakContent: null-terminated text to speak. It is passed unchanged to
//               ISpVoice::Speak, so the project has to be built with UNICODE
//               for LPCTSTR to be the wide string that call expects.
//
// The output path is hard-coded to E:\20190529\abc12345.wav. Failures are not
// reported to the caller (the function returns void); on any failed step the
// remaining steps are skipped.
void MSSSpeakToWav(LPCTSTR speakContent)
{
    // Initialize COM for this thread. Remember whether the call succeeded so
    // that CoUninitialize() is only paired with a successful CoInitialize().
    const bool comInitialized = SUCCEEDED(::CoInitialize(NULL));
    if (!comInitialized)
    {
        // MessageBoxW is used explicitly because both texts are wide literals.
        MessageBoxW(NULL, L"COM接口初始化失败!", L"提示", MB_ICONWARNING | MB_CANCELTRYCONTINUE | MB_DEFBUTTON2);
    }

    // Inner scope: every COM smart pointer below must be destroyed (and thereby
    // released) before CoUninitialize() is called at the end of the function.
    {
        // Obtain the SpVoice interface. The smart pointer releases the voice
        // automatically when it goes out of scope.
        CComPtr<ISpVoice> cpVoice;
        HRESULT hr = cpVoice.CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL);

        // Only touch the voice if it was actually created.
        if (SUCCEEDED(hr))
        {
            const WCHAR wavPath[] = L"E:\\20190529\\abc12345.wav";

            CComPtr<ISpStreamFormat> cpOldStream;
            CComPtr<ISpStream> cpWavStream;
            CSpStreamFormat OriginalFmt;

            // Remember the current output stream and copy its format so the
            // WAV file is written with the same format.
            hr = cpVoice->GetOutputStream(&cpOldStream);
            if (hr == S_OK)
            {
                hr = OriginalFmt.AssignFormat(cpOldStream);
            }
            else
            {
                hr = E_FAIL;
            }

            // Use the SAPI helper function in sphelper.h to create the wav file.
            if (SUCCEEDED(hr))
            {
                hr = SPBindToFile(wavPath, SPFM_CREATE_ALWAYS, &cpWavStream, &OriginalFmt.FormatId(), OriginalFmt.WaveFormatExPtr());
            }
            if (SUCCEEDED(hr))
            {
                // Set the voice's output to the wav file instead of the speakers.
                hr = cpVoice->SetOutput(cpWavStream, TRUE);
            }
            if (SUCCEEDED(hr))
            {
                hr = cpVoice->Speak(speakContent, 0, NULL);
            }

            // Wait until speaking has finished so the wav stream is not
            // released before the voice is done writing to it.
            cpVoice->WaitUntilDone(INFINITE);
            cpWavStream.Release();

            // Set the output back to the original stream.
            cpVoice->SetOutput(cpOldStream, FALSE);
        }
    }

    // Release COM only if this call initialized it.
    if (comInitialized)
    {
        ::CoUninitialize();
    }
}
// Demo entry point: speaks one fixed sentence aloud, then renders the same
// sentence to a WAV file, and finally waits for a key press.
int main()
{
    // Alternative test sentence:
    //std::wstring a = L"你已经选择 Microsoft Huihui Desktop - Chinese (Simplified) 作为计算机的默认语音。";
    const std::wstring sentence = L"瑗其自, 你是大猪头.";

    // Original author's note: a "Cstr" did not work here, for reasons unknown
    // (presumably CString is meant - unconfirmed).
    MSSSpeak(sentence.c_str());
    MSSSpeakToWav(sentence.c_str());

    // Keep the console window open until a key is pressed.
    system("pause");
    return 0;
}
设置windows下默认的语音包
32位系统: C:\Windows\System32\Speech\SpeechUX\sapi.cpl
64位系统: C:\Windows\SysWOW64\Speech\SpeechUX\sapi.cpl
Win8以下系统安装HuihuizXP-v1.0.zip语音包.WIN7下的中文包在播放日期格式例如:"2019-06-21 18:18:18"时,播放到秒时会变成"18",而不是"18秒"
HuihuizXP-v1.0.zip下载: https://download.csdn.net/download/yuanlin1318/11257768
在python下的使用
# python使用win32com包的例子,最大的问题是不能生成wav文件.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# CreateDate: 2018-11-22
import sys  # imported explicitly; previously `sys` was only reachable via the star import below

import win32com.client as wincl
from tkinter import *  # kept from the original snippet; nothing below uses it directly

# Python 2 only: make UTF-8 the default codec so the bare str.decode() call in
# text2speech() can decode the Chinese text. reload(sys) is required because
# sys.setdefaultencoding is removed from the module during interpreter start-up.
reload(sys)
sys.setdefaultencoding('utf8')


def text2speech(text1):
    """Speak ``text1`` aloud through the Windows ``SAPI.SpVoice`` COM object.

    :param text1: Python 2 byte string; decoded with the default encoding
        (set to UTF-8 above) before it is handed to SAPI.
    """
    text = text1.decode()
    speak = wincl.Dispatch("SAPI.SpVoice")
    # help(speak.GetAudioOutputs)
    # Print the voice collection returned by SAPI (debug output).
    a = speak.GetVoices()
    print(a)
    speak.Speak(text)


text2speech("以上就是Python如何实现文本转语音的全部内容")
# python2 下使用myttsdll例子(可生成wav文件)
# coding=utf-8
from ctypes import *
dll = windll.LoadLibrary("MyTTSDll.dll")
print dll.Add(1, 102)
# dll.MSSSpeak("你好,中国人民银行")
# dll.MSSSpeak("How are you!")
# dll.MSSSpeak('你好,中国人民银行'.decode('utf-8'))
dll.MSSSpeak('2019-06-17 18:18:18')
print dll.MSSSpeakToWav('2019-06-17 18:18:18','time2.wav')
# python3 --需要对要TTS的字符串进行GBK转码
# 下使用myttsdll例子(可生成wav文件)
# coding=utf-8
from ctypes import *

# Load the TTS wrapper DLL through ctypes' `windll` loader.
dll = windll.LoadLibrary("MyTTSDll.dll")
print (dll.Add(1, 102))

# On Python 3 every string handed to the DLL is GBK-encoded into bytes first.
# dll.MSSSpeak("你好,中国人民银行".encode('gbk'))
dll.MSSSpeak("How are you!".encode('gbk'))
dll.MSSSpeak("2019-06-17 18:18:18".encode('gbk'))

# The arguments of MSSSpeakToWav are encoded the same way as for MSSSpeak.
# NOTE(review): assumes the DLL takes both the text and the file name as
# narrow (char*) strings - confirm against the MyTTSDll source.
print (dll.MSSSpeakToWav('2019-06-17 18:18:18'.encode('gbk'), 'time2.wav'.encode('gbk')))
使用myttsdll时提示少msvcr100.dll的解决方法
参考文档:https://blog.csdn.net/testcs_dn/article/details/26976555
MyTTSDLL源码:https://download.csdn.net/download/yuanlin1318/11257708
源码:https://download.csdn.net/download/yuanlin1318/11257627