语音合成
语音合成(TTS)方面,微软提供了合成引擎,不过电脑上的语音引擎听起来还是有点机械,效果不是很好。
这几年,几大公司也推出了自己的语音引擎。之所以选择阿里云的语音引擎,是因为它可以提供3个月的免费语音合成功能,合成效果虽然不是非常好,但也可以了。
对于将一篇文章合成音频,本文分三部分进行分解:
1)阿里云SDK 合成音频demo
2)将合成的多段wav音频,合并成一整段完整音频
3)主进程调用SDK子进程, 并将多段音频合成一整段的 解决方案
目录
前提
注册阿里云,选择 智能语音交互 ,
https://help.aliyun.com/document_detail/130555.html?spm=a2c4g.11186623.6.625.30102a7djABrmE
一、阿里云示例代码
下载阿里云示例后,并不能直接运行,还必须自己处理一番,很是麻烦。
自己整理了一份用VS2013编译的、可用的C++示例代码,可以下载尝试。
下面是两个重要的DEMO:TTS demo 和 token demo。
1.1 TTS语音合成示例
1)设置语速、朗读者等信息
// Builds the GET request URL for the TTS endpoint from the given parameters
// and sets it on a freshly created libcurl handle.
// Returns -1 when the curl handle cannot be created.
// NOTE: this excerpt stops after CURLOPT_URL is set; the rest of the function
// (performing the request, cleanup, return value) is not shown here.
// NOTE(review): appKey/token/text are appended to the query string verbatim,
// with no URL-encoding visible in this excerpt — confirm the caller encodes them.
int processGETRequest(string appKey, string token, string text,
string audioSaveFile, string format, int sampleRate) {
CURL* curl = NULL;
CURLcode res;
curl = curl_easy_init();
if (curl == NULL) {
return -1;
}
string url = "https://nls-gateway.cn-shanghai.aliyuncs.com/stream/v1/tts";
/**
* Build the HTTPS URL request parameters
*/
ostringstream oss;
oss << url;
oss << "?appkey=" << appKey;
oss << "&token=" << token;
oss << "&text=" << text;
oss << "&format=" << format;
oss << "&sample_rate=" << sampleRate;
// voice: speaker, optional, default is xiaoyun
// (`speaker` is declared outside this excerpt)
oss << "&voice=" << speaker;// "siqi";
// volume: range 0~100, optional, default 50
oss << "&volume=" << 95;
// speech_rate: speaking speed, range -500~500, optional, default 0
oss << "&speech_rate=" << -300;
// pitch_rate: pitch, range -500~500, optional, default 0
// oss << "&pitch_rate=" << 0;
string request = oss.str();
// Echo the full request URL to stdout.
cout << request << endl;
curl_easy_setopt(curl, CURLOPT_URL, request.c_str());
2)合成的音频文件
// Write bodyContent to audioSaveFile in binary mode; returns -1 if the file
// cannot be opened.
// (bodyContent is filled outside this excerpt — presumably the HTTP response
// body holding the synthesized audio; confirm against the full function.)
ofstream fs;
fs.open(audioSaveFile.c_str(), ios::out | ios::binary);
if (!fs.is_open()) {
cout << "The audio save file can not open!";
return -1;
}
fs.write(bodyContent.c_str(), bodyContent.size());
fs.close();
1.2 获取TOKEN
因为免费的token有3小时限制,所以需要经常更新
代码如下(示例):
unsigned int expireTime = 0;
//获取访问令牌TokenId
int getTokenId(const char* keySecret, const char* keyId) {
NlsToken nlsTokenRequest;
/*设置阿里云账号KeySecret*/
nlsTokenRequest.setKeySecret(keySecret);
/*设置阿里云账号KeyId*/
nlsTokenRequest.setAccessKeyId(keyId);
/*获取token. 成功返回0, 失败返回-1*/
if (-1 == nlsTokenRequest.applyNlsToken()) {
cout << "Failed: " << nlsTokenRequest.getErrorMsg() << endl; /*获取失败原因*/
return -1;
} else {
cout << "TokenId: " << nlsTokenRequest.getToken() << endl; /*获取TokenId*/
cout << "TokenId expireTime: " << nlsTokenRequest.getExpireTime() << endl; /*获取Token有效期时间戳(秒)*/
expireTime = nlsTokenRequest.getExpireTime();
WritePrivateProfileString("Aliyun", "Token", nlsTokenRequest.getToken(), "control\\preSet.ini");
return 0;
}
}
二 合成多个音频
阿里云免费合成音频有300字限制,所以一篇长文章,要自己划分成小段,然后将生成的多个音频,再自己合并成一个完整的音频。
合并wav音频,网上有开源的程序:
bool CombineWaveFile(int argc, char argv[][256], char *pOutFileName)
{
if (argc <= 0 || 0 == strlen(pOutFileName))
{
return false;
}
char strTemp[1024] = { 0 };
RIFF_HEADER *pRiff_Header = new RIFF_HEADER[argc];
FMT_BLOCK *pFmt_Block = new FMT_BLOCK[argc];
FACT_BLOCK *pFact_Block = new FACT_BLOCK[argc];
DATA_BLOCK *pData_Block = new DATA_BLOCK[argc];
int *pFileHandle = new int[argc];
for (int i = 0; i < argc; i++)
{
memset(pRiff_Header + i, 0, sizeof(RIFF_HEADER));
memset(pFmt_Block + i, 0, sizeof(FMT_BLOCK));
memset(pFact_Block + i, 0, sizeof(FACT_BLOCK));
memset(pData_Block + i, 0, sizeof(DATA_BLOCK));
memset(pFileHandle + i, 0, sizeof(int));
*(pFileHandle + i) = (int)(fopen(argv[i], "rb"));
if (NULL == (pFileHandle + i))
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
}
for (int i = 0; i < argc; i++)
{
int nRead = fread(pRiff_Header + i, 1, sizeof(RIFF_HEADER), (FILE*)(pFileHandle[i]));
if (nRead != sizeof(RIFF_HEADER))
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
nRead = fread(pFmt_Block + i, 1, 8, (FILE*)(*(pFileHandle + i)));
if (8 != nRead)
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
nRead = fread(&((pFmt_Block + i)->wavFormat), 1, (pFmt_Block + i)->dwFmtSize, (FILE*)(*(pFileHandle + i)));
if ((pFmt_Block + i)->dwFmtSize != nRead)
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
nRead = fread(pFact_Block + i, 1, 8, (FILE*)(*(pFileHandle + i)));
if (8 != nRead)
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
//判断是fact字段还是data字段
if (0 == strncmp((pFact_Block + i)->szFactID, "fact", 4))
{
nRead = fread(&((pFact_Block + i)->dwData), 1, sizeof((pFact_Block + i)->dwData), (FILE*)(*(pFileHandle + i)));
if (sizeof((pFact_Block + i)->dwData) != nRead)
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
nRead = fread(pData_Block + i, 1, sizeof(DATA_BLOCK), (FILE*)(*(pFileHandle + i)));
if (nRead != sizeof(DATA_BLOCK))
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
}
else if (0 == strncmp((pFact_Block + i)->szFactID, "data", 4))//如果没有fact段就判断是不是data段
{
memcpy(pData_Block + i, pFact_Block + i, sizeof(DATA_BLOCK));
memset(pFact_Block + i, 0, sizeof(FACT_BLOCK));
}
else
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
}
//开始合并
for (int i = 1; i < argc; i++)
{
pRiff_Header[0].dwRiffSize += pData_Block[i].dwDataSize;
pData_Block[0].dwDataSize += pData_Block[i].dwDataSize;
}
FILE *pOutFile = fopen(pOutFileName, "wb");
if (NULL == pOutFile)
{
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return false;
}
int nWrite = fwrite(&(pRiff_Header[0]), 1, sizeof(RIFF_HEADER), pOutFile);
nWrite = fwrite(&(pFmt_Block[0]), 1, 8, pOutFile);
nWrite = fwrite(&(pFmt_Block[0].wavFormat), 1, pFmt_Block[0].dwFmtSize, pOutFile);
if (pFact_Block[0].dwFactSize)
{
nWrite = fwrite(&(pFact_Block[0]), 1, sizeof(FACT_BLOCK), pOutFile);
}
nWrite = fwrite(&(pData_Block[0]), 1, sizeof(DATA_BLOCK), pOutFile);
for (int i = 0; i < argc; i++)
{
memset(strTemp, 0, sizeof(strTemp));
int nRet = fread(strTemp, 1, sizeof(strTemp) - 1, (FILE*)(*(pFileHandle + i)));
while (0 != nRet)
{
fwrite(strTemp, 1, nRet, pOutFile);
nRet = fread(strTemp, 1, sizeof(strTemp) - 1, (FILE*)(*(pFileHandle + i)));
}
fclose((FILE*)(*(pFileHandle + i)));
}
fclose(pOutFile);
DelArray(pRiff_Header);
DelArray(pFmt_Block);
DelArray(pFact_Block);
DelArray(pData_Block);
DelArray(pFileHandle);
return 1;
}
三 整体解决方案
SDK有300字限制,所以一篇文章,使用SDK合成后,会生成多段音频,
这不是我们所希望的
我们希望的是: 输入文字,然后生成一整段音频。
要实现这个,可以这样解决:
1)输入文章
2)解析文章,生成300字以内小段音频
3)合成各个音频
主进程以管道方式调用控制台程序的示例:
/**
 * Launches the console helper "aliyun\tts.exe" (located under g_strPath) as a
 * hidden child process and blocks until the child closes its output pipe.
 *
 * The command line is built as:
 *   <g_strPath>aliyun\tts.exe <token> <path> <speaker> <content>
 * where <token> is g_AliyunToken (refreshed from control\preSet.ini when a
 * token is stored there) and <speaker> is g_AliyunSpeaker.
 *
 * @param path    Passed to the helper as its second argument after whitespace
 *                is stripped from it (presumably the output audio path —
 *                confirm against tts.exe).
 * @param content Text appended as the last argument. It is only read here,
 *                never written, although it is taken by non-const reference.
 * @return TRUE once the child's output has been drained; FALSE when the pipe
 *         or the process cannot be created.
 *
 * NOTE(review): arguments are appended unquoted, so whitespace or quotes in
 * `content` will change how the child splits its arguments.
 */
BOOL ExcuteCmd(wstring path,wstring &content) {
    const long MAX_COMMAND_SIZE = 10000;  // size of the buffer used to drain the child's output

    // Refresh the cached token from the INI file.
    CString nOptionFile = g_strPath + L"control\\preSet.ini";
    wchar_t szTemp[256] = {0};
    GetPrivateProfileString(L"Aliyun", L"Token", L"", szTemp, 256, nOptionFile);
    // Only overwrite the cached token when the INI actually holds one.
    // (_tclen reports the size of a single character, not the string length,
    // so it cannot be used to detect an empty string.)
    if (szTemp[0] != L'\0')
    {
        g_AliyunToken = szTemp;
    }

    // Assemble the command line: exe, token, path, speaker, content.
    CString szFetCmd = g_strPath + L"aliyun\\tts.exe ";
    szFetCmd.Append(g_AliyunToken + L" ");
    CString pathC(path.c_str());
    CString contentC(content.c_str());
    // Strip whitespace from the path so it stays a single argument.
    pathC.Replace(L" ", L"");
    pathC.Replace(L" ", L"");
    pathC.Replace(L" ", L"");
    szFetCmd.Append(pathC);
    szFetCmd.Append(L" ");
    szFetCmd.Append(g_AliyunSpeaker + L" ");
    szFetCmd.Append(contentC);

    // 1. Create an inheritable pipe that will receive the child's stdout/stderr.
    SECURITY_ATTRIBUTES sa;
    memset(&sa, 0, sizeof(sa));
    sa.nLength = sizeof(SECURITY_ATTRIBUTES);
    sa.lpSecurityDescriptor = NULL;
    sa.bInheritHandle = TRUE;

    HANDLE hReadPipe = NULL;
    HANDLE hWritePipe = NULL;
    if (!CreatePipe(&hReadPipe, &hWritePipe, &sa, 0)) {
        // Nothing was created, so there is nothing to close.
        return FALSE;
    }
    // The child only needs the write end; keep the read end out of it.
    SetHandleInformation(hReadPipe, HANDLE_FLAG_INHERIT, 0);

    // 2. Route the child's stdout/stderr into the pipe and hide its window.
    STARTUPINFOA si;
    memset(&si, 0, sizeof(si));
    si.cb = sizeof(si);
    GetStartupInfoA(&si);
    si.hStdError = hWritePipe;
    si.hStdOutput = hWritePipe;
    si.wShowWindow = SW_HIDE;
    si.dwFlags = STARTF_USESHOWWINDOW | STARTF_USESTDHANDLES;

    PROCESS_INFORMATION pi;
    memset(&pi, 0, sizeof(pi));

    // 3. Start the helper. CreateProcessA needs a writable command-line buffer.
    CStringA sA(szFetCmd.GetString());
    BOOL bret = CreateProcessA(NULL, sA.GetBuffer(), NULL, NULL, TRUE, 0, NULL, NULL, &si, &pi);
    if (!bret) {
        // No process/thread handles exist on failure; only the pipe must go.
        CloseHandle(hWritePipe);
        CloseHandle(hReadPipe);
        return FALSE;
    }

    // Close our copy of the write end first, otherwise ReadFile below would
    // never see the pipe break and would wait forever.
    CloseHandle(hWritePipe);

    // 4. Drain the child's output until it closes the pipe. The data itself
    // is not used; reading keeps the child from blocking on a full pipe.
    WaitForSingleObject(pi.hProcess, 200);
    char szBuffer[MAX_COMMAND_SIZE + 1] = { 0 };
    unsigned long count = 0;
    while (ReadFile(hReadPipe, szBuffer, MAX_COMMAND_SIZE, &count, NULL))
    {
    }

    CloseHandle(hReadPipe);
    CloseHandle(pi.hProcess);
    CloseHandle(pi.hThread);
    return TRUE;
}
有合成音频方面的需求的话,可以私信,目前已实现了 TXT小说到分章节音频的 完整流程