// Prerequisites: an Alibaba Cloud account and an AccessKey; then create a
// speech-synthesis project under "Intelligent Speech Interaction" (the official
// docs cover this in detail). IMPORTANT: download the sample code from GitHub,
// NOT the prebuilt package — the prebuilt download does not work.
#pragma once
#include <windows.h>
#include <ctime>
#include <map>
#include <string>
#include <iostream>
#include <vector>
#include <fstream>
#include <process.h>
#include "nlsEvent.h"
#include "nlsClient.h"
#include "curl/curl.h"
#include "jsoncpp\json.h"
#include "speechSynthesizerRequest.h"
#include "nlsCommonSdk/Token.h"
#pragma comment(lib,"libcurl.lib")
#pragma comment(lib,"jsoncpp.lib")
#pragma comment(lib,"alibabacloud-idst-common.lib")
#pragma comment(lib,"alibabacloud-idst-speech.lib")
///标准操作
using std::map;
using std::string;
using std::vector;
using std::cout;
using std::endl;
using std::ifstream;
using std::ofstream;
using std::ios;
///阿里相关操作
using namespace AlibabaNlsCommon;
using AlibabaNls::NlsClient;
using AlibabaNls::NlsEvent;
using AlibabaNls::LogDebug;
using AlibabaNls::LogInfo;
using AlibabaNls::SpeechSynthesizerRequest;
//自己申请的accesskey
///AccessKey credentials you applied for in the Alibaba Cloud console.
struct AliAccessKey
{
string id;///AccessKey ID
string secret;///AccessKey Secret
};
///自己配置的项目信息
///Per-project settings configured in the cloud console.
struct SpeechVerify
{
string appKey;//project appkey
string spokesman;//voice (speaker) name
string filename;//full path of the synthesized audio file to write
};
///Singleton wrapper around the Alibaba Cloud speech-synthesis (TTS) SDK.
class AliSpeech
{
protected:
    AliSpeech();
    ~AliSpeech();
public:
    ///Access the process-wide singleton instance.
    static AliSpeech *GetSpeech();
public:
    bool InitAliyun(AliAccessKey key);
    bool InitAliyun(string id, string secret);
    bool DeInitAliyun();
    ///Select the synthesis dialect; pass one of the "DIALECT_"-prefixed macros.
    bool SelectSpeech(const char *dialect);
    ///Synthesize speech for the given GBK-encoded text (blocking).
    bool SyntheticSpeech(const char *text);
    ///Whether InitAliyun() has stored a usable AccessKey pair.
    bool IsInited();
    ///Whether a synthesis request is currently in progress.
    bool IsBusying();
    ///Look up the project settings registered for the given dialect name.
    SpeechVerify GetSpeechVerify(string dialect);
protected:
    /*Populate m_SpeechVerifyMap with the per-dialect project settings.*/
    void InitSpeechVerifyMap();
    /*Request a fresh token from the AccessKey pair and fetch its expiry timestamp.*/
    bool UpdateToken(string akId, string akSecret, string* token, long* expireTime);
    /*Worker thread used to send the request and receive the audio.*/
    static unsigned int WINAPI SpeechThread(LPVOID lpParam);
private:
    string m_Dialect;
    string m_SpeechWord_UTF8;
    AliAccessKey m_accessKey;
    std::map<string,SpeechVerify> m_SpeechVerifyMap;
};
//English synthesis requires English input text;
//Chinese text must first be translated via the Youdao API.
namespace YouDaoAPI
{
///Translate Chinese text to English. Returns 1 on success, 0 on failure.
int CN2EN(std::string cn, std::string &en);
///HTTP GET helper: fetches strUrl and stores the response body in strResponse.
int Get(const std::string& strUrl, std::string& strResponse);
}
///Alibaba Cloud does not currently support multiple languages in one project;
///you have to create one project per dialect.
///Dialect-name macros (pass to SelectSpeech) plus per-dialect enable switches.
#define DIALECT_PUTONGHUA "普通话"
#define DIALECT_ENABLE_PUTONGHUA 1
#define DIALECT_YINGYU "英语"
#define DIALECT_ENABLE_YINGYU 1
#define DIALECT_SICHUAN "四川话"
#define DIALECT_ENABLE_SICHUAN 1
#define DIALECT_GUANGDONG "广东话"
#define DIALECT_ENABLE_GUANGDONG 1
#define DIALECT_TAIWAN "台湾话"
#define DIALECT_ENABLE_TAIWAN 1
#define DIALECT_DONGBEI "东北话"
#define DIALECT_ENABLE_DONGBEI 1
#define DIALECT_TIANJING "天津话"
#define DIALECT_ENABLE_TIANJING 1
#include "stdafx.h"
#include "AliSpeech.h"
#define SPEECH_WORK_THREAD 4//number of SDK worker threads
#define SPEECH_EXPIRE_TIME 10//refresh the token this many seconds before it expires
#define REQUEST_VOLUME 50//volume, range 0~100, default 50
#define REQUEST_FORMAT "wav"//audio format: pcm, wav or mp3; default wav
#define REQUEST_SAMPLERATE 16000//sample rate: 8000 or 16000; default 16000
#define REQUEST_SPEECHRATE 0//speech rate, range -500~500, default 0
#define REQUEST_PITCHRATE 0//pitch, range -500~500, default 0
bool g_Busying=false;//true while a synthesis request is in flight (NOTE(review): shared across threads without synchronization — confirm this is acceptable)
long g_ExpireTime = -1;///token expiry timestamp
string g_AccessToken = "";///current access token
ofstream g_AudioFile;///output audio file
//************************************************************************************
//****************Callback functions from the official Alibaba sample****************
//************************************************************************************
/**********************************************************************
**When the cloud reports that synthesis has finished, an SDK-internal
**thread raises the Completed event; afterwards the SDK closes the
**connection channel.
*********************************************************************/
void OnSynthesisCompleted(NlsEvent* cbEvent, void* cbParam);
/********************************************************************************
**When an error occurs during synthesis, an SDK-internal thread raises the
**TaskFailed event; afterwards the SDK closes the connection channel.
**Do not call stop() or releaseRecognizerRequest() inside the callback.
*********************************************************************************/
void OnSynthesisTaskFailed(NlsEvent* cbEvent, void* cbParam);
/**********************************************************************************
**When synthesis ends (or fails) the channel is closed and an SDK-internal
**thread raises the ChannelClosed event.
**Do not call stop() or releaseRecognizerRequest() inside the callback.
*********************************************************************************/
void OnSynthesisChannelClosed(NlsEvent* cbEvent, void* cbParam);
/**********************************************************************************
**After the text is uploaded, binary audio data returned by the server is
**delivered to the user via the BinaryDataRecved event.
**Do not call stop() or releaseRecognizerRequest() inside the callback.
*********************************************************************************/
void OnBinaryDataRecved(NlsEvent* cbEvent, void* cbParam);
///Construct with Mandarin as the default dialect and the busy flag cleared.
AliSpeech::AliSpeech()
    : m_Dialect(DIALECT_PUTONGHUA)
{
    g_Busying = false;
}
///Releases the NLS client on destruction (runs at process exit for the singleton).
AliSpeech::~AliSpeech()
{
DeInitAliyun();
}
///Meyers-singleton accessor: the single instance is created on first use and
///destroyed automatically at process exit.
AliSpeech * AliSpeech::GetSpeech()
{
    static AliSpeech s_instance;
    return &s_instance;
}
///Initialize the SDK with the given AccessKey pair.
///Returns false when the key is incomplete or the log file cannot be configured.
bool AliSpeech::InitAliyun(AliAccessKey key)
{
    const bool keyUsable = !key.id.empty() && !key.secret.empty();
    if (!keyUsable)
        return false;
    m_accessKey = key;
    ///Route SDK errors into a log file.
    if (NlsClient::getInstance()->setLogConfig("SpeechLogError.log", AlibabaNls::LogLevel::LogError) == -1)
        return false;
    ///Spin up the SDK worker threads, then register the per-dialect settings.
    NlsClient::getInstance()->startWorkThread(SPEECH_WORK_THREAD);
    InitSpeechVerifyMap();
    return true;
}
///Convenience overload: bundle the two strings into an AliAccessKey and forward.
bool AliSpeech::InitAliyun(string id, string secret)
{
    AliAccessKey key = { id, secret };
    return InitAliyun(key);
}
///Release the NLS client. Call once, before process exit, after all work is done.
bool AliSpeech::DeInitAliyun()
{
// Release nlsClient before the process exits. Note: releaseInstance() is NOT thread-safe.
NlsClient::releaseInstance();
return true;
}
///Select the synthesis dialect by name (one of the DIALECT_* macros).
///Unknown or null input falls back to Mandarin and returns false.
bool AliSpeech::SelectSpeech(const char * dialect)
{
    // All known dialect names; anything else falls back to Mandarin.
    static const char * const kDialects[] =
    {
        DIALECT_PUTONGHUA,
        DIALECT_YINGYU,
        DIALECT_SICHUAN,
        DIALECT_GUANGDONG,
        DIALECT_TAIWAN,
        DIALECT_DONGBEI,
        DIALECT_TIANJING,
    };
    if (dialect != NULL) // guard: strcmp(NULL, ...) is undefined behavior
    {
        for (size_t i = 0; i < sizeof(kDialects) / sizeof(kDialects[0]); ++i)
        {
            if (strcmp(dialect, kDialects[i]) == 0)
            {
                m_Dialect = kDialects[i];
                return true;
            }
        }
    }
    // Unknown (or null) dialect: fall back to Mandarin and report failure.
    m_Dialect = DIALECT_PUTONGHUA;
    return false;
}
/*Convert a GBK (system ANSI code page) string to UTF-8.
**Returns an empty string when either conversion step fails.
**Uses std::vector buffers instead of raw new/delete, so no memory is leaked
**even if string construction throws.*/
string GBKToUTF8(const string &strGBK)
{
    // GBK -> UTF-16 first; passing -1 makes the API include the terminator.
    int wideLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
    if (wideLen <= 0)
        return ""; // conversion failed (the original did not check this)
    std::vector<WCHAR> wide(wideLen);
    MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, &wide[0], wideLen);
    // UTF-16 -> UTF-8.
    int utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
        return "";
    std::vector<char> utf8(utf8Len);
    WideCharToMultiByte(CP_UTF8, 0, &wide[0], -1, &utf8[0], utf8Len, NULL, NULL);
    // The buffer is NUL-terminated; construct the result from it.
    return string(&utf8[0]);
}
bool AliSpeech::SyntheticSpeech(const char * text)
{
if (!IsInited()||IsBusying()||!text)return false;
std::time_t curTime = std::time(0);
///判断token有没有过期
if (g_ExpireTime - curTime < SPEECH_EXPIRE_TIME)
{
//时间戳过期,重新申请
if (!UpdateToken(m_accessKey.id, m_accessKey.secret, &g_AccessToken, &g_ExpireTime))
{
return false;
}
}
防打扰
g_Busying = true;
m_SpeechWord_UTF8 = GBKToUTF8(text);
if (m_SpeechWord_UTF8.empty())
return false;
开启新线程
HANDLE threadHandle;
unsigned threadId;
threadHandle = (HANDLE)_beginthreadex(NULL, 0, &SpeechThread, (LPVOID)this, 0, &threadId);
WaitForSingleObject(threadHandle, INFINITE);
CloseHandle(threadHandle);
return true;
}
///An instance counts as initialized once both halves of the AccessKey are set.
bool AliSpeech::IsInited()
{
    return !m_accessKey.id.empty() && !m_accessKey.secret.empty();
}
///Whether a synthesis request is currently running (mirrors g_Busying).
bool AliSpeech::IsBusying()
{
return g_Busying;
}
///Look up the settings registered for the given dialect name.
///Unknown names fall back to the first registered entry; an empty map yields
///default-constructed (empty) settings.
SpeechVerify AliSpeech::GetSpeechVerify(string dialect)
{
    std::map<string,SpeechVerify>::iterator it = m_SpeechVerifyMap.find(dialect);
    if (it != m_SpeechVerifyMap.end())
        return it->second;
    ///Not found: fall back to the first entry. (The original called at(0),
    ///which constructs a std::string key from a null char pointer — undefined
    ///behavior, not a valid "first element" lookup.)
    if (!m_SpeechVerifyMap.empty())
        return m_SpeechVerifyMap.begin()->second;
    return SpeechVerify(); // nothing registered at all
}
///Register the appkey/voice/output-file settings for every enabled dialect.
///The appkeys are intentionally blank: create your own cloud projects and fill
///them in. See the API docs for the available voices.
void AliSpeech::InitSpeechVerifyMap()
{
#if DIALECT_ENABLE_PUTONGHUA
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "xiaoyun";
        v.filename = "audio/pth.wav";
        m_SpeechVerifyMap[DIALECT_PUTONGHUA] = v;
    }
#endif // DIALECT_ENABLE_PUTONGHUA
#if DIALECT_ENABLE_YINGYU
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "ava";
        v.filename = "audio/yy.wav";
        m_SpeechVerifyMap[DIALECT_YINGYU] = v;
    }
#endif // DIALECT_ENABLE_YINGYU
#if DIALECT_ENABLE_SICHUAN
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "chuangirl";
        v.filename = "audio/sc.wav";
        m_SpeechVerifyMap[DIALECT_SICHUAN] = v;
    }
#endif // DIALECT_ENABLE_SICHUAN
#if DIALECT_ENABLE_GUANGDONG
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "jiajia";
        v.filename = "audio/gd.wav";
        m_SpeechVerifyMap[DIALECT_GUANGDONG] = v;
    }
#endif // DIALECT_ENABLE_GUANGDONG
#if DIALECT_ENABLE_TAIWAN
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "qingqing";
        v.filename = "audio/tw.wav";
        m_SpeechVerifyMap[DIALECT_TAIWAN] = v;
    }
#endif // DIALECT_ENABLE_TAIWAN
#if DIALECT_ENABLE_DONGBEI
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "dahu";
        v.filename = "audio/db.wav";
        m_SpeechVerifyMap[DIALECT_DONGBEI] = v;
    }
#endif // DIALECT_ENABLE_DONGBEI
#if DIALECT_ENABLE_TIANJING
    {
        SpeechVerify v;
        v.appKey = "";
        v.spokesman = "aikan";
        v.filename = "audio/tj.wav";
        m_SpeechVerifyMap[DIALECT_TIANJING] = v;
    }
#endif // DIALECT_ENABLE_TIANJING
}
/*Request a fresh token for the given AccessKey pair.
**On success writes the token and its expiry timestamp through the out-pointers.
**Returns false on null out-pointers or when the token request fails.*/
bool AliSpeech::UpdateToken(string akId, string akSecret, string * token, long * expireTime)
{
    if (token == NULL || expireTime == NULL)
        return false; // guard the out-parameters before dereferencing them
    NlsToken nlsTokenRequest;
    nlsTokenRequest.setAccessKeyId(akId);
    nlsTokenRequest.setKeySecret(akSecret);
    if (-1 == nlsTokenRequest.applyNlsToken())
    {
        ///Build the message in a std::string: the SDK error text may exceed a
        ///fixed MAX_PATH buffer, and sprintf_s aborts on overflow rather than
        ///truncating.
        string log = string("Failed:") + nlsTokenRequest.getErrorMsg() + "\n";
        OutputDebugString(log.c_str());
        return false;
    }
    *token = nlsTokenRequest.getToken();
    *expireTime = nlsTokenRequest.getExpireTime();
    return true;
}
/*Worker thread: runs one complete synthesis request (blocking).
**Clears g_Busying on every early-failure path where OnSynthesisChannelClosed
**will never fire — the original left the flag stuck true on those paths, so
**the singleton reported busy forever after one failure.*/
unsigned int AliSpeech::SpeechThread(LPVOID lpParam)
{
    AliSpeech *pSpeech = (AliSpeech *)lpParam;
    if (!pSpeech)
    {
        g_Busying = false;
        return (unsigned int)-1; // no context was passed in
    }
    SpeechVerify verify = pSpeech->GetSpeechVerify(pSpeech->m_Dialect);
    g_AudioFile.open(verify.filename.c_str(), ios::binary | ios::out);
    if (!g_AudioFile.is_open())
    {
        OutputDebugString("open audio file failed.\n");
        g_Busying = false;
        return 0;
    }
    /*Create the SpeechSynthesizerRequest object.*/
    SpeechSynthesizerRequest* request = NlsClient::getInstance()->createSynthesizerRequest();
    if (request == NULL)
    {
        OutputDebugString("createSynthesizerRequest failed.\n");
        g_AudioFile.close();
        g_Busying = false; // no callbacks will ever run, so clear the flag here
        return 0;
    }
    /*Register the callbacks.*/
    request->setOnSynthesisCompleted(OnSynthesisCompleted, NULL);   // synthesis finished
    request->setOnChannelClosed(OnSynthesisChannelClosed, NULL);    // channel closed
    request->setOnTaskFailed(OnSynthesisTaskFailed, NULL);          // synthesis failed
    request->setOnBinaryDataReceived(OnBinaryDataRecved, NULL);     // audio chunk received
    /*Request parameters.*/
    request->setAppKey(verify.appKey.c_str());              // AppKey, required (apply on the official site)
    request->setText(pSpeech->m_SpeechWord_UTF8.c_str());   // text to synthesize, required, must be UTF-8
    request->setVoice(verify.spokesman.c_str());            // voice, e.g. "xiaoyun", "ruoxi", "xiaogang"; optional, default xiaoyun
    request->setVolume(REQUEST_VOLUME);                     // volume 0~100, optional, default 50
    request->setFormat(REQUEST_FORMAT);                     // audio format pcm/wav/mp3, optional, default wav
    request->setSampleRate(REQUEST_SAMPLERATE);             // sample rate 8000/16000, optional, default 16000
    request->setSpeechRate(REQUEST_SPEECHRATE);             // speech rate -500~500, optional, default 0
    request->setPitchRate(REQUEST_PITCHRATE);               // pitch -500~500, optional, default 0
    request->setToken(g_AccessToken.c_str());               // account token, required
    /*start() blocks until the server responds or the call times out. After it
    **returns, the text has been sent and audio arrives via OnBinaryDataRecved.*/
    if (request->start() < 0)
    {
        OutputDebugString("start() failed.\n");
        NlsClient::getInstance()->releaseSynthesizerRequest(request); // release on failure
        g_AudioFile.close();
        g_Busying = false; // the channel never opened, so no ChannelClosed callback will clear it
        return 0;
    }
    /*stop() blocks until the server acknowledges (or times out), closing the channel.*/
    request->stop();
    /*Done: release the request object.*/
    NlsClient::getInstance()->releaseSynthesizerRequest(request);
    return 0;
}
///Completed event: the cloud finished synthesizing; the SDK closes the channel next.
void OnSynthesisCompleted(NlsEvent* cbEvent, void* cbParam)
{
    char log[MAX_PATH];
    ZeroMemory(log, MAX_PATH);
    // NOTE(review): the NLS SDK's getTaskId() returns a string, so it must be
    // formatted with %s — the original used %d, which printed garbage.
    // Confirm against the SDK header in use.
    sprintf_s(log, "OnSynthesisCompleted:[status:%d][task:%s]\n", cbEvent->getStatusCode(), cbEvent->getTaskId());
    OutputDebugString(log);
}
///TaskFailed event: synthesis errored; the SDK closes the channel afterwards.
void OnSynthesisTaskFailed(NlsEvent* cbEvent, void* cbParam)
{
    char log[MAX_PATH];
    ZeroMemory(log, MAX_PATH);
    // NOTE(review): getTaskId() returns a string in the NLS SDK — format with
    // %s, not the original %d. Confirm against the SDK header in use.
    sprintf_s(log, "OnSynthesisTaskFailed:[status:%d][task:%s][error:%s]\n", cbEvent->getStatusCode(), cbEvent->getTaskId(), cbEvent->getErrorMessage());
    OutputDebugString(log);
}
//Final step of the synthesis flow: the connection channel has been closed.
void OnSynthesisChannelClosed(NlsEvent* cbEvent, void* cbParam)
{
char log[MAX_PATH];
ZeroMemory(log, MAX_PATH);
sprintf_s(log, "OnRecognitionChannelCloseed: All response:%s\n", cbEvent->getAllResponse());
OutputDebugString(log);
g_AudioFile.close();
Sleep(300);///give downstream consumers a moment to process the finished file before clearing the busy flag
g_Busying = false;
}
///BinaryDataRecved event: a chunk of synthesized audio arrived; append it to the file.
void OnBinaryDataRecved(NlsEvent* cbEvent, void* cbParam)
{
    vector<unsigned char> data = cbEvent->getBinaryData(); // binary audio chunk for this event
    char log[MAX_PATH];
    ZeroMemory(log, MAX_PATH);
    // size() is a size_t; cast so it matches %u on both 32- and 64-bit builds
    // (the original passed it straight to %d — a type mismatch on x64).
    sprintf_s(log, "OnBinaryDataRecved:[Size:%u]\n", (unsigned int)data.size());
    OutputDebugString(log);
    // Append the chunk to the audio file.
    if (data.size() > 0)
    {
        g_AudioFile.write((char*)&data[0], data.size());
    }
}
// Chinese-to-English translation via the Youdao API
///libcurl write callback: appends the received chunk to the std::string passed
///via CURLOPT_WRITEDATA.
///Must return the number of bytes actually handled (size * nmemb); any other
///value makes libcurl abort the transfer.
static size_t OnWriteData(void* buffer, size_t size, size_t nmemb, void* lpVoid)
{
    // dynamic_cast on a non-polymorphic type was meaningless; a static_cast
    // from void* is the correct conversion here.
    std::string* str = static_cast<std::string*>(lpVoid);
    if (NULL == str || NULL == buffer)
    {
        return 0; // 0 != size*nmemb signals an error to libcurl (size_t cannot represent -1)
    }
    const char* pData = static_cast<const char*>(buffer);
    str->append(pData, size * nmemb);
    // The original returned nmemb, which under-reports the byte count whenever
    // size > 1 and would make libcurl abort the transfer.
    return size * nmemb;
}
int YouDaoAPI::CN2EN(std::string cn, std::string &en)
{
std::string json;
std::string url = "https://fanyi.youdao.com/translate?&doctype=json&type=AUTO&i=" + cn;
YouDaoAPI::Get(url, json);
Json::Reader reader;
Json::Value root;
if (reader.parse(json, root))
{
根据具体返回的json解析一下
en=root["translateResult"][0][0]["tgt"].asString();
return 1;
}
return 0;
}
/*Perform an HTTP(S) GET and collect the response body into strResponse.
**Returns a CURLcode (CURLE_OK == 0 on success).*/
int YouDaoAPI::Get(const std::string & strUrl, std::string & strResponse)
{
    CURL* curl = curl_easy_init();
    if (NULL == curl)
    {
        return CURLE_FAILED_INIT;
    }
    curl_easy_setopt(curl, CURLOPT_URL, strUrl.c_str());
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, NULL);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, OnWriteData);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&strResponse);
    // Without NOSIGNAL, libcurl's timeout signals can interrupt a sleep/wait in
    // another thread and terminate the program.
    // Long-typed options must be passed a long through the varargs interface
    // (the original passed int/bool, which is undefined per the libcurl docs).
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
    //curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 3);
    //curl_easy_setopt(curl, CURLOPT_TIMEOUT, 3);
    // Skip certificate validation for the https endpoint.
    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);
    curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);
    CURLcode res = curl_easy_perform(curl);
    curl_easy_cleanup(curl);
    return res;
}