至此已做好了编写语音程序的准备工作,可以开始编写语音程序了。下面首先介绍文本-语音转换的编程技术。
11.2.1 构造CText2Speech类
为了便于使用Speech SDK提供的文本-语音转换COM接口,笔者编写了一个类CText2Speech,其中封装了文本-语音转换COM接口的基本方法。借助该类来编写文本-语音转换程序非常方便。
先来讨论该CText2Speech类的设计,其定义文件列举如下:
///
// active speech engine
//
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
///
// speech message
//
// Window message id that Initialize() registers through
// ISpVoice::SetNotifyWindowMessage for speech-event notification.
// The expansion is parenthesized so it stays a single operand when the macro
// is used inside a larger expression (e.g. `2 * WM_TTSEVENT`).
#define WM_TTSEVENT (WM_USER + 101)
///
// text-to-speech class
//
class CText2Speech
{
public:
CText2Speech();
virtual ~CText2Speech();
// initialize
BOOL Initialize(HWND hWnd = NULL);
void Destroy();
// speak
HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
HRESULT Pause();
HRESULT Resume();
// rate
HRESULT SetRate(long lRateAdjust);
HRESULT GetRate(long* plRateAdjust);
// volume
HRESULT SetVolume(USHORT usVolume);
HRESULT GetVolume(USHORT* pusVolume);
// voice
ULONG GetVoiceCount();
HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = -1);
HRESULT SetVoice(WCHAR **ppszDescription);
// error string
CString GetErrorString()
{
return m_sError;
}
// interface
CComPtr<ISpVoice> m_IpVoice;
private:
CString m_sError;
};
文件的开始几行语句:
#include <atlbase.h>
extern CComModule _Module;
#include <atlcom.h>
#include "sapi.h"
#include <sphelper.h>
用于使我们的代码能操作Speech SDK中的相关的接口、函数和常量。
Speech SDK支持事件。为了与窗口交互,这里在类的定义文件中定义了消息WM_TTSEVENT。当发生Speech事件时,向相应的窗口发送WM_TTSEVENT消息。在窗口中处理该消息,即可响应相应的事件。
CText2Speech类中定义了一个操作Text-To-Speech引擎的接口指针m_IpVoice,作为数据成员,其定义如下:
CComPtr<ISpVoice> m_IpVoice;
几乎所有的Text-To-Speech操作都是借助该指针来调用ISpVoice接口的方法而实现的。
CText2Speech类实现了如下的方法:
// 初始化和释放函数
BOOL Initialize(HWND hWnd = NULL);
void Destroy();
// 语音操作函数
HRESULT Speak(const WCHAR *pwcs, DWORD dwFlags = SPF_DEFAULT);
HRESULT Pause();
HRESULT Resume();
// 语速函数
HRESULT SetRate(long lRateAdjust);
HRESULT GetRate(long* plRateAdjust);
// 音量函数
HRESULT SetVolume(USHORT usVolume);
HRESULT GetVolume(USHORT* pusVolume);
// 语言函数
ULONG GetVoiceCount();
HRESULT GetVoice(WCHAR **ppszDescription, ULONG lIndex = -1);
HRESULT SetVoice(WCHAR **ppszDescription);
// 获取错误信息函数
CString GetErrorString();
CText2Speech类的构造函数用于初始化Text-To-Speech引擎接口指针m_IpVoice和错误字符串;析构函数则调用释放引擎的Destroy()函数释放语音引擎,其代码如下:
// Starts with no voice interface attached and an empty error string.
CText2Speech::CText2Speech()
    : m_IpVoice()
    , m_sError(_T(""))
{
}
// Tears the speech engine down together with the object.
CText2Speech::~CText2Speech()
{
    this->Destroy();
}
初始化函数Initialize首先初始化COM库,并调用CoCreateInstance方法初始化语音引擎。然后设置必须响应的引擎事件,并指定响应事件消息的窗口句柄。该窗口句柄是作为函数的参数传入的。Initialize函数的代码如下:
// Brings up COM and the SAPI voice, then wires up event notification.
//
// hWnd: optional window that receives WM_TTSEVENT; ignored unless
//       ::IsWindow(hWnd) is true.
// Returns TRUE on success. On failure returns FALSE and stores a description
// in m_sError.
BOOL CText2Speech::Initialize(HWND hWnd)
{
    // COM has to be initialized before the voice object can be created.
    const HRESULT hrCom = CoInitialize(NULL);
    if (!SUCCEEDED(hrCom))
    {
        m_sError = _T("Error intialization COM");
        return FALSE;
    }

    if (FAILED(m_IpVoice.CoCreateInstance(CLSID_SpVoice)))
    {
        m_sError = _T("Error creating voice");
        return FALSE;
    }

    // The same viseme mask is used for both the "interest" and the "queued"
    // event sets.
    const ULONGLONG ullVisemeEvents = SPFEI(SPEI_VISEME);
    if (FAILED(m_IpVoice->SetInterest(ullVisemeEvents, ullVisemeEvents)))
    {
        m_sError = _T("Error creating interest...seriously");
        return FALSE;
    }

    // Without a usable window there is nothing left to hook up.
    if (!::IsWindow(hWnd))
        return TRUE;

    if (FAILED(m_IpVoice->SetNotifyWindowMessage(hWnd, WM_TTSEVENT, 0, 0)))
    {
        m_sError = _T("Error setting notification window");
        return FALSE;
    }
    return TRUE;
}
释放函数则释放语音引擎接口和COM库,其代码如下:
// Releases the voice interface and balances the CoInitialize() made by
// Initialize().
//
// CoUninitialize() is only called while a voice object is held. This keeps
// Destroy() safe to call on an object that was never initialized, and safe to
// call more than once (an explicit Destroy() followed by the destructor), so
// COM is not torn down for other users of the thread.
//
// NOTE(review): if Initialize() succeeded in CoInitialize() but failed to
// create the voice, that COM initialization is not undone here; tracking it
// exactly needs a dedicated flag in the class.
void CText2Speech::Destroy()
{
    if (!m_IpVoice)
        return;

    m_IpVoice.Release();
    CoUninitialize();
}
语音、语速、音量函数都是通过m_IpVoice成员直接调用ISpVoice接口的相关方法来实现的:
// Speaks the text in pwcs using the given SPF_* flags.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists, instead of dereferencing a NULL interface pointer; otherwise
// returns the result of ISpVoice::Speak.
HRESULT CText2Speech::Speak(const WCHAR *pwcs, DWORD dwFlags)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->Speak(pwcs, dwFlags, NULL);
}
// Pauses the voice.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists; otherwise returns the result of ISpVoice::Pause.
HRESULT CText2Speech::Pause()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->Pause();
}
// Resumes a paused voice.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists; otherwise returns the result of ISpVoice::Resume.
HRESULT CText2Speech::Resume()
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->Resume();
}
// rate
// Passes lRateAdjust to ISpVoice::SetRate.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists.
HRESULT CText2Speech::SetRate(long lRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->SetRate(lRateAdjust);
}
// Reads the current rate adjustment into *plRateAdjust via ISpVoice::GetRate.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists.
HRESULT CText2Speech::GetRate(long* plRateAdjust)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->GetRate(plRateAdjust);
}
// volume
// Passes usVolume to ISpVoice::SetVolume.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists.
HRESULT CText2Speech::SetVolume(USHORT usVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->SetVolume(usVolume);
}
// Reads the current volume into *pusVolume via ISpVoice::GetVolume.
// Returns SPERR_UNINITIALIZED (and records an error string) when no voice
// object exists.
HRESULT CText2Speech::GetVolume(USHORT* pusVolume)
{
    if (!m_IpVoice)
    {
        m_sError = _T("Voice engine is not initialized");
        return SPERR_UNINITIALIZED;
    }
    return m_IpVoice->GetVolume(pusVolume);
}
语言函数的实现比较复杂。由于ISpVoice接口提供的语言函数,都只与抽象的语音语言接口ISpObjectToken相关,而我们能看到的却是语音语言的描述,比如,通过控制面板的语音程序所能见到的就是语音语言的描述。因此,笔者设计了直接对语音语言进行操作的语言函数,包括获取系统中已安装的语音语言数目,设置指定的语音语言,获取指定的语音语言描述(包括当前设定的语音语言)。它们的代码如下:
// Counts the voice tokens enumerated under SPCAT_VOICES.
//
// Returns the count on success. On failure stores a description in m_sError
// and returns (ULONG)-1, the same value the original implicit `-1` produced.
ULONG CText2Speech::GetVoiceCount()
{
    // Explicit sentinel instead of relying on an implicit -1 -> ULONG conversion.
    const ULONG ulFailure = static_cast<ULONG>(-1);

    // Enumerate the available voices
    CComPtr<IEnumSpObjectTokens> cpEnum;
    HRESULT hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
    if (FAILED(hr))
    {
        m_sError = _T("Error to enumerate voices");
        return ulFailure;
    }

    // Get the number of voices
    ULONG ulCount = 0;
    hr = cpEnum->GetCount(&ulCount);
    if (FAILED(hr))
    {
        m_sError = _T("Error to get voice count");
        return ulFailure;
    }
    return ulCount;
}
HRESULT CText2Speech::GetVoice(WCHAR **ppszDescription, ULONG lIndex)
{
HRESULT hr = S_OK;
CComPtr<ISpObjectToken> cpVoiceToken;
CComPtr<IEnumSpObjectTokens> cpEnum;
ULONG ulCount = 0;
if (lIndex == -1)
{
// current voice
//
hr = m_IpVoice->GetVoice(&cpVoiceToken);
if(FAILED(hr))
{
m_sError = _T("Error to get current voice");
return hr;
}
SpGetDescription(cpVoiceToken, ppszDescription);
if(FAILED(hr))
{
m_sError = _T("Error to get current voice description");
return hr;
}
}
else
{
// else other voices, we should enumerate the voice list first
//Enumerate the available voices
hr = SpEnumTokens(SPCAT_VOICES, NULL, NULL, &cpEnum);
if(FAILED(hr))
{
m_sError = _T("Error to enumerate voices");
return hr;
}
//Get the number of voices
hr = cpEnum->GetCount(&ulCount);
if(FAILED(hr))
{
m_sError = _T("Error to voice count");
return hr;
}
// range control
ASSERT(lIndex >= 0);
ASSERT(lIndex < ulCount);
// Obtain specified voice id
ULONG l = 0;
while (SUCCEEDED(hr))
{
cpVoiceToken.Release();
hr = cpEnum->Next( 1, &cpVoiceToken, NULL );
if(FAILED(hr))
{
m_sError = _T("Error to get voice token");
return hr;
}
if (l == lIndex)
{
hr = SpGetDescription(cpVoiceToken, ppszDescription);
if(FAILED(hr))
{
m_sError = _T("Error to get voice description");
return hr;
}
break;