用SAPI实现Speech Recognition（SR） - 命令控制模式

最新推荐文章于 2020-02-08 07:00:00 发布

皓月如我

最新推荐文章于 2020-02-08 07:00:00 发布

阅读量1.6k

点赞数 3

分类专栏：多媒体 & 视频监控文章标签：语音识别微软

本文链接：https://blog.csdn.net/fm0517/article/details/77932952

版权

多媒体 & 视频监控专栏收录该内容

35 篇文章 1 订阅

订阅专栏

微软的语音识别，在这里我们简称它为SR（speech recognition）。SR分为两种监听模式：第一种是听写模式，即随意输入语音，监听对象将最为接近的字、词或句反馈出来；第二种是命令与控制模式，即划定范围监听：指定一组备选项作为监听范围，用户的语音输入被反馈成最为接近的一个选项。说得通俗一些：第一种是填空题，第二种是选择题。

之前转载的一篇文章《用SAPI实现Speech Recognition（SR） - 听写模式》介绍了“听写模式”的实现，这一篇给出“命令与控制”模式的例子程序。

#include <windows.h>  
#include <sapi.h>  
#include <stdio.h>  
#include <string.h>  
#include <atlbase.h>  
#include "sphelper.h"  

// Blocks until the recognition context delivers an event, then hands the
// event's recognition result back to the caller.
//
//   pRecoCtxt - recognition context to pull events from and to wait on.
//   ppResult  - receives the result pointer taken from the event (may be
//               NULL); when non-NULL it has been AddRef'ed for the caller,
//               who must Release it.
//
// Returns the HRESULT of the last GetFrom / WaitForNotifyEvent call.
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult)
{
    CSpEvent event;
    HRESULT hr;

    for (;;)
    {
        // Anything other than S_FALSE ends the wait: either an event was
        // fetched or the call failed.
        hr = event.GetFrom(pRecoCtxt);
        if (hr != S_FALSE)
        {
            break;
        }

        // Queue is empty: sleep until the context signals a notification.
        hr = pRecoCtxt->WaitForNotifyEvent(INFINITE);
        if (FAILED(hr))
        {
            break;
        }
    }

    // Take an extra reference for the caller; presumably the CSpEvent drops
    // its own reference when it goes out of scope (see sphelper.h).
    ISpRecoResult * pResult = event.RecoResult();
    if (pResult != NULL)
    {
        pResult->AddRef();
    }
    *ppResult = pResult;

    return hr;
}

// Returns the spoken word that means "stop" for the user's default UI
// language. The returned pointer refers to a string literal and must not be
// freed or modified.
//
// For Japanese the literal is the word "shuuryou" (end/finish) given as
// kanji, then katakana after a backslash, then hiragana after a slash
// (presumably SAPI's display\reading/pronunciation form). The hex escapes
// must stay intact: without the "\x" prefixes the text degenerates into
// ASCII characters plus an embedded "\0" that truncates the string.
const WCHAR * StopWord()
{
    const LANGID LangId = ::SpGetUserDefaultUILanguage();

    switch (LangId)
    {
    case MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT):
        return L"\x7d42\x4e86\\\x30b7\x30e5\x30fc\x30ea\x30e7\x30fc/\x3057\x3085\x3046\x308a\x3087\x3046";
    default:
        return L"Stop";
    }
}

// Runs one command-and-control recognition session: creates the shared
// recognition context, loads the grammar from "conf.xml", activates it and
// then loops, echoing every recognized phrase.
//
// Must be called between CoInitialize and CoUninitialize. All COM smart
// pointers are local to this function, so they are released before the
// caller uninitializes COM, on every return path.
//
//   fUseTTS - speak the recognized text back through the context's voice.
//   fReplay - replay the retained audio of the utterance.
//
// Returns a negative step-specific code (-2 .. -16) when setup fails,
// otherwise the failing HRESULT that ended the recognition loop.
static int RunCommandRecognition(bool fUseTTS, bool fReplay)
{
    HRESULT hr = E_FAIL;
    CComPtr<ISpRecoContext> cpRecoCtxt;
    CComPtr<ISpRecoGrammar> cpRecoGrammar;
    CComPtr<ISpVoice> cpVoice;

    if (FAILED(hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext)))
    {
        printf("cpRecoCtxt.CoCreateInstance() fail. hr = %x", hr);
        return -2;
    }
    if (FAILED(hr = cpRecoCtxt->GetVoice(&cpVoice)))
    {
        printf("cpRecoCtxt->GetVoice() fail. hr = %x", hr);
        return -3;
    }
    if (!cpRecoCtxt || !cpVoice)
    {
        // Both calls reported success but yielded no object; nothing to do.
        return static_cast<int>(hr);
    }

    // Use a Win32 event for notifications so BlockForResult can wait on it.
    if (FAILED(hr = cpRecoCtxt->SetNotifyWin32Event()))
    {
        printf("cpRecoCtxt->SetNotifyWin32Event() fail. hr = %x", hr);
        return -4;
    }
    // Only recognition events are of interest, both signalled and queued.
    if (FAILED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))))
    {
        printf("cpRecoCtxt->SetInterest() fail. hr = %x", hr);
        return -5;
    }
    // Retain the input audio so SpeakAudio can replay the utterance.
    if (FAILED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)))
    {
        printf("cpRecoCtxt->SetAudioOptions() fail. hr = %x", hr);
        return -6;
    }
    // 7 is the grammar id chosen by this sample (presumably arbitrary).
    if (FAILED(hr = cpRecoCtxt->CreateGrammar(7, &cpRecoGrammar)))
    {
        printf("cpRecoCtxt->CreateGrammar() fail. hr = %x", hr);
        return -7;
    }
    // Keep the grammar disabled while it is being loaded and committed.
    if (FAILED(hr = cpRecoGrammar->SetGrammarState(SPGS_DISABLED)))
    {
        printf("cpRecoGrammar->SetGrammarState() fail. hr = %x", hr);
        return -8;
    }
    if (FAILED(hr = cpRecoGrammar->LoadCmdFromFile(L"conf.xml", SPLO_DYNAMIC)))
    {
        printf("cpRecoGrammar->LoadCmdFromFile() fail. hr = %x", hr);
        return -9;
    }

    SPSTATEHANDLE hRule = NULL;
    if (FAILED(hr = cpRecoGrammar->GetRule(L"COMMAND", NULL, SPRAF_Active, FALSE, &hRule)))
    {
        printf("cpRecoGrammar->GetRule() fail. hr = %x", hr);
        // NOTE: shares exit code -9 with the LoadCmdFromFile failure above;
        // kept unchanged so existing callers see the same codes.
        return -9;
    }

    // A static configuration file is used for now; adding commands
    // dynamically (as sketched below) could be explored later.
    //if (FAILED(hr = cpRecoGrammar->ClearRule(hRule)))
    //{
    //  printf("cpRecoGrammar->ClearRule() fail. hr = %x", hr);
    //  return -10;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"Frank Lee", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //  printf("cpRecoGrammar->AddWordTransition(1) fail. hr = %x", hr);
    //  return -11;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"self", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //  printf("cpRecoGrammar->AddWordTransition(2) fail. hr = %x", hr);
    //  return -12;
    //}
    //if (FAILED(hr = cpRecoGrammar->AddWordTransition(hRule, NULL, L"SAPI beta", NULL, SPWT_LEXICAL, 1, NULL)))
    //{
    //  printf("cpRecoGrammar->AddWordTransition(3) fail. hr = %x", hr);
    //  return -13;
    //}

    if (FAILED(hr = cpRecoGrammar->Commit(NULL)))
    {
        printf("cpRecoGrammar->Commit() fail. hr = %x", hr);
        return -14;
    }

    if (FAILED(hr = cpRecoGrammar->SetGrammarState(SPGS_ENABLED)))
    {
        printf("cpRecoGrammar->SetGrammarState() fail. hr = %x", hr);
        return -15;
    }

    // Activate every top-level rule. Without an active rule nothing can be
    // recognized and the loop below would block forever, so bail out.
    if (FAILED(hr = cpRecoGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE)))
    {
        printf("cpRecoGrammar->SetRuleState() fail. hr = %x", hr);
        return -16;
    }

    ////////////////////////////////////////////////////////////////////
    printf("Read to listen your command:\n");
    CComPtr<ISpRecoResult> cpResult;
    while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
    {
        if (!cpResult)
        {
            // The event carried no recognition result; wait for the next one.
            continue;
        }

        CSpDynamicString dstrText;
        if (SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL)))
        {
            // CW2A owns its buffer and frees it at the end of this scope.
            // The W2A macro allocates with _alloca, which would grow the
            // stack on every iteration of this unbounded loop.
            const WCHAR * pwszText = dstrText;
            CW2A szText(pwszText);
            printf("I heard:  %s \n", static_cast<LPCSTR>(szText));
            if (fUseTTS)
            {
                cpVoice->Speak(L"I heard", SPF_ASYNC, NULL);
                cpVoice->Speak(dstrText, SPF_ASYNC, NULL);
            }
            if (fReplay)
            {
                if (fUseTTS)
                    cpVoice->Speak(L"when you said", SPF_ASYNC, NULL);
                else
                    printf(" when you said ");
                cpResult->SpeakAudio(NULL, 0, NULL, NULL);
            }
        }

        // Always drop the result, even when GetText failed: taking the
        // address of a non-empty CComPtr on the next iteration would assert
        // and leak the reference.
        cpResult.Release();
    }

    return static_cast<int>(hr);
}

// Entry point. Parses the optional switches, initializes COM, runs the
// recognition session and uninitializes COM again.
//
// Usage: <exe> [-noTTS] [-noReplay]
//   -noTTS     do not speak the recognized text back.
//   -noReplay  do not replay the recorded audio.
//
// Returns -1 on an unknown argument, the CoInitialize HRESULT if COM cannot
// be initialized, otherwise the result of RunCommandRecognition.
int main(int argc, char* argv[])
{
    bool fUseTTS = true;            // turn TTS play back on or off
    bool fReplay = true;            // turn Audio replay on or off

    // Process optional arguments (compared case-insensitively).
    for (int i = 1; i < argc; i++)
    {
        if (_stricmp(argv[i], "-noTTS") == 0)
        {
            fUseTTS = false;
            continue;
        }
        if (_stricmp(argv[i], "-noReplay") == 0)
        {
            fReplay = false;
            continue;
        }
        printf("Usage: %s [-noTTS] [-noReplay]  ", argv[0]);
        return -1;
    }

    const HRESULT hrInit = ::CoInitialize(NULL);
    if (FAILED(hrInit))
    {
        return static_cast<int>(hrInit);
    }

    // All COM objects live inside the helper, so they are released before
    // CoUninitialize runs, including on the early-error paths.
    const int nResult = RunCommandRecognition(fUseTTS, fReplay);
    ::CoUninitialize();
    return nResult;
}

命令与控制模式需要使用到配置文件来定义“候选命令”范围，本例中用到XML配置文件“conf.xml”如下：

<!-- Command-and-control grammar for the sample program, which loads this
     file as "conf.xml" and looks up the rule named "COMMAND". -->
<GRAMMAR LANGID="804">
    <DEFINE>
        <!-- Symbolic id referenced by the RULE element below. -->
        <ID NAME="CMD" VAL="10"/>
    </DEFINE>
    <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">
        <!-- Candidate phrases; one of them is returned for each utterance.
             Opening and closing tags must match in case, because XML is
             case-sensitive. -->
        <L>
            <p>东南大学</p>
            <p>滴水洞</p>
            <p>运行趋势分析</p>
            <p>接地监视</p>
            <p>模型异动</p>
            <p>中科院</p>
        </L>
    </RULE>
</GRAMMAR>