一、背景调查
结合语音识别与MFC鼠标消息,就拥有了语音鼠标。
不同于市面上科大讯飞、百度AI等同类产品所提供的语音转文字、语音播报等功能,这里是完全靠语音来移动鼠标、单击、双击、滚动以及打开网页。
二、心路历程
曾几何时,被鼠标手折磨得难受,所以希望能够靠视觉或者说话来控制鼠标移动。那就先靠语音实现下吧。
三、下载安装
参考这个
添加链接描述
四、重点编程
创建MFC对话框程序。
语音模块初始化
// Constructor: initializes COM on the calling thread and creates the SAPI
// text-to-speech voice object (ProgID "SAPI.SpVoice") into pSpVoice.
// NOTE(review): the results of CoInitialize and CLSIDFromProgID are not
// checked, and no matching CoUninitialize is visible in this excerpt.
MFC_YuYin::MFC_YuYin()
{
    // No voice-token enumerator yet.
    pSpEnumTokens = NULL;

    // COM must be initialized before any of the COM calls below.
    ::CoInitialize(NULL);

    // Text-to-speech: look up the class id registered for "SAPI.SpVoice".
    CLSIDFromProgID(_T("SAPI.SpVoice"), &CLSID_SpVoice);

    // Create the voice object in-process.
    const HRESULT hrVoice = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_INPROC_SERVER,
                                             IID_ISpVoice, (void**)&pSpVoice);
    if (FAILED(hrVoice))
    {
        // Creation failed; there is nothing further to set up.
        return;
    }
}
// Sets up SAPI speech recognition for the given dialog.
//
// Steps: create the in-process recognizer and its recognition context, route
// recognition events to the dialog window as WM_RECORD messages, attach the
// default audio input, load the command grammar from "sapiInclude\main.xml",
// activate its rules and finally switch the recognizer on.
//
// @param dlg  Dialog that receives WM_RECORD notifications and is used as the
//             owner of the error message boxes. Must not be NULL.
// @return TRUE when every step succeeded; FALSE otherwise. On a SAPI failure a
//         message box naming the failed step is shown first.
BOOL MFC_YuYin::Init_YuYin(CDialog* dlg)
{
    dlg_SAPI = dlg;
    if (dlg_SAPI == NULL)
    {
        // Without a dialog there is no window to notify and no owner for the
        // error message boxes below.
        return FALSE;
    }

    // Speech recognition engine (in-process recognizer).
    HRESULT hr = m_pSREngine.CoCreateInstance(CLSID_SpInprocRecognizer);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"m_pSREngine.CoCreateInstance");
        return FALSE;
    }

    // Recognition context.
    hr = m_pSREngine->CreateRecoContext(&m_pSRContext);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L" m_pSREngine->CreateRecoContext ");
        return FALSE;
    }

    // Deliver SAPI notifications to the dialog as WM_RECORD window messages.
    HWND hwnd = dlg_SAPI->GetSafeHwnd();
    hr = m_pSRContext->SetNotifyWindowMessage(hwnd, WM_RECORD, 0, 0);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SetNotifyWindowMessage");
        return FALSE;
    }

    // Only successful recognitions are of interest (both as notified and as
    // queued events). A failure here means no event would ever arrive, so it
    // is no longer ignored.
    hr = m_pSRContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SetInterest");
        return FALSE;
    }

    // The previous version also enumerated the SPCAT_VOICES tokens into a
    // local pointer here that was never used nor released (a leaked COM
    // reference); that call has been removed.

    // Default audio input device.
    hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_pAudio);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SpCreateDefaultObjectFromCategoryId");
        return FALSE;
    }
    hr = m_pSREngine->SetInput(m_pAudio, TRUE);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SetInput");
        return FALSE;
    }

    // Command grammar.
    ullGrammerID = 1000;
    hr = m_pSRContext->CreateGrammar(ullGrammerID, &m_pSRGrammar);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"CreateGrammar");
        return FALSE;
    }

    // Relative path to the grammar XML; change it here if the file moves.
    hr = m_pSRGrammar->LoadCmdFromFile(L"sapiInclude\\main.xml", SPLO_DYNAMIC);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"LoadCmdFromFile");
        return FALSE;
    }

    // Activate the grammar rules (NULL name + NULL reserved = all top-level
    // rules), then start recognizing.
    hr = m_pSRGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SetRuleState");
        return FALSE;
    }
    hr = m_pSREngine->SetRecoState(SPRST_ACTIVE);
    if (FAILED(hr)){
        dlg_SAPI->MessageBox(L"SetRecoState");
        return FALSE;
    }
    return TRUE;
}
main.xml文件里的内容是需要识别的指令
<GRAMMAR LANGID="804">
    <!-- Voice-mouse command grammar. LANGID 804 (hex) is Chinese (PRC).
         The single top-level rule COMMAND is a list of alternatives; each
         phrase below is one command the recognizer may return. -->
    <DEFINE>
        <ID NAME="CMD" VAL="10"/>
    </DEFINE>
    <RULE NAME="COMMAND" ID="CMD" TOPLEVEL="ACTIVE">
        <L>
            <!-- Greeting and mouse buttons -->
            <p>你好</p>
            <p>单击</p>
            <p>双击</p>
            <p>右键</p>
            <!-- Single-step cursor movement -->
            <p>往上</p>
            <p>往下</p>
            <p>往左</p>
            <p>往右</p>
            <!-- Continuous cursor movement until "停" -->
            <p>一直往上</p>
            <p>一直往下</p>
            <p>一直往左</p>
            <p>一直往右</p>
            <!-- Wheel, stop and step-size control -->
            <p>向下滚动</p>
            <p>向上滚动</p>
            <p>停</p>
            <p>快点</p>
            <p>慢点</p>
            <!-- Open web sites -->
            <p>打开哔哩哔哩</p>
            <p>打开hao123</p>
            <!-- Keyboard keys -->
            <p>回车</p>
            <p>Shift</p>
            <p>删除</p>
            <!-- Single letters -->
            <p>A</p>
            <p>B</p>
            <p>C</p>
            <p>D</p>
            <p>E</p>
            <p>F</p>
            <p>G</p>
            <p>H</p>
            <p>I</p>
            <p>J</p>
            <p>K</p>
            <p>L</p>
            <p>M</p>
            <p>N</p>
            <p>O</p>
            <p>P</p>
            <p>Q</p>
            <p>R</p>
            <p>S</p>
            <p>T</p>
            <p>U</p>
            <p>V</p>
            <p>W</p>
            <p>X</p>
            <p>Y</p>
            <p>Z</p>
        </L>
    </RULE>
</GRAMMAR>
按理说可以识别字母,但是实际用起来不好用,我说“B”,她偏识别成“D”。对鼠标的移动有一些这样的思考:
1、每次移动间距应该是可调的,所以有“快点”、“慢点”这样的指令,调整的就是每次移动的像素数。
2、说一句动一下的功能应该有,并且说一句一直动的功能更应该有,还要有停的指令。但是停反应比较迟缓,因为语音识别本身就需要停顿。这种是考虑到,光标离目标点很远,就移动得快,跨度大,离近了,就该减小跨度,精确对准。
3、单击、双击、右击,滚轮上下,都应该是必备操作。
4、拖动的功能暂时别加了,要不然就要添加另外的指令“按住”,“松手”类似的,退一步说,都这么懒了,让我框选操作,有些违背本性。浏览网页是很少用到框选的。
5、shift,control这些是要添加的键盘配合鼠标的响应。可以切换输入法啥的。
XXdlg.cpp里核心程序段
// Thread entry point that turns recognized voice commands into mouse and
// keyboard actions.
//
// The loop blocks in GetMessage on this thread's message queue. Messages with
// id 2587 carry the recognized phrase as a BSTR in wParam. While a message is
// being handled the recognizer is switched to SPRST_INACTIVE and re-enabled
// afterwards.
//
// Commands fall in two groups:
//   - "echo only" commands (movement, speed, wheel, stop): the recognized text
//     is shown in IDC_STATIC3 and nothing is spoken;
//   - all other commands: the reply is shown in IDC_STATIC3 and also spoken
//     through MFC_Speak_ASyn.
//
// Fixes over the previous version:
//   - wParam is interpreted as a BSTR only after the message id has been
//     checked (it used to be dereferenced for every message);
//   - the GetMessage result is checked: the loop ends on WM_QUIT (0) or on
//     failure (-1) instead of working on a stale MSG;
//   - text that matches no command no longer replays the previous command's
//     reply.
//
// NOTE(review): abStateruning is only re-evaluated after GetMessage returns,
// so a message has to arrive before the loop notices a stop request.
// NOTE(review): nothing here frees the BSTR received in wParam - confirm who
// owns it.
// NOTE(review): dialog controls are updated from this thread via
// SetDlgItemTextW - confirm that is intended.
//
// @param pthread  Pointer to the owning CtrySAPI3Dlg.
// @return Always 0.
DWORD WINAPI CtrySAPI3Dlg::Thread_FunRead(LPVOID pthread)
{
    // Id of the thread message whose wParam holds the recognized phrase.
    // NOTE(review): magic number kept from the original; presumably it must
    // match the id used by the code that posts the message - confirm.
    const UINT kRecognizedTextMessage = 2587;

    CtrySAPI3Dlg* dlg = (CtrySAPI3Dlg*)pthread;
    MSG msg;
    while (dlg->abStateruning.load(std::memory_order_acquire))
    {
        const BOOL bGot = GetMessage(&msg, NULL, 0, 0);
        if (bGot == 0 || bGot == -1)
        {
            // 0: WM_QUIT was retrieved; -1: GetMessage failed and msg is not valid.
            break;
        }

        // Pause recognition while this message is processed.
        dlg->myYuYin.m_pSREngine->SetRecoState(SPRST_INACTIVE);

        if (msg.message == kRecognizedTextMessage)
        {
            CString strJieshou = (CString)((BSTR)msg.wParam);
            // By default the reply simply repeats the recognized text.
            CString strReplay = strJieshou;
            bool bMatched = true;    // false when the text is not a known command
            bool bEchoOnly = false;  // true: show the text only, do not speak it

            const auto Is = [&strJieshou](LPCWSTR text)
            {
                return strJieshou.Compare(text) == 0;
            };
            // Press and release one virtual key.
            const auto TapKey = [](BYTE vk)
            {
                keybd_event(vk, 0, 0, 0);
                Sleep(1);
                keybd_event(vk, 0, KEYEVENTF_KEYUP, 0);
            };
            auto& direction = dlg->myCat.abiDerictiron_0stop_1up_2down_3left_4right;

            if (Is(L"你好"))
            {
                strReplay = L"你也好啊";
            }
            else if (Is(L"单击"))
            {
                dlg->myCat.LeftClick();
            }
            else if (Is(L"双击"))
            {
                // Two left clicks 10 ms apart.
                dlg->myCat.LeftClick();
                Sleep(10);
                dlg->myCat.LeftClick();
            }
            else if (Is(L"右键"))
            {
                dlg->myCat.RightClick();
            }
            else if (Is(L"快点"))
            {
                dlg->myCat.MoveMouseSpeedAdd();
                bEchoOnly = true;
            }
            else if (Is(L"慢点"))
            {
                dlg->myCat.MoveMouseSpeedMinus();
                bEchoOnly = true;
            }
            // Single-step moves; the first argument uses the same
            // 1=up, 2=down, 3=left, 4=right codes as the direction flag.
            else if (Is(L"往上"))
            {
                dlg->myCat.MoveMouseSub(1, 5);
                bEchoOnly = true;
            }
            else if (Is(L"往下"))
            {
                dlg->myCat.MoveMouseSub(2, 5);
                bEchoOnly = true;
            }
            else if (Is(L"往左"))
            {
                dlg->myCat.MoveMouseSub(3, 5);
                bEchoOnly = true;
            }
            else if (Is(L"往右"))
            {
                dlg->myCat.MoveMouseSub(4, 5);
                bEchoOnly = true;
            }
            // Continuous moves: publish the direction (0 = stop).
            else if (Is(L"一直往上"))
            {
                direction.store(1, std::memory_order_release);
                bEchoOnly = true;
            }
            else if (Is(L"一直往下"))
            {
                direction.store(2, std::memory_order_release);
                bEchoOnly = true;
            }
            else if (Is(L"一直往左"))
            {
                direction.store(3, std::memory_order_release);
                bEchoOnly = true;
            }
            else if (Is(L"一直往右"))
            {
                direction.store(4, std::memory_order_release);
                bEchoOnly = true;
            }
            else if (Is(L"停"))
            {
                direction.store(0, std::memory_order_release);
                bEchoOnly = true;
            }
            else if (Is(L"向上滚动"))
            {
                dlg->myCat.WheelUp();
                bEchoOnly = true;
            }
            else if (Is(L"向下滚动"))
            {
                dlg->myCat.WheelDown();
                bEchoOnly = true;
            }
            else if (Is(L"打开哔哩哔哩"))
            {
                ShellExecute(NULL, L"Open", _T("msedge.exe"), _T("https://www.bilibili.com/"), NULL, SW_MAXIMIZE);
            }
            else if (Is(L"打开hao123"))
            {
                ShellExecute(NULL, L"Open", _T("msedge.exe"), _T("https://www.hao123.com/"), NULL, SW_MAXIMIZE);
            }
            else if (Is(L"回车"))
            {
                TapKey(VK_RETURN);
            }
            else if (Is(L"Shift"))
            {
                // Used to toggle the input method.
                TapKey(VK_SHIFT);
            }
            else if (Is(L"删除"))
            {
                // Backspace.
                TapKey(VK_BACK);
            }
            else if (strJieshou.GetLength() == 1
                && strJieshou.GetAt(0) >= L'A' && strJieshou.GetAt(0) <= L'Z')
            {
                // A single upper-case letter A-Z.
                strReplay = dlg->YiGeZiMu_FUn(strJieshou);
            }
            else
            {
                bMatched = false;
            }

            if (bMatched)
            {
                dlg->SetDlgItemTextW(IDC_STATIC3, strReplay);
                if (!bEchoOnly)
                {
                    dlg->myYuYin.MFC_Speak_ASyn(strReplay);
                }
            }
        }

        // Resume recognition.
        dlg->myYuYin.m_pSREngine->SetRecoState(SPRST_ACTIVE);
    }
    return 0;
}
此消息线程里
1、abiDerictiron_0stop_1up_2down_3left_4right是鼠标运行状态标志。表示一直运动的状态。
2、鼠标的移动实现:GetCursorPos先得到当前的位置,看往哪边走,就自加或自减
int ix = 0; int iy = 0;
GetNowPos(ix, iy);
if (iDerictiron_0stop_1up_2down_3left_4right == 1)
{
iy = iy - iStep;
if (iy<0)
{
iy = 0;
}
}
再用SetCursorPos设置到那个位置。
3、鼠标滚轮固定120
// Scroll up: rotate the wheel by one notch (WHEEL_DELTA, i.e. 120).
mouse_event(MOUSEEVENTF_WHEEL, 0, 0, WHEEL_DELTA, 0);
// Scroll down: same amount with the sign of the delta reversed.
mouse_event(MOUSEEVENTF_WHEEL, 0, 0, -WHEEL_DELTA, 0);
4、要记得初始化时,记录鼠标的位置。
5、用ShellExecute函数打开了相应的网站。
6、运行的时候要获得全局的窗口句柄用 HWND hProgMan = ::FindWindow(L"ProgMan", NULL);
7、单击、双击这一类操作需要调用MFC_Speak_ASyn,用语音朗读一下。因为这类指令即使执行了,也可能看不到任何效果;听到语音提示,就说明这个动作确实执行了,只是你点的地方本身不可点击。这样才更加人性化。
五、运行效果
演示一个过程
视频中我们可以在右边看到。运行此程序后,手就可以离开鼠标了。
打开一个网站,移动鼠标,单击搜索记录里的“手工耿”,再移动鼠标,单击打开一个视频,上下滚动,再移动鼠标,单击关闭标签,最后在标题栏右键一下,演示完整过程。体现出人性化,与不接触鼠标就完成操作的优势,可以快乐地解放双手。
懒出天际--语音鼠标,献给不想碰鼠标的人,靠嘴使唤鼠标