Kinect for Windows提供了语音识别的能力,它依靠Kinect的语音采集流进行识别,这是建立在微软语音识别库的基础上的,关于微软语音识别可以参考http://msdn.microsoft.com/en-us/library/hh361572(v=office.14).aspx。对于Kinect的语音识别,目前只支持如下语言:en-US,de-DE,en-AU,en-CA,en-GB,en-IE,en-NZ,es-ES,es-MX,fr-CA,fr-FR,it-IT,ja-JP(很遗憾没有中文,相信在不远的将来会有的)。

下列代码为语音识别:


  
  1. KinectSensor kinectsensor = null;  // set in Form1_Shown to the first connected sensor; stays null when none is found
  2.        SpeechRecognitionEngine speechEngine;  // created in Form1_Shown once a Kinect recognizer has been found
  3.        /// <summary>Form Shown handler: starts the first connected Kinect and begins continuous speech recognition on its audio stream.</summary>  
  4.        private void Form1_Shown(object sender, EventArgs e)  
  5.        {  
  6.            // Pick the first sensor in the Kinect collection that is connected.  
  7.            foreach (KinectSensor ks in KinectSensor.KinectSensors)  
  8.            {  
  9.                // Sensors in any other state are skipped.  
  10.                if (ks.Status == KinectStatus.Connected)  
  11.                {  
  12.                    kinectsensor = ks;  
  13.                    kinectsensor.Start(); // start the sensor so it can capture camera and infrared data  
  14.                    this.Text = "Kinect开始工作……";  
  15.                    break;  
  16.                }  
  17.            }  
  18.            if (kinectsensor != null)  
  19.            {  
  20.                // Look up the speech recognizer to use (see GetKinectRecognizer).  
  21.                RecognizerInfo recognizer = GetKinectRecognizer();  
  22.                if (null != recognizer)  
  23.                {  
  24.                    this.speechEngine = new SpeechRecognitionEngine(recognizer.Id);  
  25.                    // Load the XML grammar stored in the project resources. Encode it as UTF-8, not  
  26.                    // ASCII: ASCII turns every non-ASCII character (e.g. Japanese phrases) into '?'.  
  27.                    using (var memoryStream = new MemoryStream(Encoding.UTF8.GetBytes(Properties.Resources.SpeechGrammar)))  
  28.                    {  
  29.                        var g = new Grammar(memoryStream);  
  30.                        speechEngine.LoadGrammar(g);  
  31.                    }  
  32.                    #region 程序中用代码添加识别字库  
  33.                    //var directions = new Choices();  
  34.                    //directions.Add(new SemanticResultValue("した", "した"));  
  35.                    //directions.Add(new SemanticResultValue("うえ", "うえ"));  
  36.                    //directions.Add(new SemanticResultValue("はじめ", "はじめ"));  
  37.                    //directions.Add(new SemanticResultValue("おわり", "おわり"));  
  38.  
  39.                    //var grammarbuilder = new GrammarBuilder { Culture = recognizer.Culture };  
  40.                    //grammarbuilder.Append(directions);  
  41.  
  42.                    //Grammar grammar = new Grammar(grammarbuilder);  
  43.                    //speechEngine.LoadGrammar(grammar);  
  44.                    #endregion  
  45.  
  46.                    // Subscribe the handler that receives recognition results.  
  47.                    speechEngine.SpeechRecognized += SpeechRecognized;  
  48.                    // Feed the recognizer from the Kinect audio stream (PCM, 16 kHz, 16-bit, mono).  
  49.                    speechEngine.SetInputToAudioStream(kinectsensor.AudioSource.Start(), new SpeechAudioFormatInfo(EncodingFormat.Pcm, 16000, 16, 1, 32000, 2, null));  
  50.                    // Start recognizing; Multiple keeps recognition running after each result.  
  51.                    speechEngine.RecognizeAsync(RecognizeMode.Multiple);  
  52.                }  
  53.            }  
  54.        }  
  55.        /// <summary>  
  56.        /// Finds the installed speech recognizer that is flagged for Kinect and uses the en-US culture.  
  57.        /// </summary>  
  58.        /// <returns>The matching recognizer, or null when no installed recognizer qualifies.</returns>  
  59.        private static RecognizerInfo GetKinectRecognizer()  
  60.        {  
  61.            // Walk every speech recognizer installed on the system.  
  62.            foreach (RecognizerInfo recognizer in SpeechRecognitionEngine.InstalledRecognizers())  
  63.            {  
  64.                string value;  
  65.                // Installed recognizers may or may not be Kinect ones; the "Kinect" entry in AdditionalInfo tells them apart.  
  66.                recognizer.AdditionalInfo.TryGetValue("Kinect", out value); // en-US is English, ja-JP is Japanese  
  67.                // The second condition selects the recognition language.  
  68.                if ("True".Equals(value, StringComparison.OrdinalIgnoreCase) && "en-US".Equals(recognizer.Culture.Name, StringComparison.OrdinalIgnoreCase))  
  69.                {  
  70.                    return recognizer;  
  71.                }  
  72.            }  
  73.            return null;  
  74.        }  
  75.  
  76.        /// <summary>  
  77.        /// Handles a recognition result: when the confidence is high enough, shows the  
  78.        /// label that corresponds to the recognized command in Message_Lab.  
  79.        /// </summary>  
  80.        private void SpeechRecognized(object sender, SpeechRecognizedEventArgs e)  
  81.        {  
  82.            // Confidence ranges from 0 to 1; higher means a more reliable match.  
  83.            const double MinConfidence = 0.4;  
  84.            bool confident = e.Result.Confidence >= MinConfidence;  
  85.            if (!confident)  
  86.            {  
  87.                return;  
  88.            }  
  89.  
  90.            // The semantic value identifies which command was matched.  
  91.            string command = e.Result.Semantics.Value.ToString();  
  92.            switch (command)  
  93.            {  
  94.                // Values produced by the XML grammar (English recognition).  
  95.                case "Next": Message_Lab.Text = "向下"; break;  
  96.                case "Previous": Message_Lab.Text = "向上"; break;  
  97.                case "Run": Message_Lab.Text = "开始"; break;  
  98.                case "Close": Message_Lab.Text = "停止"; break;  
  99.                // Values for Japanese recognition.  
  100.                //case "した":  
  101.                //    Message_Lab.Text = "向下";  
  102.                //    break;  
  103.                //case "うえ":  
  104.                //    Message_Lab.Text = "向上";  
  105.                //    break;  
  106.                //case "はじめ":  
  107.                //    Message_Lab.Text = "开始";  
  108.                //    break;  
  109.                //case "おわり":  
  110.                //    Message_Lab.Text = "停止";  
  111.                //    break;  
  112.            }  
  113.        }  
  114.  
  115.        /// <summary>  
  116.        /// FormClosing handler: shuts down speech recognition, then stops the Kinect audio source and sensor.  
  117.        /// </summary>  
  118.        private void Form1_FormClosing(object sender, FormClosingEventArgs e)  
  119.        {  
  120.            // Stop the recognizer first so it is no longer reading the audio stream when that stream is stopped.  
  121.            if (speechEngine != null)  
  122.            {  
  123.                speechEngine.SpeechRecognized -= SpeechRecognized;  
  124.                speechEngine.RecognizeAsyncCancel();  
  125.                speechEngine.Dispose();  
  126.                speechEngine = null;  
  127.            }  
  128.  
  129.            if (kinectsensor != null && kinectsensor.Status == KinectStatus.Connected)  
  130.            {  
  131.                kinectsensor.AudioSource.Stop();  
  132.                kinectsensor.Stop(); // stop Kinect capture  
  133.                MessageBox.Show("Kinect结束工作!");  
  134.            }  
  135.        }  

其中语音识别的匹配xml为:


  
  1. <grammar version="1.0" xml:lang="en-US" root="rootRule" tag-format="semantics/1.0-literals" xmlns="http://www.w3.org/2001/06/grammar"> <!-- tag-format is literals: each tag's text is the semantic value the C# switch compares against -->
  2.   <rule id="rootRule"> 
  3.     <one-of> 
  4.       <item> 
  5.         <tag>Next</tag> <!-- any phrase in the list below yields "Next" -->
  6.         <one-of> 
  7.           <item> next </item> 
  8.           <item> n </item> 
  9.           <item> down </item> 
  10.           <item> qian </item>     
  11.         </one-of> 
  12.       </item> 
  13.       <item> 
  14.         <tag>Previous</tag> <!-- any phrase in the list below yields "Previous" -->
  15.         <one-of> 
  16.           <item> previous </item> 
  17.           <item> p </item> 
  18.           <item> up </item> 
  19.           <item> hou </item>       
  20.         </one-of> 
  21.       </item> 
  22.       <item> 
  23.         <tag>Run</tag> <!-- any phrase in the list below yields "Run" -->
  24.         <one-of> 
  25.           <item> run </item> 
  26.           <item> open </item> 
  27.           <item> r </item>       
  28.         </one-of> 
  29.       </item> 
  30.       <item> 
  31.         <tag>Close</tag> <!-- any phrase in the list below yields "Close" -->
  32.         <one-of> 
  33.           <item> close </item> 
  34.           <item> exit </item> 
  35.           <item> c </item>         
  36.         </one-of> 
  37.       </item> 
  38.     </one-of> 
  39.   </rule> 
  40. </grammar>