.NET中使用Speex -- 音频数据编解码

最新推荐文章于 2022-01-13 21:50:50 发布

newHung

最新推荐文章于 2022-01-13 21:50:50 发布

阅读量1k

点赞数

Speex是一套开源的音频编解码库，最新版本还包含了回音消除和防抖动等功能，如果我们想开发语音聊天或视频会议这样的系统，Speex将是一个不错的选择。到 http://www.speex.org 可以下载Speex的源码（编译后的dll为libspeex.dll），最新版本为1.2。不过源码是用C语言开发的，直接在.NET中使用会有诸多不便，为此，我用C#将其封装，使得编解码的调用相当简单。

　　由于Speex原始导出的API不是很方便C#调用，所以，在用C#封装之前，先要用C++对Speex的原始API进行简化，新建一个名为Speex的VC项目，然后引用libspeex.dll的相关库文件，添加cpp文件后，复制下列源码到文件中：

 
   
 
 
  #include  
  " 
  speex\speex.h 
  " 
  
 #include  
  < 
  windows.h 
  > 
  
 #include  
  < 
  stdio.h 
  > 
  
 #include  
  < 
  stdlib.h 
  > 
  
 
 #include  
  " 
  speex/speex_echo.h 
  " 
  
 #include  
  " 
  speex/speex_preprocess.h 
  " 
   
 #include  
  " 
  Speex.h 
  " 
  
 
  
  #define 
   FRAME_SIZE 160 
  
 
  
  float 
   encoder_input[FRAME_SIZE];
  
  void 
    
  * 
  encoder_state;
 SpeexBits encoder_bits;
 
 BOOL APIENTRY DllMain( HANDLE hModule, 
                        DWORD  ul_reason_for_call, 
                        LPVOID lpReserved
                      )
 {
      
  return 
   TRUE;
 } 
 

     
 
extern "C" __declspec(dllexport) void encoder_init(int quality)
 {
     encoder_state = speex_encoder_init(&speex_nb_mode);
     speex_encoder_ctl(encoder_state, SPEEX_SET_QUALITY, &quality);
     speex_bits_init(&encoder_bits);
 }
 
 extern "C" __declspec(dllexport) void encoder_dispose()
 {
     speex_encoder_destroy(encoder_state);
     speex_bits_destroy(&encoder_bits);
 }
 
 extern "C" __declspec(dllexport) int encoder_encode(const short *data, char *output)
 {
     for (int i = 0; i < FRAME_SIZE; i++)
         encoder_input[i] = data[i];
     speex_bits_reset(&encoder_bits);
     speex_encode(encoder_state, encoder_input, &encoder_bits);
     return speex_bits_write(&encoder_bits, output, 200);
 }
 
 
 // Decoder-side globals used by the decoder_* exports below.
 // Float samples produced by speex_decode before conversion to 16-bit.
 float decoder_output[FRAME_SIZE];
 // Handle returned by speex_decoder_init.
 void *decoder_state;
 // Bit buffer the decoder reads each encoded frame from.
 SpeexBits decoder_bits;
 
 extern "C" __declspec(dllexport) void decoder_init()
 {
     decoder_state = speex_decoder_init(&speex_nb_mode);
     int tmp = 1;
     speex_decoder_ctl(decoder_state, SPEEX_SET_ENH, &tmp);
     speex_bits_init(&decoder_bits);
 }
 extern "C" __declspec(dllexport) void decoder_dispose()
 {
     speex_decoder_destroy(decoder_state);
     speex_bits_destroy(&decoder_bits);
 }
 extern "C" __declspec(dllexport) void decoder_decode(int nbBytes, char *data, short *output)
 {
     speex_bits_read_from(&decoder_bits, data, nbBytes);
     speex_decode(decoder_state, &decoder_bits, decoder_output);
     for (int i = 0; i < FRAME_SIZE; i++)
     {
         output[i] = decoder_output[i];
     }
 }
 
 
 /* --------------------------- Echo cancellation --------------------------- */

 // Set by SpeexEchoInit, cleared by SpeexEchoReset.
 bool                  m_bSpeexEchoHasInit;
 // Handle returned by speex_echo_state_init.
 SpeexEchoState       *m_SpeexEchoState;
 // Handle returned by speex_preprocess_state_init.
 SpeexPreprocessState *m_pPreprocessorState;
 // Filter length and sampling rate chosen by the last SpeexEchoInit call.
 int                   m_nFilterLen;
 int                   m_nSampleRate;
 // Buffer allocated in SpeexEchoInit and freed in SpeexEchoReset.
 float                *m_pfNoise;
 
 extern "C" __declspec(dllexport) void SpeexEchoCapture(short* input_frame, short* output_frame)
 {
     speex_echo_capture(m_SpeexEchoState, input_frame, output_frame);
 }
 
 extern "C" __declspec(dllexport) void SpeexEchoPlayback(short* echo_frame)
 {
     speex_echo_playback(m_SpeexEchoState, echo_frame);
 }
 
 extern "C" __declspec(dllexport) void SpeexEchoReset()
 {
     if (m_SpeexEchoState != NULL)
     {
         speex_echo_state_destroy(m_SpeexEchoState);
         m_SpeexEchoState = NULL;
     }
     if (m_pPreprocessorState != NULL)
     {
         speex_preprocess_state_destroy(m_pPreprocessorState);
         m_pPreprocessorState = NULL;
     }
     if (m_pfNoise != NULL)
     {
         delete []m_pfNoise;
         m_pfNoise = NULL;
     }
     m_bSpeexEchoHasInit = false;
 }
 
 extern "C" __declspec(dllexport) void SpeexEchoInit(int filter_length, int sampling_rate ,bool associatePreprocesser)
 {
     SpeexEchoReset(); 
 
     if (filter_length<=0 || sampling_rate<=0)
     {
       m_nFilterLen  = 160*8;
       m_nSampleRate = 8000;
     }
     else
     {
       m_nFilterLen  = filter_length;
       m_nSampleRate = sampling_rate;
     }
 
     m_SpeexEchoState = speex_echo_state_init(FRAME_SIZE, m_nFilterLen);
     m_pPreprocessorState = speex_preprocess_state_init(FRAME_SIZE, m_nSampleRate);
     if(associatePreprocesser)
     {
         speex_preprocess_ctl(m_pPreprocessorState, SPEEX_PREPROCESS_SET_ECHO_STATE,m_SpeexEchoState);
     }
     m_pfNoise = new float[FRAME_SIZE+1];
     m_bSpeexEchoHasInit = true;
 } 
   
 

/**
 * Runs speex_echo_cancellation on one frame using the global echo state.
 *
 * @param mic  frame passed as the recorded-signal argument.
 * @param ref  frame passed as the played-signal argument.
 * @param out  receives the result frame.
 *
 * Does nothing (out is left untouched) when SpeexEchoInit has not been called.
 */
extern "C" __declspec(dllexport) void SpeexEchoDoAEC(short *mic, short *ref, short *out)
{
    if (!m_bSpeexEchoHasInit)
    {
        return;
    }

    speex_echo_cancellation(m_SpeexEchoState, (const __int16 *)mic, (const __int16 *)ref, (__int16 *)out);
}

　　编译便生成Speex.dll。

　　如果对VC不熟悉也没关系，文末会直接给出libspeex.dll和Speex.dll的下载，直接使用就OK了。

　　现在，C#可以调用Speex.dll导出的简单函数了，最终封装的源码如下：

 
   
 
 
       
  /// <summary>
  /// C# wrapper around the encoder/decoder functions exported by Speex.dll.
  /// zhuweisky 2010.05.13
  /// </summary>
  /// <remarks>
  /// Encoded output is a sequence of records, one per input frame:
  /// a 4-byte little-endian payload length followed by the payload bytes.
  /// </remarks>
  public class Speex : IAudioCodec
  {
      /// <summary>Samples per frame; must match FRAME_SIZE in the native wrapper.</summary>
      private const int FrameSize = 160;
      /// <summary>Bytes occupied by one frame of 16-bit PCM samples.</summary>
      private const int FrameBytes = FrameSize * 2;
      /// <summary>Size of the scratch buffer handed to encoder_encode.</summary>
      private const int MaxEncodedFrameBytes = 200;
      /// <summary>Size of the length prefix written before each encoded frame.</summary>
      private const int LengthPrefixBytes = sizeof(int);

      #region IsDisposed
      private volatile bool isDisposed = false;

      /// <summary>True once <see cref="Dispose"/> has been called.</summary>
      public bool IsDisposed
      {
          get { return isDisposed; }
      }
      #endregion

      #region Ctor
      /// <summary>
      /// Initializes the native encoder and decoder.
      /// </summary>
      /// <param name="quality">Encoding quality, 0 to 10 inclusive.</param>
      /// <exception cref="Exception">quality is outside 0..10.</exception>
      public Speex(int quality)
      {
          if (quality < 0 || quality > 10)
          {
              throw new Exception("quality value must be between 0 and 10.");
          }

          Speex.encoder_init(quality);
          Speex.decoder_init();
      }
      #endregion

      #region Dispose
      /// <summary>
      /// Releases the native encoder and decoder. Calling it again is a no-op.
      /// </summary>
      public void Dispose()
      {
          if (this.isDisposed)
          {
              // The native state has already been destroyed; destroying it again
              // would operate on freed handles.
              return;
          }

          this.isDisposed = true;
          // NOTE(review): presumably this pause gives in-flight Encode/Decode calls
          // time to finish; it is not real synchronization. Confirm with callers.
          System.Threading.Thread.Sleep(100);
          Speex.decoder_dispose();
          Speex.encoder_dispose();
      }
      #endregion

      #region Encode
      /// <summary>
      /// Encodes captured audio data.
      /// </summary>
      /// <param name="data">
      /// 16-bit little-endian samples; the length must be a multiple of one
      /// frame (FrameSize * 2 bytes).
      /// </param>
      /// <returns>
      /// The length-prefixed encoded frames, or null if this instance is disposed.
      /// </returns>
      /// <exception cref="ArgumentException">data is not a whole number of frames.</exception>
      public byte[] Encode(byte[] data)
      {
          if (this.isDisposed)
          {
              return null;
          }

          if (data.Length % FrameBytes != 0)
          {
              throw new ArgumentException("Invalid Data Length.");
          }

          short[] input = new short[FrameSize];
          byte[] buffer = new byte[MaxEncodedFrameBytes];
          byte[] output = new byte[0];

          int frameCount = data.Length / FrameBytes;
          for (int i = 0; i < frameCount; i++)
          {
              int frameStart = i * FrameBytes;
              for (int j = 0; j < FrameSize; j++)
              {
                  int low = data[frameStart + j * 2];
                  int high = data[frameStart + j * 2 + 1];
                  // unchecked: values with the top bit set must wrap to negative
                  // samples even if the project is compiled with overflow checks.
                  input[j] = unchecked((short)(low | (high << 8)));
              }

              int nbBytes = Speex.encoder_encode(input, buffer);

              int recordStart = output.Length;
              Array.Resize<byte>(ref output, recordStart + LengthPrefixBytes + nbBytes);

              // Write the prefix from a copy of the length and at offsets that do
              // not depend on it. The previous code divided nbBytes in place while
              // also using it in the index, so prefix bytes 1..3 landed on the
              // last three payload bytes and zeroed them.
              int remaining = nbBytes;
              for (int j = 0; j < LengthPrefixBytes; j++)
              {
                  output[recordStart + j] = (byte)(remaining & 0xFF);
                  remaining >>= 8;
              }

              Array.Copy(buffer, 0, output, recordStart + LengthPrefixBytes, nbBytes);
          }

          return output;
      }
      #endregion

      #region Decode
      /// <summary>
      /// Decodes data produced by <see cref="Encode"/> back into raw audio.
      /// </summary>
      /// <param name="data">Length-prefixed encoded frames.</param>
      /// <returns>
      /// 16-bit little-endian samples, FrameSize samples per encoded frame,
      /// or null if this instance is disposed.
      /// </returns>
      /// <exception cref="ArgumentException">
      /// data ends in the middle of a length prefix or a payload.
      /// </exception>
      public byte[] Decode(byte[] data)
      {
          if (this.isDisposed)
          {
              return null;
          }

          short[] buffer = new short[FrameSize];
          byte[] output = new byte[0];
          int index = 0;

          while (index < data.Length)
          {
              if (data.Length - index < LengthPrefixBytes)
              {
                  throw new ArgumentException("Invalid Data Length.");
              }

              // Read the little-endian length prefix, most significant byte first.
              int nbBytes = 0;
              for (int i = LengthPrefixBytes - 1; i >= 0; i--)
              {
                  nbBytes = (nbBytes << 8) | data[index + i];
              }
              index += LengthPrefixBytes;

              if (nbBytes < 0 || nbBytes > data.Length - index)
              {
                  throw new ArgumentException("Invalid Data Length.");
              }

              byte[] input = new byte[nbBytes];
              Array.Copy(data, index, input, 0, nbBytes);
              index += nbBytes;

              Speex.decoder_decode(nbBytes, input, buffer);

              int frameStart = output.Length;
              Array.Resize<byte>(ref output, frameStart + FrameBytes);
              for (int i = 0; i < FrameSize; i++)
              {
                  // Mask and shift instead of % and /: integer division truncates
                  // toward zero, which produced the wrong high byte for negative
                  // samples (e.g. -1 became 0x00FF instead of 0xFFFF).
                  int sample = buffer[i];
                  output[frameStart + i * 2] = (byte)(sample & 0xFF);
                  output[frameStart + i * 2 + 1] = (byte)((sample >> 8) & 0xFF);
              }
          }

          return output;
      }
      #endregion

      #region Pinvoke
      // The native functions are plain extern "C" exports, i.e. cdecl. DllImport
      // defaults to the platform convention (stdcall on 32-bit Windows), so the
      // convention is stated explicitly to avoid a stack imbalance.
      [DllImport("Speex.dll", EntryPoint = "encoder_init", CallingConvention = CallingConvention.Cdecl)]
      internal extern static void encoder_init(int quality);

      [DllImport("Speex.dll", EntryPoint = "encoder_dispose", CallingConvention = CallingConvention.Cdecl)]
      internal extern static void encoder_dispose();

      [DllImport("Speex.dll", EntryPoint = "encoder_encode", CallingConvention = CallingConvention.Cdecl)]
      internal extern static int encoder_encode(short[] data, byte[] output);

      [DllImport("Speex.dll", EntryPoint = "decoder_init", CallingConvention = CallingConvention.Cdecl)]
      internal extern static void decoder_init();

      [DllImport("Speex.dll", EntryPoint = "decoder_dispose", CallingConvention = CallingConvention.Cdecl)]
      internal extern static void decoder_dispose();

      [DllImport("Speex.dll", EntryPoint = "decoder_decode", CallingConvention = CallingConvention.Cdecl)]
      internal extern static void decoder_decode(int nbBytes, byte[] data, short[] output);
      #endregion
  }
 
 
   
 

封装后的Speex类只对外提供四个成员：构造函数（传入编码质量并完成初始化）、Encode、Decode和Dispose。各参数的含义也非常明显。

　一般音频对话的整个流程是这样的：采集 -> 编码 -> 网络传输 -> 解码 -> 播放。

　而该封装的Speex类解决了这个过程中的音频编码和解码的问题。你可以复制该源码到你的项目，并将libspeex.dll和上文编译生成的Speex.dll一起放到运行目录下，就可以正常地使用Speex的编解码功能了。

　关于Speex更高级的功能，我正在研究中，有兴趣的朋友可以email给我一起探讨。　　

2012.11.20 我们的研究成果已经全部集成到了OMCS中，其支持回音消除（AEC）、静音检测（VAD）、噪音抑制（DENOISE）、自动增益（AGC）等网络语音技术，有兴趣的可以了解一下。

newHung

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
.NET中使用Speex -- 音频数据编解码

Speex是一套开源的音频编解码库，最新版本还包含了回音消除和防抖动等功能，如果我们想开发语音聊天或视频会议这样的系统，Speex将是一个不错的选择。到 http://www.speex.org可以下载Speex的源码（编译后的dll为libspeex.dll），最新版本为1.2。不过源码是用C++开发的，直接在.NET中使用会有诸多不便，为此，我用C#将其封装，使得编解码的调用相当简单。
复制链接

扫一扫