使用C/C++实现Librosa，求STFT、Mel、MFCC！！！！

我路过，

已于 2024-06-13 21:52:01 修改

阅读量1.8k

点赞数 14

文章标签： c语言 c++ 开发语言 python

于 2024-06-01 23:20:10 首次发布

本文链接：https://blog.csdn.net/ndm134530/article/details/139381038

版权

本文详细介绍了如何使用C++实现基于VS2017的Librosa库，以处理wav音频文件，实现短时傅里叶变换（STFT）、梅尔滤波器（Mel）和梅尔频率倒谱系数（MFCC）计算。通过自定义头文件和源文件，实现了与Python中Librosa类似的功能，包括音频的预处理、STFT、梅尔谱和MFCC的计算，并提供了性能测试的时间消耗。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

创作来源：

在语音识别的任务中，我们通常使用Python求语音信号的频谱特征，有时需要在其它平台使用该特征，网络上现有的使用C、C++求MFCC特征的案例很少，本文使用通俗易操作的代码基于VS2017实现使用C、C++求wav的频谱特征。只需要修改窗口函数大小，采样频率等相应的参数即可......

头文件1:librosa.h

#ifndef LIBROSA_H_
#define LIBROSA_H_

#include "eigen3/Eigen/Core"
#include "eigen3/unsupported/Eigen/FFT"

#include <vector>
#include <complex>
#include <iostream>
#include <string>

///
/// \brief C++ implementation of librosa
///
namespace librosa {

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif // !M_PI

   /// Row vector of single-precision real samples (1 x N, row-major).
   using Vectorf = Eigen::Matrix<float, 1, Eigen::Dynamic, Eigen::RowMajor>;
   /// Row vector of single-precision complex values (1 x N, row-major).
   using Vectorcf = Eigen::Matrix<std::complex<float>, 1, Eigen::Dynamic, Eigen::RowMajor>;
   /// Dynamically sized real matrix, row-major storage.
   using Matrixf = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
   /// Dynamically sized complex matrix, row-major storage.
   using Matrixcf = Eigen::Matrix<std::complex<float>, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

namespace internal {

       /// \brief Pad a 1-D signal on both sides.
       ///
       /// \param x      input signal; it is only read, never modified.
       /// \param left   number of samples to prepend.
       /// \param right  number of samples to append.
       /// \param mode   "reflect" (mirror without repeating the edge sample),
       ///               "symmetric" (mirror including the edge sample) or
       ///               "edge" (repeat the first/last sample). Any other string
       ///               leaves the padding filled with \p value.
       /// \param value  fill value used for the padding when \p mode is not one
       ///               of the three modes above, or when \p x is empty.
       /// \return a vector of length left + x.size() + right holding the padded signal.
       ///
       /// Pad widths larger than the signal are handled by continuing the mirror
       /// pattern periodically instead of reading outside \p x.
       static Vectorf pad(Vectorf &x, int left, int right, const std::string &mode, float value) {
           const int n = static_cast<int>(x.size());
           Vectorf x_paded = Vectorf::Constant(left + n + right, value);
           x_paded.segment(left, n) = x;

           const bool is_reflect = (mode.compare("reflect") == 0);
           const bool is_symmetric = (mode.compare("symmetric") == 0);
           const bool is_edge = (mode.compare("edge") == 0);

           // Constant padding is already in place; an empty signal offers
           // nothing to mirror or repeat, so keep the constant fill as well.
           if (n == 0 || !(is_reflect || is_symmetric || is_edge)) {
               return x_paded;
           }

           // Translate a position k, expressed relative to x[0] (negative on the
           // left side, >= n on the right side), into a valid index into x.
           auto source_index = [&](int k) -> int {
               if (is_edge) {
                   return k < 0 ? 0 : n - 1;
               }
               // "reflect" repeats with period 2(n-1), "symmetric" with period 2n.
               const int period = is_reflect ? 2 * (n - 1) : 2 * n;
               if (period == 0) {
                   return 0; // reflecting a single sample can only yield that sample
               }
               int m = k % period;
               if (m < 0) {
                   m += period; // C++ remainder keeps the sign of the dividend
               }
               if (m >= n) {
                   // second half of the period walks back through the signal
                   m = (is_reflect ? period : period - 1) - m;
               }
               return m;
           };

           for (int i = 0; i < left; ++i) {
               x_paded[i] = x[source_index(i - left)];
           }
           for (int i = 0; i < right; ++i) {
               x_paded[left + n + i] = x[source_index(n + i)];
           }
           return x_paded;
       }

       /// \brief Short-time Fourier transform of a 1-D signal.
       ///
       /// Each frame of n_fft samples is multiplied by a Hann window and
       /// transformed with Eigen::FFT; only the first n_fft/2 + 1 bins are kept.
       ///
       /// \param x       input signal.
       /// \param n_fft   frame length / FFT size.
       /// \param n_hop   number of samples between the starts of consecutive frames.
       /// \param win     currently ignored: a Hann window is always used.
       /// \param center  when true, the signal is padded with n_fft/2 samples on
       ///                each side before framing.
       /// \param mode    padding mode forwarded to pad() (fill value 0).
       /// \return a matrix with one row per frame and n_fft/2 + 1 columns. The
       ///         result has zero rows when the (padded) signal is shorter than
       ///         one frame, and is 0x0 when n_fft or n_hop is not positive.
       static Matrixcf stft(Vectorf &x, int n_fft, int n_hop, const std::string &win, bool center, const std::string &mode) {
           (void)win; // only the Hann window is implemented

           // A non-positive frame or hop size cannot produce frames (and a zero
           // hop would divide by zero below).
           if (n_fft <= 0 || n_hop <= 0) {
               return Matrixcf(0, 0);
           }

           // hanning
           Vectorf window = 0.5*(1.f - (Vectorf::LinSpaced(n_fft, 0.f, static_cast<float>(n_fft - 1))*2.f*M_PI / n_fft).array().cos());

           int pad_len = center ? n_fft / 2 : 0;
           Vectorf x_paded = pad(x, pad_len, pad_len, mode, 0.f);

           int n_f = n_fft / 2 + 1;

           // Not even one full frame available: report "no frames" instead of
           // computing a negative frame count or slicing past the end.
           if (x_paded.size() < n_fft) {
               return Matrixcf(0, n_f);
           }

           int n_frames = 1 + static_cast<int>((x_paded.size() - n_fft) / n_hop);
           Matrixcf X(n_frames, n_fft);
           Eigen::FFT<float> fft;

           for (int i = 0; i < n_frames; ++i) {
               // window the i-th frame, then take its full-length FFT
               Vectorf x_frame = window.array()*x_paded.segment(i*n_hop, n_fft).array();
               X.row(i) = fft.fwd(x_frame);
           }
           // keep only the first n_f bins of each frame
           return X.leftCols(n_f);
       }

       /// \brief Element-wise magnitude of a complex matrix raised to a power.
       ///
       /// \param X      complex input matrix.
       /// \param power  exponent applied to each magnitude (default 1).
       /// \return a real matrix of the same shape holding |X(i,j)|^power.
       static Matrixf spectrogram(Matrixcf &X, float power = 1.f) {
           const auto magnitude = X.array().abs();
           return magnitude.pow(power).matrix();
       }

       static Matrixf melfilter(int sr, int n_fft, int n_mels, int fmin, int fmax) {
           int n_f = n_fft / 2 + 1;
           Vectorf fft_freqs = (Vectorf::LinSpaced(n_f, 0.f, static_cast<float>(n_f - 1))*sr) / n_fft;

           float f_min = 0.f;
           float f_sp = 200.f / 3.f;
           float min_log_hz = 1000.f;
           float min_log_mel = (min_log_hz - f_min) / f_sp;
           float logstep = logf(6.4f) / 27.f;

           auto hz_to_mel = [=](int hz, bool htk = false) -> float {
               if (htk) {
                   return 2595.0f*log10f(1.0f + hz / 700.0f);
               }
               float mel = (hz - f_min) / f_sp;
               if (hz >= min_log_hz) {
                   mel = min_log_mel + logf(hz / min_log_hz) / logstep;
               }
               return mel;
           };
           auto mel_to_hz = [=](Vectorf &mels, bool htk = false) -> Vectorf {
               if (htk) {
                   return 700.0f*(Vectorf::Constant(n_mels + 2, 10.f).array().pow(mels.array() / 2595.0f) - 1.0f);
               }
               return (mels.array() > min_log_mel).select(((mels.array() - min_log_mel)*logstep).exp()*min_log_hz, (mels*f_sp).array() + f_min);
           };