Kaldi中应用CMVN
Author: Xin Pan
Date: 2020.01.14
在上一篇博客中记录了CMVN的统计量是如何被计算和保存下来的。现在看CMVN如何应用到自己的feats上。
建议各位看官先看之前的这篇文章,可以对CMVN计算代码中的各个变量有好的认识。
Kaldi中compute-cmvn-stats用于计算CMVN的统计量,但是并不将其应用于feats中,apply-cmvn这个命令会将CMVN应用到feats中。
Kaldi官方对于apply-cmvn的解释
Usage
应用倒谱均值和(可选)方差归一化。默认情况下按句子(utterance)进行;如果提供了--utt2spk选项,则按说话人(speaker)进行。
用法:apply-cmvn [选项] (&lt;cmvn-stats-rspecifier&gt;|&lt;cmvn-stats-rxfilename&gt;) &lt;feats-rspecifier&gt; &lt;feats-wspecifier&gt; e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-
过程
实验继续使用aishell 1 的train set 进行。实验命令如下:
apply-cmvn --utt2spk=ark:data/train/utt2spk --norm-means=true --norm-vars=false scp:data/train/cmvn.scp scp:data/train/feats.scp ark:after_cmvn.ark
// apply-cmvn: read CMVN statistics (per-utterance, per-speaker, or a single
// global stats matrix) and apply mean (and optionally variance) normalization
// to the input features, writing the normalized features out.
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
const char *usage =
"Apply cepstral mean and (optionally) variance normalization\n"
"Per-utterance by default, or per-speaker if utt2spk option provided\n"
"Usage: apply-cmvn [options] (<cmvn-stats-rspecifier>|<cmvn-stats-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n"
"e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-\n"
"See also: modify-cmvn-stats, matrix-sum, compute-cmvn-stats\n";
ParseOptions po(usage);
std::string utt2spk_rspecifier;
bool norm_vars = false;   // --norm-vars: also normalize variances
bool norm_means = true;   // --norm-means: mean normalization (on by default)
bool reverse = false;     // --reverse: apply the transform in reverse
std::string skip_dims_str;
po.Register("utt2spk", &utt2spk_rspecifier,
"rspecifier for utterance to speaker map");
po.Register("norm-vars", &norm_vars, "If true, normalize variances.");
po.Register("norm-means", &norm_means, "You can set this to false to turn off mean "
"normalization. Note, the same can be achieved by using 'fake' CMVN stats; "
"see the --fake option to compute_cmvn_stats.sh");
po.Register("skip-dims", &skip_dims_str, "Dimensions for which to skip "
"normalization: colon-separated list of integers, e.g. 13:14:15)");
po.Register("reverse", &reverse, "If true, apply CMVN in a reverse sense, "
"so as to transform zero-mean, unit-variance input into data "
"with the given mean and variance.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
if (norm_vars && !norm_means)
// Variance normalization without mean normalization is not supported.
KALDI_ERR << "You cannot normalize the variance but not the mean.";
std::string cmvn_rspecifier_or_rxfilename = po.GetArg(1);
std::string feat_rspecifier = po.GetArg(2);
std::string feat_wspecifier = po.GetArg(3);
if (!norm_means) {
// Mean normalization disabled: just copy the features through unchanged.
// CMVN is a no-op, we're not doing anything. Just echo the input
// don't even uncompress, if it was a CompressedMatrix.
SequentialGeneralMatrixReader reader(feat_rspecifier);
GeneralMatrixWriter writer(feat_wspecifier);
kaldi::int32 num_done = 0;
for (; !reader.Done(); reader.Next()) {
writer.Write(reader.Key(), reader.Value());
num_done++;
}
KALDI_LOG << "Copied " << num_done << " utterances.";
return (num_done != 0 ? 0 : 1);
}
std::vector<int32> skip_dims; // optionally use "fake"
// (zero-mean/unit-variance) stats for some
// dims to disable normalization.
if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
<< "integers)";
}
kaldi::int32 num_done = 0, num_err = 0;
SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
BaseFloatMatrixWriter feat_writer(feat_wspecifier);
if (ClassifyRspecifier(cmvn_rspecifier_or_rxfilename, NULL, NULL)
!= kNoRspecifier)
{ // reading from a Table: per-speaker or per-utt CMN/CVN.
std::string cmvn_rspecifier = cmvn_rspecifier_or_rxfilename;
// If --utt2spk is given, the mapped reader looks up stats by speaker;
// otherwise it looks them up by utterance key.
RandomAccessDoubleMatrixReaderMapped cmvn_reader(cmvn_rspecifier,
utt2spk_rspecifier);
for (; !feat_reader.Done(); feat_reader.Next())
{
std::string utt = feat_reader.Key();
Matrix<BaseFloat> feat(feat_reader.Value());
if (norm_means)
{ // mean normalization requested (always true here; see early return above)
if (!cmvn_reader.HasKey(utt))
{
KALDI_WARN << "No normalization statistics available for key "
<< utt << ", producing no output for this utterance";
num_err++;
continue;
}
Matrix<double> cmvn_stats = cmvn_reader.Value(utt);
if (!skip_dims.empty())
FakeStatsForSomeDims(skip_dims, &cmvn_stats);
if (reverse)
{
ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
}
else
{
// Typical path: ApplyCmvn (defined in src/transform/cmvn.cc, shown
// below) performs the actual mean/variance normalization in place.
ApplyCmvn(cmvn_stats, norm_vars, &feat);
}
feat_writer.Write(utt, feat);
}
else
{
// Unreachable in practice: the !norm_means case already returned above.
// Kept for safety — writes the features through unchanged.
feat_writer.Write(utt, feat);
}
num_done++;
}
}
else
{
// Argument 1 is an rxfilename: a single global stats matrix applied to all.
if (utt2spk_rspecifier != "")
KALDI_ERR << "--utt2spk option not compatible with rxfilename as input "
<< "(did you forget ark:?)";
std::string cmvn_rxfilename = cmvn_rspecifier_or_rxfilename;
bool binary;
Input ki(cmvn_rxfilename, &binary);
Matrix<double> cmvn_stats;
cmvn_stats.Read(ki.Stream(), binary);
if (!skip_dims.empty())
FakeStatsForSomeDims(skip_dims, &cmvn_stats);
for (; !feat_reader.Done(); feat_reader.Next()) {
std::string utt = feat_reader.Key();
Matrix<BaseFloat> feat(feat_reader.Value());
if (norm_means)
{
if (reverse)
{
ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
}
else
{
ApplyCmvn(cmvn_stats, norm_vars, &feat);
}
}
feat_writer.Write(utt, feat);
num_done++;
}
}
if (norm_vars)
KALDI_LOG << "Applied cepstral mean and variance normalization to "
<< num_done << " utterances, errors on " << num_err;
else
KALDI_LOG << "Applied cepstral mean normalization to "
<< num_done << " utterances, errors on " << num_err;
return (num_done != 0 ? 0 : 1);
}
catch (const std::exception &e)
{
std::cerr << e.what();
return -1;
}
}
实际执行归一化的ApplyCmvn函数定义在src/transform/cmvn.cc中:
void ApplyCmvn(const MatrixBase<double> &stats,
bool var_norm,
MatrixBase<BaseFloat> *feats)
{
// 如果进入到了这里首先可以确定就是会对均值进行归一化计算
KALDI_ASSERT(feats != NULL);
int32 dim = stats.NumCols() - 1;
if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim)
{
KALDI_ERR << "Dim mismatch: cmvn "
<< stats.NumRows() << 'x' << stats.NumCols()
<< ", feats " << feats->NumRows() << 'x' << feats->NumCols();
}
if (stats.NumRows() == 1 && var_norm)
KALDI_ERR << "You requested variance normalization but no variance stats "
<< "are supplied.";
double count = stats(0, dim); // count 计算的是现在的这个cmvn状态是由多少帧统计得到
// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
// computing an offset and representing it as stats, we use a count of one.
if (count < 1.0)
KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
<< "count = " << count;
if (!var_norm)
{ // 如果不对方差归一化的话会进入这里
Vector<BaseFloat> offset(dim);
SubVector<double> mean_stats(stats.RowData(0), dim); //mean_stats是从stats中拿的均值部分
offset.AddVec(-1.0 / count, mean_stats); // offset现在得到的是feats同样维度,但是其值都是各维度的均值,offset是各个维度的均值
feats->AddVecToRows(1.0, offset); //将offset*1 加到现在的每个feats维度上就完成了均值的归一化
return;
}
// norm(0, d) = mean offset;
// norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
Matrix<BaseFloat> norm(2, dim);
for (int32 d = 0; d < dim; d++)
{
double mean, offset, scale;
mean = stats(0, d) / count; //统计均值
double var = (stats(1, d) / count) - mean*mean,
floor = 1.0e-20; //统计方差
if (var < floor)
{
KALDI_WARN << "Flooring cepstral variance from " << var << " to "
<< floor;
var = floor;
}
scale = 1.0 / sqrt(var); //计算标准差的倒数,作为尺度因子
if (scale != scale || 1 / scale == 0.0)
KALDI_ERR << "NaN or infinity in cepstral mean/variance computation";
offset = -(mean*scale);
norm(0, d) = offset;
norm(1, d) = scale;
}
// Apply the normalization.
feats->MulColsVec(norm.Row(1)); //Equivalent to (*this) = (*this) * diag(scale). Scaling each column by a scalar taken from that dimension of the vector. 方差归一化
feats->AddVecToRows(1.0, norm.Row(0)); // 均值归一化
}