Applying CMVN in Kaldi

Author: Xin Pan

Date: 2020.01.14


In a previous post I described how the CMVN statistics are accumulated (that earlier post is here). Now let's look at how CMVN is actually applied to the feats.

I recommend reading that earlier article first; it gives a good feel for the variables that show up in the CMVN code below.

In Kaldi, compute-cmvn-stats computes the CMVN statistics but does not apply them to the feats; the apply-cmvn command is what actually applies CMVN to the feats.
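
For reference, the stats that compute-cmvn-stats writes form a 2 x (dim+1) matrix: row 0 holds the per-dimension feature sums with the frame count in the last column, and row 1 holds the per-dimension sums of squares. This is exactly how ApplyCmvn (shown at the end of this post) indexes them. A minimal self-contained sketch of recovering means and variances from that layout (plain C++, no Kaldi dependency; the toy numbers are mine):

#include <cstdio>

int main() {
  const int dim = 3; // feature dimension (toy example)
  // stats is 2 x (dim+1): row 0 = sums + frame count, row 1 = sums of squares.
  double stats[2][dim + 1] = {
      {2.0, 4.0, 6.0, /*count=*/2.0}, // per-dim sums over 2 frames
      {4.0, 10.0, 20.0, 0.0},         // per-dim sums of squares (last entry unused)
  };
  double count = stats[0][dim]; // frame count lives in the last column of row 0
  for (int d = 0; d < dim; d++) {
    double mean = stats[0][d] / count;
    double var = stats[1][d] / count - mean * mean; // E[x^2] - mean^2
    std::printf("dim %d: mean=%.3f var=%.3f\n", d, mean, var);
  }
  return 0;
}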

Kaldi's official documentation for apply-cmvn

Usage

Apply cepstral mean and (optionally) variance normalization. Per-utterance by default, or per-speaker if the utt2spk option is provided.

Usage: apply-cmvn [options] (&lt;cmvn-stats-rspecifier&gt;|&lt;cmvn-stats-rxfilename&gt;) &lt;feats-rspecifier&gt; &lt;feats-wspecifier&gt;
e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-

Walkthrough

The experiment again uses the AISHELL-1 train set. The command is as follows:

apply-cmvn --utt2spk=ark:data/train/utt2spk --norm-means=true --norm-vars=false scp:data/train/cmvn.scp scp:data/train/feats.scp ark:after_cmvn.ark

The source of apply-cmvn (src/featbin/apply-cmvn.cc) is shown below, with my comments inline:
int main(int argc, char *argv[]) {
	try {
		using namespace kaldi;

		const char *usage =
			"Apply cepstral mean and (optionally) variance normalization\n"
			"Per-utterance by default, or per-speaker if utt2spk option provided\n"
			"Usage: apply-cmvn [options] (<cmvn-stats-rspecifier>|<cmvn-stats-rxfilename>) <feats-rspecifier> <feats-wspecifier>\n"
			"e.g.: apply-cmvn --utt2spk=ark:data/train/utt2spk scp:data/train/cmvn.scp scp:data/train/feats.scp ark:-\n"
			"See also: modify-cmvn-stats, matrix-sum, compute-cmvn-stats\n";

		ParseOptions po(usage);
		std::string utt2spk_rspecifier;
		bool norm_vars = false;
		bool norm_means = true;
		bool reverse = false;
		std::string skip_dims_str;

		po.Register("utt2spk", &utt2spk_rspecifier,
			"rspecifier for utterance to speaker map");
		po.Register("norm-vars", &norm_vars, "If true, normalize variances.");
		po.Register("norm-means", &norm_means, "You can set this to false to turn off mean "
			"normalization.  Note, the same can be achieved by using 'fake' CMVN stats; "
			"see the --fake option to compute_cmvn_stats.sh");
		po.Register("skip-dims", &skip_dims_str, "Dimensions for which to skip "
			"normalization: colon-separated list of integers, e.g. 13:14:15)");
		po.Register("reverse", &reverse, "If true, apply CMVN in a reverse sense, "
			"so as to transform zero-mean, unit-variance input into data "
			"with the given mean and variance.");

		po.Read(argc, argv);

		if (po.NumArgs() != 3) {
			po.PrintUsage();
			exit(1);
		}
		if (norm_vars && !norm_means)
			// normalizing the variance requires normalizing the mean as well
			KALDI_ERR << "You cannot normalize the variance but not the mean.";


		std::string cmvn_rspecifier_or_rxfilename = po.GetArg(1);
		std::string feat_rspecifier = po.GetArg(2);
		std::string feat_wspecifier = po.GetArg(3);
		
		if (!norm_means) {
			// If mean normalization is disabled, CMVN is a no-op: just echo the
			// input features unchanged (don't even uncompress, if the input was
			// a CompressedMatrix).
			SequentialGeneralMatrixReader reader(feat_rspecifier);
			GeneralMatrixWriter writer(feat_wspecifier);
			kaldi::int32 num_done = 0;
			for (; !reader.Done(); reader.Next()) {
				writer.Write(reader.Key(), reader.Value());
				num_done++;
			}
			KALDI_LOG << "Copied " << num_done << " utterances.";
			return (num_done != 0 ? 0 : 1);
		}


		std::vector<int32> skip_dims;  // optionally use "fake"
									   // (zero-mean/unit-variance) stats for some
									   // dims to disable normalization.
		if (!SplitStringToIntegers(skip_dims_str, ":", false, &skip_dims)) {
			KALDI_ERR << "Bad --skip-dims option (should be colon-separated list of "
				<< "integers)";
		}


		kaldi::int32 num_done = 0, num_err = 0;

		SequentialBaseFloatMatrixReader feat_reader(feat_rspecifier);
		BaseFloatMatrixWriter feat_writer(feat_wspecifier);

		if (ClassifyRspecifier(cmvn_rspecifier_or_rxfilename, NULL, NULL)
			!= kNoRspecifier)
		{ // reading from a Table: per-speaker or per-utt CMN/CVN.
			std::string cmvn_rspecifier = cmvn_rspecifier_or_rxfilename;

			RandomAccessDoubleMatrixReaderMapped cmvn_reader(cmvn_rspecifier,
				utt2spk_rspecifier);
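			// "Mapped" means that if utt2spk is given, each utterance key is first
			// mapped through the utt2spk table, so per-speaker stats are looked up
			// under the utterance's speaker id.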

			for (; !feat_reader.Done(); feat_reader.Next())
			{
				std::string utt = feat_reader.Key();
				Matrix<BaseFloat> feat(feat_reader.Value());
				if (norm_means)
				{	// mean normalization requested: take this branch
					if (!cmvn_reader.HasKey(utt))
					{
						KALDI_WARN << "No normalization statistics available for key "
							<< utt << ", producing no output for this utterance";
						num_err++;
						continue;
					}
					Matrix<double> cmvn_stats = cmvn_reader.Value(utt);
					if (!skip_dims.empty())
						FakeStatsForSomeDims(skip_dims, &cmvn_stats);

					if (reverse)
					{
						ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
					}
					else
					{
						// With the plain options I use (no --reverse), the actual
						// mean/variance normalization happens here in ApplyCmvn;
						// its definition is shown below, after main().
						ApplyCmvn(cmvn_stats, norm_vars, &feat);
					}
					feat_writer.Write(utt, feat);
				}
				else
				{
					// mean normalization disabled: just write the features out unchanged
					feat_writer.Write(utt, feat);
				}
				num_done++;
			}
		}
		else
		{
			if (utt2spk_rspecifier != "")
				KALDI_ERR << "--utt2spk option not compatible with rxfilename as input "
				<< "(did you forget ark:?)";
			std::string cmvn_rxfilename = cmvn_rspecifier_or_rxfilename;
			bool binary;
			Input ki(cmvn_rxfilename, &binary);
			Matrix<double> cmvn_stats;
			cmvn_stats.Read(ki.Stream(), binary);
			if (!skip_dims.empty())
				FakeStatsForSomeDims(skip_dims, &cmvn_stats);

			for (; !feat_reader.Done(); feat_reader.Next()) {

				std::string utt = feat_reader.Key();
				Matrix<BaseFloat> feat(feat_reader.Value());
				if (norm_means)
				{
					if (reverse)
					{
						ApplyCmvnReverse(cmvn_stats, norm_vars, &feat);
					}
					else
					{
						ApplyCmvn(cmvn_stats, norm_vars, &feat);
					}
				}
				feat_writer.Write(utt, feat);
				num_done++;
			}
		}
		if (norm_vars)
			KALDI_LOG << "Applied cepstral mean and variance normalization to "
			<< num_done << " utterances, errors on " << num_err;
		else
			KALDI_LOG << "Applied cepstral mean normalization to "
			<< num_done << " utterances, errors on " << num_err;
		return (num_done != 0 ? 0 : 1);
	}
	catch (const std::exception &e)
	{
		std::cerr << e.what();
		return -1;
	}
}
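
If you want to apply CMVN from your own C++ code rather than via the command line, a minimal sketch is below. It relies only on the ApplyCmvn signature shown in this post; the header locations are my assumption from the Kaldi source layout (e.g. transform/cmvn.h for the declaration), so treat the includes as a best guess:

#include "base/kaldi-common.h"
#include "matrix/kaldi-matrix.h"
#include "transform/cmvn.h" // assumed location of the ApplyCmvn declaration

int main() {
  using namespace kaldi;
  int32 dim = 13, num_frames = 100;

  // "Fake" CMVN stats, 2 x (dim+1): row 0 = sums + count, row 1 = sums of squares.
  // Zero sums with sumsq == count give zero mean / unit variance, so this
  // particular choice makes ApplyCmvn a no-op; real stats would come from
  // compute-cmvn-stats.
  Matrix<double> cmvn_stats(2, dim + 1);
  cmvn_stats(0, dim) = num_frames; // frame count in the last column of row 0
  for (int32 d = 0; d < dim; d++) {
    cmvn_stats(0, d) = 0.0;
    cmvn_stats(1, d) = num_frames;
  }

  Matrix<BaseFloat> feats(num_frames, dim); // your features would go here
  feats.SetRandn();                         // random data for the sketch

  ApplyCmvn(cmvn_stats, /*var_norm=*/true, &feats); // normalizes in place
  KALDI_LOG << "Applied CMVN to " << feats.NumRows() << " frames.";
  return 0;
}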

ApplyCmvn is defined in src/transform/cmvn.cc:

void ApplyCmvn(const MatrixBase<double> &stats,
	bool var_norm,
	MatrixBase<BaseFloat> *feats)
{
	// If we reach this function, we know mean normalization will be applied.
	KALDI_ASSERT(feats != NULL);
	int32 dim = stats.NumCols() - 1;
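	// stats is laid out as 2 x (dim+1): row 0 holds the per-dimension feature
	// sums with the frame count in the last column; row 1 holds the sums of squares.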
	if (stats.NumRows() > 2 || stats.NumRows() < 1 || feats->NumCols() != dim)
	{
		KALDI_ERR << "Dim mismatch: cmvn "
			<< stats.NumRows() << 'x' << stats.NumCols()
			<< ", feats " << feats->NumRows() << 'x' << feats->NumCols();
	}
	if (stats.NumRows() == 1 && var_norm)
		KALDI_ERR << "You requested variance normalization but no variance stats "
		<< "are supplied.";

	double count = stats(0, dim); // count = number of frames over which these CMVN stats were accumulated
	// Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
	// computing an offset and representing it as stats, we use a count of one.
	if (count < 1.0)
		KALDI_ERR << "Insufficient stats for cepstral mean and variance normalization: "
		<< "count = " << count;

	if (!var_norm)
	{	// mean-only path: variance normalization was not requested
		Vector<BaseFloat> offset(dim);
		SubVector<double> mean_stats(stats.RowData(0), dim); // row 0 of stats: the per-dimension feature sums
		offset.AddVec(-1.0 / count, mean_stats); // offset = -(sum / count), i.e. the negative per-dimension mean
		feats->AddVecToRows(1.0, offset); // adding offset to every frame subtracts the mean: normalization done
		return;
	}
	// norm(0, d) = mean offset;
	// norm(1, d) = scale, e.g. x(d) <-- x(d)*norm(1, d) + norm(0, d).
	Matrix<BaseFloat> norm(2, dim);
	for (int32 d = 0; d < dim; d++)
	{
		double mean, offset, scale;
		mean = stats(0, d) / count; // mean of dimension d
		double var = (stats(1, d) / count) - mean*mean, // variance of dimension d: E[x^2] - mean^2
			floor = 1.0e-20;
		if (var < floor)
		{
			KALDI_WARN << "Flooring cepstral variance from " << var << " to "
				<< floor;
			var = floor;
		}
		scale = 1.0 / sqrt(var); // inverse standard deviation, used as the scaling factor
		if (scale != scale || 1 / scale == 0.0)
			KALDI_ERR << "NaN or infinity in cepstral mean/variance computation";
		offset = -(mean*scale);
		norm(0, d) = offset;
		norm(1, d) = scale;
	}
	// Apply the normalization.
	feats->MulColsVec(norm.Row(1)); // equivalent to (*this) = (*this) * diag(scale): each column scaled by its entry of the vector; variance normalization
	feats->AddVecToRows(1.0, norm.Row(0)); // add the offsets to every frame: mean normalization
}
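
To make those last two calls concrete: for each dimension d the loop precomputes scale(d) = 1/stddev(d) and offset(d) = -mean(d)*scale(d), so x*scale + offset equals the textbook (x - mean)/stddev. A self-contained numeric check (plain C++, no Kaldi; the toy numbers are mine):

#include <cmath>
#include <cstdio>

int main() {
  // One dimension with toy stats: mean = 2.0, var = 4.0 (stddev = 2.0).
  double mean = 2.0, var = 4.0;
  double scale = 1.0 / std::sqrt(var); // 0.5, what norm(1, d) holds
  double offset = -(mean * scale);     // -1.0, what norm(0, d) holds

  double x = 6.0; // one feature value
  double via_norm = x * scale + offset;        // the MulColsVec + AddVecToRows form
  double direct = (x - mean) / std::sqrt(var); // textbook (x - mean) / stddev
  std::printf("via norm: %.3f, direct: %.3f\n", via_norm, direct); // both 2.000
  return 0;
}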

Reference

Documentation for the MulColsVec() function
