ivector-extractor-test.cc 文件读书笔记

// ivector/ivector-extractor-test.cc

// Copyright 2013  Daniel Povey

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "gmm/model-test-common.h"
#include "gmm/full-gmm-normal.h"
#include "ivector/ivector-extractor.h"
#include "util/kaldi-io.h"


namespace kaldi {

void TestIvectorExtractorIO(const IvectorExtractor &extractor) {
  std::ostringstream ostr;
  bool binary = (Rand() % 2 == 0);
  extractor.Write(ostr, binary);
  std::istringstream istr(ostr.str());
  IvectorExtractor extractor2;
  extractor2.Read(istr, binary);
  std::ostringstream ostr2;
  extractor2.Write(ostr2, binary);
  KALDI_ASSERT(ostr.str() == ostr2.str());
}
// Serialization checks for IvectorExtractorStats.
//
// Part 1: write `stats` in a randomly chosen format, read it back, write the
// copy again, and (in binary mode only) assert the two serializations match.
//
// Part 2: exercise the "add" mode of Read() against the in-memory Add().  The
// binary serialization of `stats` is read twice into one object (the second
// time adding to what is already there), and the same thing is built in
// memory with a copy followed by Add().  Both results are written out in text
// mode; the comparison between them is currently disabled.
void TestIvectorExtractorStatsIO(IvectorExtractorStats &stats) {
  {  // Part 1: plain write / read / write round trip.
    const bool binary = (Rand() % 2 == 0);

    std::ostringstream first_dump;
    stats.Write(first_dump, binary);

    std::istringstream input(first_dump.str());
    IvectorExtractorStats reloaded;
    reloaded.Read(input, binary);

    std::ostringstream second_dump;
    reloaded.Write(second_dump, binary);

    // Text mode is skipped: it used to fail on purely cosmetic differences
    // in number formatting, such as 8.2244e+06 vs 8.22440e+06.
    if (binary) {
      KALDI_ASSERT(first_dump.str() == second_dump.str());
    }
  }

  {  // Part 2: Read(..., add=true) versus Add().  The intermediate
     // serialization is always binary; text mode would not give identical
     // results because of its limited precision.
    std::ostringstream binary_dump;
    stats.Write(binary_dump, true);

    IvectorExtractorStats read_twice;
    {
      std::istringstream input(binary_dump.str());
      read_twice.Read(input, true);
    }
    {
      std::istringstream input(binary_dump.str());
      read_twice.Read(input, true, true);  // add to existing.
    }

    IvectorExtractorStats added_in_memory(stats);
    added_in_memory.Add(stats);

    std::ostringstream read_twice_text;
    read_twice.Write(read_twice_text, false);

    std::ostringstream added_text;
    added_in_memory.Write(added_text, false);

    // Comparison intentionally left disabled:
    //   KALDI_ASSERT(read_twice_text.str() == added_text.str());
  }
}

void TestIvectorExtraction(const IvectorExtractor &extractor,
                           const MatrixBase<BaseFloat> &feats,
                           const FullGmm &fgmm) {
  if (extractor.IvectorDependentWeights())
    return;  // Nothing to do as online iVector estimator does not work in this
             // case.
  int32 num_frames = feats.NumRows(),
      feat_dim = feats.NumCols(),
      num_gauss = extractor.NumGauss(),
      ivector_dim = extractor.IvectorDim();
  Posterior post(num_frames);

  double tot_log_like = 0.0;
  for (int32 t = 0; t < num_frames; t++) {
    SubVector<BaseFloat> frame(feats, t);
    Vector<BaseFloat> posterior(fgmm.NumGauss(), kUndefined);
    tot_log_like += fgmm.ComponentPosteriors(frame, &posterior);
    for (int32 i = 0; i < posterior.Dim(); i++)
      post[t].push_back(std::make_pair(i, posterior(i)));
  }
    
  // The zeroth and 1st-order stats are in "utt_stats".
  IvectorExtractorUtteranceStats utt_stats(num_gauss, feat_dim,
                                           false);
  utt_stats.AccStats(feats, post);

  OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(),
                                            extractor.PriorOffset(),
                                            0.0);
  
  for (int32 t = 0; t < num_frames; t++) {
    online_stats.AccStats(extractor, feats.Row(t), post[t]);
  }
  
  Vector<double> ivector1(ivector_dim), ivector2(ivector_dim);

  extractor.GetIvectorDistribution(utt_stats, &ivector1, NULL);

  int32 num_cg_iters = -1;  // for testing purposes, compute it exactly.
  online_stats.GetIvector(num_cg_iters, &ivector2);

  KALDI_LOG << "ivector1 = " << ivector1;
  KALDI_LOG << "ivector2 = " << ivector2;

  // objf change vs. default iVector.  note, here I'm using objf
  // and auxf pretty much interchangeably :-(
  double objf_change2 = online_stats.ObjfChange(ivector2) *
      utt_stats.NumFrames();

  Vector<double> ivector_baseline(ivector_dim);
  ivector_baseline(0) = extractor.PriorOffset();
  double objf_change1 = extractor.GetAuxf(utt_stats, ivector1) -
      extractor.GetAuxf(utt_stats, ivector_baseline);
  KALDI_LOG << "objf_change1 = " << objf_change1
            << ", objf_change2 = " << objf_change2;
  
  KALDI_ASSERT(ivector1.ApproxEqual(ivector2));
}


void UnitTestIvectorExtractor() {
  FullGmm fgmm;
  int32 dim = 5 + Rand() % 5, num_comp = 1 + Rand() % 5;
  KALDI_LOG << "Num Gauss = " << num_comp;
  unittest::InitRandFullGmm(dim, num_comp, &fgmm);
  FullGmmNormal fgmm_normal(fgmm);

  IvectorExtractorOptions ivector_opts;
  ivector_opts.ivector_dim = dim + 5;
  ivector_opts.use_weights = (Rand() % 2 == 0);
  KALDI_LOG << "Feature dim is " << dim
            << ", ivector dim is " << ivector_opts.ivector_dim;
  IvectorExtractor extractor(ivector_opts, fgmm);
  TestIvectorExtractorIO(extractor);

  IvectorExtractorStatsOptions stats_opts;
  if (Rand() % 2 == 0) stats_opts.update_variances = false;
  stats_opts.num_samples_for_weights = 100; // Improve accuracy
  // of estimation, since we do it with relatively few utterances,
  // and we're testing the convergence.

  int32 num_utts = 1 + Rand() % 5;
  std::vector<Matrix<BaseFloat> > all_feats(num_utts);
  for (int32 utt = 0; utt < num_utts; utt++) {
    int32 num_frames = 100 + Rand() % 200;
    if (Rand() % 2 == 0) num_frames *= 10;
    if (Rand() % 2 == 0) num_frames /= 1.0;
    Matrix<BaseFloat> feats(num_frames, dim);
    fgmm_normal.Rand(&feats);
    feats.Swap(&all_feats[utt]);
  }

  int32 num_iters = 4;
  double last_auxf_impr = 0.0, last_auxf = 0.0;
  for (int32 iter = 0; iter < num_iters; iter++) {
    IvectorExtractorStats stats(extractor, stats_opts);
      
    for (int32 utt = 0; utt < num_utts; utt++) {
      Matrix<BaseFloat> &feats = all_feats[utt];
      stats.AccStatsForUtterance(extractor, feats, fgmm);
      TestIvectorExtraction(extractor, feats, fgmm);
    }
    TestIvectorExtractorStatsIO(stats);
    
    IvectorExtractorEstimationOptions estimation_opts;
    estimation_opts.gaussian_min_count = dim + 5;
    double auxf = stats.AuxfPerFrame(),
        auxf_impr = stats.Update(estimation_opts, &extractor);

    KALDI_LOG << "Iter " << iter << ", auxf per frame was " << auxf
              << ", improvement in this update "
              << "phase was " << auxf_impr;
    if (iter > 0) {
      double auxf_change = auxf - last_auxf;
      KALDI_LOG << "Predicted auxf change from last update phase was "
                << last_auxf_impr << " versus observed change "
                << auxf_change;
      double wiggle_room = (ivector_opts.use_weights ? 5.0e-05 : 1.0e-08);
      // The weight update is (a) not exact, and (b) relies on sampling, [two
      // separate issues], so it might not always improve.  But with
      // a large number of "weight samples", it's OK.
      KALDI_ASSERT(auxf_change >= last_auxf_impr - wiggle_room);
    }
    last_auxf_impr = auxf_impr;
    last_auxf = auxf;
  }
  std::cout << "********************************************************************************************\n";
}

}

// Test driver: sets the verbose level to 5, runs the randomized extractor
// test ten times, and prints a success message to stdout.
int main() {
  using namespace kaldi;
  SetVerboseLevel(5);

  const int num_runs = 10;
  for (int run = 0; run < num_runs; ++run) {
    UnitTestIvectorExtractor();
  }

  std::cout << "Test OK.\n";
  return 0;
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
metadata-extractor是一个开源的Java库,它提供了一种方便的方式来从各种类型的文件中提取元数据。元数据是关于数据的数据,它描述了一份文件的属性、特征和内容。

metadata-extractor库支持从图片、音频、视频和各种文档格式(如PDF和Microsoft Office文档等)中提取元数据。它可以读取文件中的各种元数据信息,包括文件名称、大小、创建日期、修改日期、作者、描述、GPS位置、相机型号、拍摄日期、曝光时间、ISO速度等等。

使用metadata-extractor API非常简单。首先,我们需要将库添加到我们的项目中。然后,我们可以使用Java代码来创建一个Metadata对象,并将文件路径作为参数传递给它。然后,我们可以使用Metadata对象提供的各种方法来获取特定的元数据信息。

例如,如果我们想获取一张图片的拍摄日期和相机型号,我们可以使用如下代码:

```java
File file = new File("image.jpg");
Metadata metadata = ImageMetadataReader.readMetadata(file);
Directory directory = metadata.getFirstDirectoryOfType(ExifIFD0Directory.class);
Date date = directory.getDate(ExifIFD0Directory.TAG_DATETIME);
String cameraModel = directory.getString(ExifIFD0Directory.TAG_MODEL);
```

这段代码首先创建了一个File对象,并将文件路径传递给ImageMetadataReader.readMetadata()方法来读取图片的元数据。然后,它使用getFirstDirectoryOfType()方法来获取ExifIFD0Directory目录,并从中获取拍摄日期和相机型号。

metadata-extractor API还提供了其他方便的方法来获取和解析不同类型文件的元数据。通过使用这个库,我们可以轻松地获取各种文件中的元数据,方便我们在应用程序中进行进一步处理和分析。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

落雪snowflake

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值