Kaldi 实战学习(1)说话人识别小例子(egs/aishell/v1)

代码

进入:~/kaldi/egs/aishell/v1

# Location of the corpus on disk and the base URL it is downloaded from.
# Shell note: quoting the value is optional here, but there must be no
# spaces on either side of '='.
data=/export/a05/xna/data
data_url=www.openslr.org/resources/33

# Source the helper files into the current shell ('.' works much like an
# import). Presumably cmd.sh defines $train_cmd (used by the later steps)
# and path.sh puts the Kaldi tools on PATH -- confirm against those files.
. ./cmd.sh
. ./path.sh

# Abort the script as soon as any command fails.
set -e # exit on error

# Download and unpack the corpus. Each call passes three arguments: the
# local data root, the base URL, and the name of the part to fetch.
# Both parts are fetched unconditionally, one after the other.
# NOTE(review): presumably data_aishell holds the audio/transcripts and
# resource_aishell the auxiliary resources -- confirm against
# local/download_and_untar.sh.
local/download_and_untar.sh "$data" "$data_url" data_aishell
local/download_and_untar.sh "$data" "$data_url" resource_aishell

# Data preparation: is given the wav and transcript directories of the
# unpacked corpus. Presumably it creates the data/train and data/test
# directories that the later steps read -- confirm.
# Expansions are quoted so a data root containing spaces or glob characters
# is passed through as a single argument.
local/aishell_data_prep.sh "$data/data_aishell/wav" \
  "$data/data_aishell/transcript"

# Extract MFCC features (a large-capacity disk is recommended for storing
# them). Each wav is first cut into many frames, and every frame is
# described by a vector of numbers; those vectors are the MFCCs.
mfccdir=mfcc

for x in train test; do
  # Positional arguments: <data dir> <log dir> <mfcc output dir>.
  # Everything introduced by "--" is an option / hyper-parameter setting.
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 10 \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir"

  # Compute the VAD decisions; called with the same three positional
  # arguments as make_mfcc.sh, so its output lands in $mfccdir as well.
  sid/compute_vad_decision.sh --nj 10 --cmd "$train_cmd" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir"

  # fix_data_dir.sh takes a single argument: the data directory.
  utils/fix_data_dir.sh "data/$x"
done

# Train the diag UBM on data/train; the result goes to exp/diag_ubm_1024.
# (1024 is passed as the size argument -- presumably the number of
# Gaussians; confirm against sid/train_diag_ubm.sh.)
sid/train_diag_ubm.sh \
  --nj 10 \
  --cmd "$train_cmd" \
  --num-threads 16 \
  data/train 1024 exp/diag_ubm_1024

# Train the full UBM, starting from the diag UBM directory.
sid/train_full_ubm.sh \
  --nj 10 \
  --cmd "$train_cmd" \
  data/train exp/diag_ubm_1024 exp/full_ubm_1024

# Train the i-vector extractor from the full UBM (5 iterations; the job
# command is extended with a 10G memory request).
sid/train_ivector_extractor.sh \
  --cmd "$train_cmd --mem 10G" \
  --num-iters 5 \
  exp/full_ubm_1024/final.ubm data/train exp/extractor_1024

# Extract i-vectors for the training set.
sid/extract_ivectors.sh \
  --cmd "$train_cmd" \
  --nj 10 \
  exp/extractor_1024 data/train exp/ivector_train_1024

# Train the PLDA model on the length-normalized training i-vectors.
# $train_cmd is deliberately left unquoted: it is used as the command to
# run and may carry its own options, so it must undergo word splitting.
$train_cmd exp/ivector_train_1024/log/plda.log \
  ivector-compute-plda \
  ark:data/train/spk2utt \
  'ark:ivector-normalize-length scp:exp/ivector_train_1024/ivector.scp  ark:- |' \
  exp/ivector_train_1024/plda

# --- split the test set into enroll and eval: begin ---
mkdir -p data/test/enroll data/test/eval
cp data/test/{spk2utt,feats.scp,vad.scp} data/test/enroll
cp data/test/{spk2utt,feats.scp,vad.scp} data/test/eval

# Split the test-set utt2spk into an enroll utt2spk and an eval utt2spk.
# Input: test utt2spk; outputs: enroll utt2spk, eval utt2spk.
# NOTE(review): presumably, for every speaker, 3 randomly chosen utterances
# become the enroll set and all remaining ones the eval set -- confirm
# against local/split_data_enroll_eval.py.
local/split_data_enroll_eval.py data/test/utt2spk \
  data/test/enroll/utt2spk data/test/eval/utt2spk

# Produce the trials file from the eval utt2spk. Each line has the form:
#   uttid spkid target|nontarget
trials=data/test/aishell_speaker_ver.lst
local/produce_trials.py data/test/eval/utt2spk "$trials"

# utils/fix_data_dir.sh:
# This script makes sure that only the segments present in
# all of "feats.scp", "wav.scp" [if present], segments [if present]
# text, and utt2spk are present in any of them.
# It puts the original contents of data-dir into
# data-dir/.backup
utils/fix_data_dir.sh data/test/enroll
utils/fix_data_dir.sh data/test/eval
# --- split the test set into enroll and eval: end ---

# Extract i-vectors for the enroll set.
sid/extract_ivectors.sh \
  --cmd "$train_cmd" \
  --nj 10 \
  exp/extractor_1024 data/test/enroll exp/ivector_enroll_1024

# Extract i-vectors for the eval set.
sid/extract_ivectors.sh \
  --cmd "$train_cmd" \
  --nj 10 \
  exp/extractor_1024 data/test/eval exp/ivector_eval_1024

# Compute the PLDA scores.
# The trials file is stored as "uttid spkid label"; the awk stage swaps the
# first two columns so each trial is passed as "<spkid> <uttid>".
# Escaping note: inside double quotes, \\\$2 reaches the command as \$2.
# Presumably $train_cmd (e.g. run.pl/queue.pl) evaluates the string in
# another shell, where the remaining backslash stops $2/$1 from being
# expanded before awk sees them -- confirm before touching the quoting.
# $train_cmd itself is left unquoted on purpose so it can word-split into
# a command plus its options.

$train_cmd exp/ivector_eval_1024/log/plda_score.log \
  ivector-plda-scoring --num-utts=ark:exp/ivector_enroll_1024/num_utts.ark \
  exp/ivector_train_1024/plda \
  ark:exp/ivector_enroll_1024/spk_ivector.ark \
  "ark:ivector-normalize-length scp:exp/ivector_eval_1024/ivector.scp ark:- |" \
  "cat '$trials' | awk '{print \\\$2, \\\$1}' |" exp/trials_out

# Source: ~/kaldi/src/ivectorbin/ivector-plda-scoring.cc
# Uses the PLDA model to compute log-likelihood ratios for the trials.
# Input (trials-file): "<key1> <key2>\n"
# Output: "<key1> <key2> [<dot-product>]\n"

# Compute the EER.
# Pipeline: take the score (3rd column of exp/trials_out), paste it in front
# of the corresponding line of the trials file ("uttid spkid label"), keep
# only the score and the target/nontarget label (columns 1 and 4 of the
# pasted line), and feed those pairs to compute-eer on stdin.
# "$trials" is quoted so the path is always passed to paste as one argument.
awk '{print $3}' exp/trials_out \
  | paste - "$trials" \
  | awk '{print $1, $4}' \
  | compute-eer -

# Result
# Scoring against data/test/aishell_speaker_ver.lst
# Equal error rate is 0.140528%, at threshold -12.018

exit 0

收获

  1. Linux shell 脚本的一些基础知识:比如用 `$变量` 引用变量、用 `变量=值` 赋值(`=` 两侧不能有空格)、用 `--选项` 设置超参数、用 `. 文件` 载入其他脚本
  2. kaldi做说话人识别的大致框架

参考:

  1. 从说话人识别demo开始学习kaldi–(1)run.sh
  2. kaldi入门:搭建第一个中文ASR (AISHELL-1)
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值