kaldi--train_lstm_asr_sad_1a.sh代码学习

最新推荐文章于 2022-09-14 22:38:03 发布

陌上阳光

最新推荐文章于 2022-09-14 22:38:03 发布

阅读量972

点赞数 1

分类专栏： kaldi 文章标签： kaldi swbd

本文链接：https://blog.csdn.net/weixin_42831564/article/details/90296563

版权

kaldi 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

train_lstm_asr_sad_1a.sh代码学习

#!/bin/bash

# Copyright 2017 Nagendra Kumar Goel
# Apache 2.0

# This is a script to train a TDNN-LSTM for speech activity detection (SAD) 
# using LSTM for long-context information.

# Shell options used by the whole script:
#   -o pipefail  a pipeline's exit status becomes that of the rightmost
#                command that failed (zero only if every command succeeded).
#   -u           expanding an unset variable is treated as an error instead
#                of silently yielding an empty string.
set -uo pipefail

# At this script level we don't support not running on GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

# ---- Stage control ----------------------------------------------------------
# Each step below runs only when $stage is <= that step's number, so a larger
# value skips the earlier steps (handy when resuming or testing).
stage=0
train_stage=-10      # passed to the training script as --stage
get_egs_stage=-10    # passed to the training script as --egs.stage
egs_opts=""          # NOTE(review): not referenced in the visible part of this script

# ---- Example (chunk) geometry -----------------------------------------------
chunk_width=20            # chunk width, passed as --egs.chunk-width
extra_left_context=60     # 60 frames of extra context to the left ...
extra_right_context=0     # ... and none to the right

# ---- Network dimensions (substituted into the xconfig) ----------------------
relu_dim=256
cell_dim=256
projection_dim=64

# ---- Training options -------------------------------------------------------
num_epochs=4                              # number of epochs
initial_effective_lrate=0.0003            # initial effective learning rate
final_effective_lrate=0.00003             # final effective learning rate
num_jobs_initial=3
num_jobs_final=8
remove_egs=true                           # passed as --cleanup.remove-egs
max_param_change=0.2                      # Small max-param change for small network
dropout_schedule="0,0@0.20,0.1@0.50,0"    # dropout regularisation schedule

# ---- Miscellaneous ----------------------------------------------------------
egs_dir=""           # passed as --egs.dir
nj=40
feat_type=raw        # NOTE(review): not referenced in the visible part of this script
config_dir=""        # NOTE(review): not referenced in the visible part of this script

# ---- Output location --------------------------------------------------------
dir=""               # empty means: use the default chosen further down
affix=1a             # appended to $dir as "_$affix" when non-empty

# ---- Inputs -----------------------------------------------------------------
# Training features (feats.scp and utt2spk are read from here) and the
# directory holding the training targets (targets.scp).
data_dir=exp/segmentation_1a/train_whole_hires_bp
targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3

# Source the recipe's helper scripts into the current shell.
#   cmd.sh           presumably defines $decode_cmd, which the training step
#                    uses but this file never assigns -- confirm.
#   path.sh          optional; sourced only when it exists.
#   parse_options.sh presumably lets "--name value" command-line flags override
#                    the variables assigned above -- confirm against the utility.
. ./cmd.sh
if [[ -f ./path.sh ]]; then
  . ./path.sh
fi
. ./utils/parse_options.sh

# Use the default experiment directory when dir is unset or empty at this
# point (":=" assigns only in that case).
: "${dir:=exp/segmentation_1a/tdnn_lstm_asr_sad}"

# Append "_<affix>" only when affix is non-empty; with the default affix=1a the
# default directory becomes exp/segmentation_1a/tdnn_lstm_asr_sad_1a.
if [ -n "${affix:-}" ]; then
  dir="${dir}_${affix}"
fi

# Refuse to continue when the cuda-compiled check fails, i.e. (per the message
# below) when Kaldi has not been compiled with CUDA.
if ! cuda-compiled; then
  cat <<EOF
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  # Exit unconditionally: chaining "cat ... && exit 1" would let the script
  # carry on without CUDA whenever printing the message itself failed.
  exit 1
fi

# Create the experiment directory and stop right away if that is impossible;
# every later step writes below it.
mkdir -p -- "$dir" || exit 1

# samples_per_iter = int(400000 / chunk_width); 20000 at the default
# chunk_width=20. The value is handed to perl as an argument rather than
# being pasted into the perl program text.
samples_per_iter=$(perl -e 'print int(400000 / $ARGV[0])' "$chunk_width")

# Stage 5: write the network description (xconfig) and convert it into config
# files under $dir/configs. The network is a stack of relu-renorm (TDNN) layers
# interleaved with two fast-lstmp (LSTM) layers, ending in a 3-way output layer.
if [ "$stage" -le 5 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  # Resolve the input feature dimension before writing the xconfig so that a
  # feat-to-dim failure stops the script instead of silently producing an
  # "input dim=" line with no value.
  feat_dim=$(feat-to-dim "scp:$data_dir/feats.scp" -) || exit 1

  mkdir -p "$dir/configs" || exit 1
  cat <<EOF > "$dir/configs/network.xconfig"
  input dim=$feat_dim name=input
  fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat 

  relu-renorm-layer name=tdnn1 input=lda dim=$relu_dim add-log-stddev=true
  relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true
  relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true
  fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-3 dropout-proportion=0.0
  relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) add-log-stddev=true dim=$relu_dim
  fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-6 dropout-proportion=0.0
  relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim

  output-layer name=output include-log-softmax=true dim=3 learning-rate-factor=0.1 input=tdnn5
EOF
  # Convert the xconfig into the actual config files; abort if that fails
  # rather than going on to train from missing or stale configs.
  steps/nnet3/xconfig_to_configs.py --xconfig-file "$dir/configs/network.xconfig" \
    --config-dir "$dir/configs/" || exit 1

  # Append the number of targets to the $dir/configs/vars file.
  cat <<EOF >> "$dir/configs/vars"
num_targets=3
EOF
fi

# Stage 6: train the network by calling steps/nnet3/train_raw_rnn.py with the
# options configured at the top of this script.
if [ "$stage" -le 6 ]; then
  # Number of utterances = number of lines in utt2spk.
  num_utts=$(wc -l < "$data_dir/utt2spk") || exit 1
  # Set num_utts_subset for diagnostics to a reasonable value
  # of max(min(0.005 * num_utts, 300), 12)
  num_utts_subset=$(perl -e '$n=int($ARGV[0] * 0.005); print ($n > 300 ? 300 : ($n < 12 ? 12 : $n))' "$num_utts")

  # All option notes are kept here, above the command. A backslash continues a
  # command only when it is the last character on its line; writing
  # "\   # comment" escapes a space instead, which ends the command on that
  # line and makes the shell run every following "--option" line as a separate
  # command.
  #
  #   --stage                            train_stage (default -10)
  #   --egs.chunk-width                  chunk_width (default 20)
  #   --egs.dir / --egs.stage            egs_dir (default empty) / get_egs_stage (default -10)
  #   --egs.chunk-left-context           extra_left_context (default 60)
  #   --egs.chunk-right-context          extra_right_context (default 0)
  #   --egs.chunk-left-context-initial   0: at the first (leftmost) frame no
  #                                      frames are looked at to the left,
  #                                      instead of extra_left_context
  #   --egs.chunk-right-context-final    0: at the last (rightmost) frame no
  #                                      frames are looked at to the right,
  #                                      instead of extra_right_context
  #   --trainer.num-epochs               num_epochs (default 4)
  #   --trainer.samples-per-iter         samples_per_iter, i.e.
  #                                      int(400000 / chunk_width); 20000 at the
  #                                      default chunk_width
  #   --nj                               nj (default 40)
  #   --targets-scp                      targets.scp, the file needed to pair
  #                                      frames with their labels
  steps/nnet3/train_raw_rnn.py --stage="$train_stage" \
    --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
    --egs.chunk-width="$chunk_width" \
    --egs.dir="$egs_dir" --egs.stage="$get_egs_stage" \
    --egs.chunk-left-context="$extra_left_context" \
    --egs.chunk-right-context="$extra_right_context" \
    --egs.chunk-left-context-initial=0 \
    --egs.chunk-right-context-final=0 \
    --trainer.num-epochs="$num_epochs" \
    --trainer.samples-per-iter="$samples_per_iter" \
    --trainer.optimization.num-jobs-initial="$num_jobs_initial" \
    --trainer.optimization.num-jobs-final="$num_jobs_final" \
    --trainer.optimization.initial-effective-lrate="$initial_effective_lrate" \
    --trainer.optimization.final-effective-lrate="$final_effective_lrate" \
    --trainer.optimization.shrink-value=0.99 \
    --trainer.dropout-schedule="$dropout_schedule" \
    --trainer.rnn.num-chunk-per-minibatch=128,64 \
    --trainer.optimization.momentum=0.5 \
    --trainer.deriv-truncate-margin=10 \
    --trainer.max-param-change="$max_param_change" \
    --trainer.compute-per-dim-accuracy=true \
    --cmd="$decode_cmd" --nj "$nj" \
    --cleanup=true \
    --cleanup.remove-egs="$remove_egs" \
    --cleanup.preserve-model-interval=10 \
    --use-gpu=true \
    --use-dense-targets=true \
    --feat-dir="$data_dir" \
    --targets-scp="$targets_dir/targets.scp" \
    --egs.opts="--frame-subsampling-factor 3 --num-utts-subset $num_utts_subset" \
    --dir="$dir" || exit 1
fi

# Stage 7: reduce the training targets to a single 3-element vector in
# $dir/post_output.vec and record the frame subsampling factor.
# NOTE(review): judging by the tool names, each targets matrix is summed over
# its rows and the per-utterance sums are then added together, giving per-class
# totals -- confirm against the Kaldi tools.
if [ "$stage" -le 7 ]; then
  # awk re-emits fields 2-4 of the text-format vector inside " [ ... ]".
  # pipefail is enabled at the top of this script, so "|| exit 1" fires when
  # any command of the pipeline fails, instead of leaving a bad
  # post_output.vec behind while the script still exits with status 0.
  copy-feats "scp:$targets_dir/targets.scp" ark:- | \
    matrix-sum-rows ark:- ark:- | vector-sum --binary=false ark:- - | \
    awk '{print " [ "$2" "$3" "$4" ]"}' > "$dir/post_output.vec" || exit 1

  # Same factor as the --frame-subsampling-factor 3 given to the egs options.
  echo 3 > "$dir/frame_subsampling_factor" || exit 1
fi

陌上阳光

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
kaldi--train_lstm_asr_sad_1a.sh代码学习

train_lstm_asr_sad_1a.sh代码学习#!/bin/bash# Copyright 2017 Nagendra Kumar Goel# Apache 2.0# This is a script to train a TDNN-LSTM for speech activity detection (SAD) # using LSTM for long-context ...
复制链接

扫一扫

专栏目录