#!/bin/bash
# train_lstm_asr_sad_1a.sh -- annotated walkthrough ("code study") of this script.
#
# Copyright 2017 Nagendra Kumar Goel
# Apache 2.0
#
# This is a script to train a TDNN-LSTM for speech activity detection (SAD)
# using LSTM for long-context information.
#
# At this script level we don't support not running on GPU, as it would be painfully slow.
# If you want to run without GPU you'd have to call train_tdnn.sh with --gpu false,
# --num-threads 16 and --minibatch-size 128.

# ---- Options ---------------------------------------------------------------
# The values below are defaults; they are defined before utils/parse_options.sh
# is sourced so that (presumably, per that helper's convention) each one can be
# overridden from the command line.

# Stage gate: every "if [ $stage -le N ]" section further down runs only when
# stage <= N, so passing a larger value skips the earlier steps (handy for
# resuming or testing a single step).
stage=0
train_stage=-10        # forwarded to the trainer as --stage
get_egs_stage=-10      # forwarded to the trainer as --egs.stage
egs_opts=              # not referenced in the rest of this script as shown

# Chunk width (in frames) of each training example.
chunk_width=20

# Extra context around each chunk: look 60 frames to the left, none to the right.
extra_left_context=60
extra_right_context=0

# Layer sizes used in the xconfig network definition.
relu_dim=256
cell_dim=256
projection_dim=64

# Training options.
num_epochs=4                       # number of epochs
initial_effective_lrate=0.0003     # learning rate at the start of training
final_effective_lrate=0.00003      # learning rate at the end of training
num_jobs_initial=3
num_jobs_final=8
remove_egs=true
max_param_change=0.2 # Small max-param change for small network
dropout_schedule='0,0@0.20,0.1@0.50,0'   # dropout (regularization) schedule

egs_dir=               # forwarded to the trainer as --egs.dir
nj=40                  # not referenced in the rest of this script as shown
feat_type=raw          # not referenced in the rest of this script as shown
config_dir=            # not referenced in the rest of this script as shown
dir=                   # output directory; a default is filled in below when empty
affix=1a               # suffix appended to $dir

# Training feature directory and directory holding the targets (labels).
data_dir=exp/segmentation_1a/train_whole_hires_bp
targets_dir=exp/segmentation_1a/train_whole_combined_targets_sub3

# ---- Environment and command-line parsing ----------------------------------
# cmd.sh / path.sh set up the site-specific environment; parse_options.sh
# processes the command-line options.
. ./cmd.sh
if [ -f ./path.sh ]; then . ./path.sh; fi
. ./utils/parse_options.sh

# Strict mode is enabled only after the helper files above have been sourced:
# they are outside this script's control and may legitimately expand variables
# that are unset (which "set -u" would turn into a fatal error).
#   pipefail: a pipeline's exit status is that of the right-most command that
#             failed, instead of always that of the last command.
#   -u:       expanding an unset variable is an error and aborts the script.
set -o pipefail
set -u

# Use the default output directory when --dir was not given.
if [ -z "$dir" ]; then
  dir=exp/segmentation_1a/tdnn_lstm_asr_sad
fi
# ${affix:+_$affix} expands to "_$affix" only when affix is non-empty, so with
# the defaults dir becomes exp/segmentation_1a/tdnn_lstm_asr_sad_1a.
dir=$dir${affix:+_$affix}

# Refuse to continue when cuda-compiled reports failure (no CUDA support).
if ! cuda-compiled; then
  cat <<EOF
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
  exit 1
fi

mkdir -p "$dir"
# With the default chunk_width=20 this evaluates to 400000 / 20 = 20000.
samples_per_iter=$(perl -e "print int(400000 / $chunk_width)")

# Stage 5: write the network description (xconfig) and convert it into the
# config files used for training.
if [ "$stage" -le 5 ]; then
  echo "$0: creating neural net configs using the xconfig parser";
  mkdir -p "$dir/configs"

  # Input feature dimension, read from the training features. Resolved up
  # front so that a failure stops the script instead of silently producing an
  # "input dim=" line with no value.
  feat_dim=$(feat-to-dim "scp:$data_dir/feats.scp" -) || exit 1

  # Network layout, in order: input -> fixed affine layer "lda" over frames
  # -2..2 -> three relu-renorm layers (tdnn1-3) -> fast-lstmp-layer lstm1 ->
  # tdnn4 -> fast-lstmp-layer lstm2 -> tdnn5 -> output layer with 3 outputs
  # and log-softmax. The fast-lstmp-layer lines are the LSTM layers.
  cat <<EOF > "$dir/configs/network.xconfig"
input dim=$feat_dim name=input
fixed-affine-layer name=lda input=Append(-2,-1,0,1,2) affine-transform-file=$dir/configs/lda.mat
relu-renorm-layer name=tdnn1 input=lda dim=$relu_dim add-log-stddev=true
relu-renorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=$relu_dim add-log-stddev=true
relu-renorm-layer name=tdnn3 input=Append(-3,0,3,6) dim=$relu_dim add-log-stddev=true
fast-lstmp-layer name=lstm1 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-3 dropout-proportion=0.0
relu-renorm-layer name=tdnn4 input=Append(-6,0,6,12) add-log-stddev=true dim=$relu_dim
fast-lstmp-layer name=lstm2 cell-dim=$cell_dim recurrent-projection-dim=$projection_dim non-recurrent-projection-dim=$projection_dim decay-time=20 delay=-6 dropout-proportion=0.0
relu-renorm-layer name=tdnn5 input=Append(-12,0,12,24) dim=$relu_dim
output-layer name=output include-log-softmax=true dim=3 learning-rate-factor=0.1 input=tdnn5
EOF

  # Convert the xconfig description into config files under $dir/configs/.
  # Stop here on failure rather than carrying on to training without configs.
  steps/nnet3/xconfig_to_configs.py --xconfig-file "$dir/configs/network.xconfig" \
    --config-dir "$dir/configs/" || exit 1

  # Append the number of targets to the $dir/configs/vars file.
  cat <<EOF >> "$dir/configs/vars"
num_targets=3
EOF
fi
# Stage 6: train the network by calling steps/nnet3/train_raw_rnn.py.

#######################################
# Prints the number of utterances to set aside for diagnostics:
#   max(min(floor(0.005 * num_utts), 300), 12)
# Arguments: $1 - total number of utterances (non-negative integer)
# Outputs:   the subset size on stdout
#######################################
get_num_utts_subset() {
  local total=${1:-0}
  # total * 5 / 1000 in integer arithmetic equals floor(0.005 * total).
  local n=$(( total * 5 / 1000 ))
  if (( n > 300 )); then
    n=300
  elif (( n < 12 )); then
    n=12
  fi
  echo "$n"
}

if [ "$stage" -le 6 ]; then
  # Number of utterances = number of lines in utt2spk.
  num_utts=$(wc -l < "$data_dir/utt2spk") || exit 1

  # Set num_utts_subset for diagnostics to a reasonable value
  # of max(min(0.005 * num_utts, 300), 12)
  num_utts_subset=$(get_num_utts_subset "$num_utts")

  # The options are collected in an array so that each one can carry its own
  # comment. A comment placed after a trailing backslash ("\ # ...") does NOT
  # work: the backslash then escapes the space instead of the newline, which
  # ends the command on that line and leaves the remaining options to be run
  # as separate commands.
  train_opts=(
    --stage="$train_stage"                        # default: -10
    --feat.cmvn-opts="--norm-means=false --norm-vars=false"
    --egs.chunk-width="$chunk_width"              # default: 20
    --egs.dir="$egs_dir"                          # default: empty
    --egs.stage="$get_egs_stage"                  # default: -10
    --egs.chunk-left-context="$extra_left_context"     # default: 60
    --egs.chunk-right-context="$extra_right_context"   # default: 0
    # chunk-left-context-initial: for a chunk that starts at the first frame
    # of an utterance, use 0 frames of left context instead of
    # extra_left_context.
    --egs.chunk-left-context-initial=0
    # chunk-right-context-final: for a chunk that ends at the last frame of
    # an utterance, use 0 frames of right context instead of
    # extra_right_context.
    --egs.chunk-right-context-final=0
    --trainer.num-epochs="$num_epochs"            # default: 4 epochs
    # NOTE(review): literal value; the samples_per_iter variable computed
    # earlier in this script is not used here.
    --trainer.samples-per-iter=20000
    --trainer.optimization.num-jobs-initial="$num_jobs_initial"
    --trainer.optimization.num-jobs-final="$num_jobs_final"
    --trainer.optimization.initial-effective-lrate="$initial_effective_lrate"   # initial learning rate
    --trainer.optimization.final-effective-lrate="$final_effective_lrate"       # final learning rate
    --trainer.optimization.shrink-value=0.99
    --trainer.dropout-schedule="$dropout_schedule"
    --trainer.rnn.num-chunk-per-minibatch=128,64
    --trainer.optimization.momentum=0.5
    --trainer.deriv-truncate-margin=10
    --trainer.max-param-change="$max_param_change"
    --trainer.compute-per-dim-accuracy=true
    # decode_cmd is not defined in this script; presumably it comes from
    # cmd.sh. NOTE(review): --nj is the literal 40, not the nj option variable.
    --cmd="$decode_cmd" --nj 40
    --cleanup=true
    --cleanup.remove-egs="$remove_egs"
    --cleanup.preserve-model-interval=10
    --use-gpu=true
    --use-dense-targets=true
    --feat-dir="$data_dir"
    # targets.scp: the file needed to associate frames with their labels.
    --targets-scp="$targets_dir/targets.scp"
    --egs.opts="--frame-subsampling-factor 3 --num-utts-subset $num_utts_subset"
    --dir="$dir"
  )

  steps/nnet3/train_raw_rnn.py "${train_opts[@]}" || exit 1
fi
# Stage 7: write two small files into $dir.
#   post_output.vec           - the target matrices from targets.scp are summed
#                               over rows, the per-utterance sums are added
#                               together, and fields 2-4 of the resulting text
#                               vector are written as " [ a b c ]"
#                               (presumably per-class totals for the 3 outputs;
#                               confirm against the consumer of this file).
#   frame_subsampling_factor  - the value 3, matching the
#                               --frame-subsampling-factor 3 used for the egs.
if [ "$stage" -le 7 ]; then
  # Abort on failure so that a broken pipeline does not leave an empty or
  # partial post_output.vec behind while the script still exits successfully.
  copy-feats "scp:$targets_dir/targets.scp" ark:- \
    | matrix-sum-rows ark:- ark:- \
    | vector-sum --binary=false ark:- - \
    | awk '{print " [ "$2" "$3" "$4" ]"}' > "$dir/post_output.vec" \
    || exit 1
  echo 3 > "$dir/frame_subsampling_factor"
fi