一、生成语言模型
1、需要准备
lexicon.txt声学字典;
words.txt语料:lexicon.txt字典删除所有的音素即是;
phones.txt语料:lexicon.txt字典删除所有的单词即是;
2、lm_word生成
# Build the word-level 3-gram LM with SRILM.
# Step 1: count n-grams over the word corpus, restricted to the lexicon
#         vocabulary; tokens outside it are mapped to <unk> (-unk).
ngram-count \
  -vocab lexicon.txt \
  -text words.txt \
  -order 3 \
  -write my.count \
  -unk
# Step 2: estimate an interpolated 3-gram model from those counts.
ngram-count \
  -read my.count \
  -order 3 \
  -lm word.3gram.lm \
  -interpolate
3、lm_phone生成
# Build the phone-level 3-gram LM with SRILM (same two-step recipe as the
# word LM, but over the phone corpus).
ngram-count \
  -vocab lexicon.txt \
  -text phones.txt \
  -order 3 \
  -write my.count \
  -unk
ngram-count \
  -read my.count \
  -order 3 \
  -lm phone.3gram.lm \
  -interpolate
二、生成训练所需要的数据
dev|test|train|test_phone/utt2spk、text、spk2utt、wav.scp
test_phone放test的text
将dev|test|train|test_phone拷贝到s5/data
三、通过lexicon.txt 生成lexiconp.txt等
local/prepare_dict.sh /home/wangyanwei/work/datasets/magicdata/resource/dict
四、开始训练GMM-HMM模型
1、修改run.sh数据集目录
#corpus and trans directory
# NOTE(review): step 3 above uses ".../work/datasets/magicdata" (plural
# "datasets") while this path says "dataset" — confirm which spelling the
# actual filesystem uses.
thchs=/home/wangyanwei/work/dataset/magicdata
2、删除thchs30中数据准备部分的代码,添加处理我们自己数据的代码
#data preparation
#generate text, wav.scp, utt2spk, spk2utt
#local/thchs-30_data_prep.sh $H $thchs/data_thchs30 || exit 1;
# Kaldi requires wav.scp/utt2spk/text to be sorted by utterance id,
# so sort each file in place for every data split.
for x in train dev test; do
cd data/$x
echo "preparing scps and text in data/$x"
sort wav.scp -o wav.scp
sort utt2spk -o utt2spk
sort text -o text
cd -
done
# test_phone reuses the transcripts of the test set (see step 二 above:
# test_phone holds the test text for phone-level scoring).
mkdir data/test_phone
cp data/test/text data/test_phone
3、将run.sh中的训练dnn的dae的脚本注释掉
# The stock thchs30 run.sh also trains a DNN and a DAE model; both stages
# are disabled here.
#train dnn model
#local/nnet/run_dnn.sh --stage 0 --nj $n exp/tri4b exp/tri4b_ali exp/tri4b_ali_cv || exit 1;
#train dae model
#python2.6 or above is required for noisy data generation.
#To speed up the process, pyximport for python is recommended.
#local/dae/run_dae.sh $thchs || exit 1;
4、开始训练
./run.sh
五、创建nnet3目录并创建run_ivector_common.sh和run_tdnn.sh
这两个脚本可以从wsj/local/nnet3下拷贝过来
# Bring the nnet3 helper scripts over from the WSJ recipe; the tuning
# variant run_tdnn_1a.sh becomes our local run_tdnn.sh.
wsj_nnet3=../wsj/local/nnet3
mkdir local/nnet3
cp "$wsj_nnet3/run_ivector_common.sh" local/nnet3/
cp "$wsj_nnet3/tuning/run_tdnn_1a.sh" local/nnet3/run_tdnn.sh
分别修改run_ivector_common.sh和run_tdnn.sh数据集部分
# Our prepared data lives under data/mfcc/*, so both scripts must point
# their train/test sets there.
train_set=mfcc/train # you might set this to e.g. train.
test_sets="mfcc/dev mfcc/test"
六、训练声纹模型
# Train the i-vector extractor on top of the tri4b model.
# NOTE(review): per the wsj recipe this script presumably also does speed
# perturbation and hi-res MFCC extraction — confirm against the copied
# run_ivector_common.sh.
local/nnet3/run_ivector_common.sh --stage 0 --nj 30 \
--train-set mfcc/train --gmm tri4b \
--num-threads-ubm 32 \
--nnet3-affix ""
七、训练tdnn
1、从exp/tri4b/graph_word/ 创建exp/tri4b/graph_tgpr/和exp/tri4b/graph_bd_tgpr/的软链接
# run_tdnn.sh decodes with graph_tgpr and graph_bd_tgpr; only one word
# graph was built, so alias both names to it inside exp/tri4b.
cd exp/tri4b
ln -s graph_word graph_tgpr
ln -s graph_word graph_bd_tgpr
cd -
2、从data/mfcc/dev_hires|test_hires|train_sp_hires创建data/dev_hires|test_hires|train_sp_hires的软链接
# Expose the hi-res feature dirs at the top level of data/, where the
# nnet3 scripts expect to find them.
cd data
ln -s mfcc/dev_hires dev_hires
ln -s mfcc/test_hires test_hires
ln -s mfcc/train_sp_hires train_sp_hires
# NOTE(review): unlike the previous step there is no `cd -` here, so any
# following commands run from data/ — confirm this is intended.
3、修改GPU模式
sudo nvidia-smi -c 3
4、修改local/nnet3/run_tdnn.sh
把 --use-gpu=true
设置成 --use-gpu=wait
# TDNN training call inside local/nnet3/run_tdnn.sh.
# --use-gpu=wait (changed from true, see step 4 above) makes jobs queue
# until a GPU is free instead of failing when all GPUs are busy.
# Comments cannot be placed between the backslash-continued lines below,
# so the command itself is kept verbatim.
steps/nnet3/train_dnn.py --stage=$train_stage \
--cmd="$decode_cmd" \
--feat.online-ivector-dir=$train_ivector_dir \
--feat.cmvn-opts="--norm-means=false --norm-vars=false" \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=3 \
--trainer.samples-per-iter=400000 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=9 \
--trainer.optimization.initial-effective-lrate=0.0015 \
--trainer.optimization.final-effective-lrate=0.00015 \
--trainer.optimization.minibatch-size=256,128 \
--egs.dir="$common_egs_dir" \
--cleanup.remove-egs=$remove_egs \
--use-gpu=wait \
--feat-dir=$train_data_dir \
--ali-dir=$ali_dir \
--lang=data/lang \
--reporting.email="$reporting_email" \
--dir=$dir || exit 1;
fi
5、开始训练tdnn
local/nnet3/run_tdnn.sh --stage 0 --nj 16 exp/tri4b