ESPnet环境配置与实践
代码来源:espnet/espnet: End-to-End Speech Processing Toolkit (github.com)
官网文档:Common usages — ESPnet 202402 documentation
1. 环境配置
根据官方文档,本文指定 PyTorch=1.12.1、CUDA=11.6。ESPnet 还支持许多其他版本,请查看 https://github.com/espnet/espnet/blob/master/tools/installers/install_torch.sh 以获取详细的版本列表。
安装CUDA=11.6
# 下载
# cuda的安装和配置:https://blog.csdn.net/weixin_46560570/article/details/140754242?spm=1001.2014.3001.5501
wget https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda_11.6.2_510.47.03_linux.run
# 安装
# Run the CUDA 11.6.2 runfile fetched by the wget command above. The file
# name must match the downloaded file exactly. Only the toolkit is installed,
# silently, into a user-owned prefix (no driver, OpenGL libs, DRM or man pages).
sh ./cuda_11.6.2_510.47.03_linux.run \
  --silent \
  --toolkit \
  --installpath=/s6home/lnj524/module/cuda/cuda-11.6 \
  --no-opengl-libs \
  --no-drm \
  --no-man-page
# 添加环境变量
vim ~/.bashrc
#将下方内容写入.bashrc
# CUDA 11.6 toolkit root; must be the directory passed to the installer's
# --installpath option.
export CUDA_HOME=/s6home/lnj524/module/cuda/cuda-11.6
export PATH="$CUDA_HOME/bin:$PATH"
# Prepend the CUDA library directories. The ${VAR:+...} form adds the ':'
# separator only when LD_LIBRARY_PATH already has content, so no empty entry
# (which the dynamic loader interprets as the current directory) is created.
export LD_LIBRARY_PATH="$CUDA_HOME/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
export LD_LIBRARY_PATH="$CUDA_HOME/lib:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="$CUDA_HOME/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
# The same toolkit root under the different variable names that various
# build systems look for.
export CUDAToolkit_ROOT_DIR="$CUDA_HOME"
export CUDAToolkit_ROOT="$CUDA_HOME"
export CUDA_TOOLKIT_ROOT_DIR="$CUDA_HOME"
export CUDA_TOOLKIT_ROOT="$CUDA_HOME"
export CUDA_BIN_PATH="$CUDA_HOME"
export CUDA_PATH="$CUDA_HOME"
export CUDA_INC_PATH="$CUDA_HOME/targets/x86_64-linux"
# CFLAGS is a space-separated list of compiler flags, not a ':'-separated
# path list; append any pre-existing flags after a single space.
export CFLAGS="-I$CUDA_HOME/targets/x86_64-linux/include${CFLAGS:+ $CFLAGS}"
export CUDAToolkit_TARGET_DIR="$CUDA_HOME/targets/x86_64-linux"
# 更新用户环境
source ~/.bashrc
# 验证
nvcc -V
安装Pytorch
# 克隆espnet代码
git clone https://gitee.com/chengsili/espnet.git
# 创建虚拟环境
# conda的安装和配置:https://blog.csdn.net/weixin_46560570/article/details/140754242?spm=1001.2014.3001.5501
conda create -n espnet python=3.9
# 激活虚拟环境
conda activate espnet
# 安装Pytorch
conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.6 -c pytorch -c conda-forge
# 安装cudnn
conda install cudnn
项目配置
cd /s6home/lnj524/module/espnet/tools
# Configure the CUDA environment. The script is sourced (". ./script") so the
# variables it exports remain in the current shell; the argument is the CUDA
# root that was chosen with --installpath when installing the toolkit.
. ./setup_cuda_env.sh /s6home/lnj524/module/cuda/cuda-11.6
# Register the currently active python3 interpreter with ESPnet's tools
# directory (quoted so a path containing spaces stays one argument).
./setup_python.sh "$(command -v python3)"
# 此步骤后,tools下生成activate_python.sh
#查看activate_python.sh
cat activate_python.sh
#!/usr/bin/env bash
# THIS FILE IS GENERATED BY tools/setup_python.sh
export PYTHONUSERBASE="/home/lnj524/module/espnet/tools/python_user_base"
export PATH="/home/lnj524/module/espnet/tools/python_user_base/bin":${PATH}
export PATH=/home/lnj524/miniconda3/envs/espnet/bin:${PATH}
export NLTK_DATA="${PYTHONUSERBASE}/nltk_data" # NLTK_DATA环境变量是后添上的,前三个变量是自动生成的。
# 报错记录
# NLTK_DATA环境变量,如果没有这个,nltk_data文件会产生在用户文件夹下。
# 若报错XXX.zip错误,自行安装和解压。
# https://gitee.com/qwererer2/nltk_data/blob/gh-pages/packages/corpora/cmudict.zip
# https://gitee.com/qwererer2/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip
安装相关依赖
# 踩坑记录
# 服务器无法访问github,使用make安装时,网络连接超时。
# 解决方法:
cd /home/lnj524/module/espnet/tools/installers
# 检查文件目录下所有的.sh文件,将文件中的github上的项目url改成gitee上的项目url。
# 我改好的:https://gitee.com/chengsili/espent_tools_installers.git
# sph2pipe.sh中下载2.5.tar.gz使用:https://gitee.com/chengsili/sph2pipe/repository/archive/2.5.tar.gz
# SCTK.sh中下载9688a26.tar.gz使用:https://gitee.com/chengsili/SCTK/repository/archive/9688a26.tar.gz
# install_kenlm.sh中的boost可能因服务器问题下载失败;如果遇到该问题,请参见下文aishell部分的"3.1 kenlm"报错记录。
cd /s6home/lnj524/module/espnet/tools
make TH_VERSION=1.12.1 CUDA_VERSION=11.6
pip install cmake
pip install sox
# 到此环境配置完成
# 检查安装,具体细节请访问:https://espnet.github.io/espnet/tutorial.html#transducer
python3 check_install.py
2. an4
数据集
按照官网进行测试即可,注意Stage 11: ASR Training,设置GPU数量和使用的GPU编号
# 使用GPU 0和GPU 1
export CUDA_VISIBLE_DEVICES=0,1
# 训练
./asr.sh --stage 11 --stop_stage 11 --train_set train_nodev --valid_set train_dev --test_sets "train_dev test" --ngpu 2 --asr_config conf/train_asr_demo_transformer.yaml
3. aishell
数据集
3.1 kenlm
报错
# 踩坑记录 - 安装kenlm
# 使用pip或者conda进行安装kenlm。使用conda list可看到kenlm=0.2.0,但是运行 ./asr.sh --stage 1 --stop_stage 1 ....
# 仍然报错找不到kenlm。
# 解决方法,使用espnet提供的脚本。进入installers文件夹,运行./install_kenlm.sh报错,网络连接超时,可自行下载。
# https://boostorg.jfrog.io/artifactory/main/release/1.81.0/source/boost_1_81_0.tar.bz2
# 然后自行解压:tar xvf boost_1_81_0.tar.bz2,随后更改install_kenlm.sh,再次运行。
# 具体如下:
cd /home/lnj524/module/espnet/tools/installers
# 将你下载好的压缩包放在installers目录下
tar xvf boost_1_81_0.tar.bz2
vim install_kenlm.sh
#更改后的install_kenlm.sh
#!/usr/bin/env bash
# Build Boost from an already-extracted source tree, build KenLM against it,
# and install KenLM's Python bindings in editable mode.
#
# Usage:    ./install_kenlm.sh        (takes no arguments)
# Requires: boost_<version>/ (the extracted tarball) in the current directory;
#           this variant does not download Boost itself.
# Outputs:  boost_<version>_build/ and kenlm/ in the current directory.
set -euo pipefail

if [ $# -ne 0 ]; then
    echo "Usage: $0" >&2
    exit 1
fi

boost_version=1.81.0
# 1.81.0 -> boost_1_81_0, the directory name used by the Boost tarball.
boost_src="boost_${boost_version//./_}"
# Absolute install prefix, so it stays valid after the cd commands below.
boost_prefix="$(pwd)/${boost_src}_build"

# Boost is built only once; an existing prefix directory skips this step.
if [ ! -d "${boost_prefix}" ]; then
    if [ ! -d "${boost_src}" ]; then
        echo "$0: ${boost_src}/ not found; extract ${boost_src}.tar.bz2 here first" >&2
        exit 1
    fi
    (
        # Subshell keeps the directory change local; set -e is inherited.
        cd "${boost_src}"
        ./bootstrap.sh
        ./b2 install --prefix="${boost_prefix}"
    )
fi

if [ ! -d kenlm ]; then
    git clone https://gitee.com/chengsili/kenlm.git
fi

(
    cd kenlm
    mkdir -p build
    (
        cd build
        # Point CMake at the locally built Boost instead of a system copy.
        cmake -DCMAKE_PREFIX_PATH="${boost_prefix}" ..
        make
    )
    # Editable install of the Python bindings from the kenlm source tree.
    python3 -m pip install -e .
)
# 安装完成后,发现kenlm文件在installers目录下,将其移动到tools下即可
cd /home/lnj524/module/espnet/tools/installers
mv kenlm/ ../
# The editable install ("pip install -e") recorded the old installers/kenlm
# path, so re-register the Python bindings from the new location.
python3 -m pip install -e ../kenlm
3.2 分阶段进行
数据准备
# 按照脚本执行第一步,数据集下载时间较长,自行下载数据集,更改data.sh中的数据集路径。
vim /home/lnj524/module/espnet/egs2/aishell/asr1/local/data.sh
AISHELL=/home/lnj524/module/data/opensource_data/aishell
第 1 阶段:数据准备
# 第 1 阶段:数据准备
./asr.sh --stage 1 --stop_stage 1 --train_set train --valid_set dev --test_sets "dev test"
espnet/egs2/aishell/asr1/data$ ls
dev local test token_list train
espnet/egs2/aishell/asr1/data$ ll train
spk2utt # 每个说话者的 ID 及其所有的发言列表
text # 每个发言的转录文本
utt2spk # 每个发言 ID 及其对应的说话者 ID
wav.scp # 列出音频文件的路径
第 3 阶段:格式化
# 第 3 阶段:格式化 wav.scp: data/ -> dump/raw
./asr.sh --stage 3 --stop_stage 3 --train_set train --valid_set dev --test_sets "dev test"
2024-08-09T18:40:02 (asr.sh:283:main) ./asr.sh --stage 3 --stop_stage 3 --train_set train --valid_set dev --test_sets dev test
2024-08-09T18:40:02 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-09T18:40:02 (asr.sh:564:main) Skipped stages: 9 14 15
2024-08-09T18:40:02 (asr.sh:614:main) Stage 3: Format wav.scp: data/ -> dump/raw
utils/copy_data_dir.sh: copied data from data/train to dump/raw/org/train
utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/org/train
2024-08-09T18:40:04 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/train/wav.scp dump/raw/org/train
2024-08-09T18:40:05 (format_wav_scp.sh:118:main) [info]: without segments
2024-08-09T18:47:10 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=426s]
utils/copy_data_dir.sh: copied data from data/dev to dump/raw/org/dev
utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/org/dev
2024-08-09T18:47:10 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/dev/wav.scp dump/raw/org/dev
2024-08-09T18:47:10 (format_wav_scp.sh:118:main) [info]: without segments
2024-08-09T18:47:56 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=46s]
utils/copy_data_dir.sh: copied data from data/test to dump/raw/test
utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/test
2024-08-09T18:47:56 (format_wav_scp.sh:46:main) scripts/audio/format_wav_scp.sh --nj 32 --cmd run.pl --audio-format flac --fs 16k --multi-columns-input false --multi-columns-output false data/test/wav.scp dump/raw/test
2024-08-09T18:47:57 (format_wav_scp.sh:118:main) [info]: without segments
2024-08-09T18:48:10 (format_wav_scp.sh:153:main) Successfully finished. [elapsed=14s]
2024-08-09T18:48:10 (asr.sh:1809:main) Successfully finished. [elapsed=488s]
第 4 阶段:删除长/短数据
# 第 4 阶段:删除长/短数据:dump/raw/org -> dump/raw
./asr.sh --stage 4 --stop_stage 4 --train_set train --valid_set dev --test_sets "dev test"
2024-08-09T18:50:44 (asr.sh:283:main) ./asr.sh --stage 4 --stop_stage 4 --train_set train --valid_set dev --test_sets dev test
2024-08-09T18:50:44 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-09T18:50:44 (asr.sh:564:main) Skipped stages: 9 14 15
2024-08-09T18:50:44 (asr.sh:799:main) Stage 4: Remove long/short data: dump/raw/org -> dump/raw
utils/copy_data_dir.sh: copied data from dump/raw/org/train to dump/raw/train
utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/train
fix_data_dir.sh: kept all 120098 utterances.
fix_data_dir.sh: old files are kept in dump/raw/train/.backup
utils/copy_data_dir.sh: copied data from dump/raw/org/dev to dump/raw/dev
utils/validate_data_dir.sh: Successfully validated data-directory dump/raw/dev
fix_data_dir.sh: kept all 14326 utterances.
fix_data_dir.sh: old files are kept in dump/raw/dev/.backup
2024-08-09T18:50:51 (asr.sh:1809:main) Successfully finished. [elapsed=7s]
第 5 阶段:生成token_list
# 第 5 阶段:使用 BPE 从 dump/raw/train/text 生成token_list。
# 此处需要在asr.sh设置nbpe=4234
./asr.sh --stage 5 --stop_stage 5 --train_set train --valid_set dev --test_sets "dev test"
2024-08-09T18:52:48 (asr.sh:283:main) ./asr.sh --stage 5 --stop_stage 5 --train_set train --valid_set dev --test_sets dev test
2024-08-09T18:52:48 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-09T18:52:48 (asr.sh:564:main) Skipped stages: 9 14 15
2024-08-09T18:52:48 (asr.sh:877:main) Stage 5: Generate token_list from dump/raw/org/train/text using BPE
sentencepiece_trainer.cc(177) LOG(INFO) Running command: --input=data/token_list/bpe_unigram4234/train.txt --vocab_size=4234 --model_type=unigram --model_prefix=data/token_list/bpe_unigram4234/bpe --character_coverage=1.0 --input_sentence_size=100000000
sentencepiece_trainer.cc(77) LOG(INFO) Starts training with :
trainer_spec {
input: data/token_list/bpe_unigram4234/train.txt
input_format:
model_prefix: data/token_list/bpe_unigram4234/bpe
model_type: UNIGRAM
vocab_size: 4234
self_test_sample_size: 0
character_coverage: 1
input_sentence_size: 100000000
shuffle_input_sentence: 1
seed_sentencepiece_size: 1000000
shrinking_factor: 0.75
max_sentence_length: 4192
num_threads: 16
num_sub_iterations: 2
max_sentencepiece_length: 16
split_by_unicode_script: 1
split_by_number: 1
split_by_whitespace: 1
split_digits: 0
treat_whitespace_as_suffix: 0
allow_whitespace_only_pieces: 0
required_chars:
byte_fallback: 0
vocabulary_output_piece_score: 1
train_extremely_large_corpus: 0
hard_vocab_limit: 1
use_all_vocab: 0
unk_id: 0
bos_id: 1
eos_id: 2
pad_id: -1
unk_piece: <unk>
bos_piece: <s>
eos_piece: </s>
pad_piece: <pad>
unk_surface: ⁇
enable_differential_privacy: 0
differential_privacy_noise_level: 0
differential_privacy_clipping_threshold: 0
}
normalizer_spec {
name: nmt_nfkc
add_dummy_prefix: 1
remove_extra_whitespaces: 1
escape_whitespaces: 1
normalization_rule_tsv:
}
denormalizer_spec {}
trainer_interface.cc(350) LOG(INFO) SentenceIterator is not specified. Using MultiFileSentenceIterator.
trainer_interface.cc(181) LOG(INFO) Loading corpus: data/token_list/bpe_unigram4234/train.txt
trainer_interface.cc(406) LOG(INFO) Loaded all 120098 sentences
trainer_interface.cc(422) LOG(INFO) Adding meta_piece: <unk>
trainer_interface.cc(422) LOG(INFO) Adding meta_piece: <s>
trainer_interface.cc(422) LOG(INFO) Adding meta_piece: </s>
trainer_interface.cc(427) LOG(INFO) Normalizing sentences...
trainer_interface.cc(536) LOG(INFO) all chars count=1850211
trainer_interface.cc(557) LOG(INFO) Alphabet size=4231
trainer_interface.cc(558) LOG(INFO) Final character coverage=1
trainer_interface.cc(590) LOG(INFO) Done! preprocessed 120098 sentences.
unigram_model_trainer.cc(146) LOG(INFO) Making suffix array...
unigram_model_trainer.cc(150) LOG(INFO) Extracting frequent sub strings...
unigram_model_trainer.cc(201) LOG(INFO) Initialized 384921 seed sentencepieces
trainer_interface.cc(596) LOG(INFO) Tokenizing input sentences with whitespace: 120098
trainer_interface.cc(607) LOG(INFO) Done! 113737
unigram_model_trainer.cc(491) LOG(INFO) Using 113737 sentences for EM training
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=169508 obj=60.2932 num_tokens=605625 num_tokens/piece=3.57284
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=154327 obj=57.151 num_tokens=608759 num_tokens/piece=3.9446
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=115321 obj=58.0022 num_tokens=643243 num_tokens/piece=5.57785
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=114890 obj=57.5467 num_tokens=644182 num_tokens/piece=5.60695
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=86118 obj=59.1864 num_tokens=685973 num_tokens/piece=7.9655
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=86071 obj=58.6914 num_tokens=686845 num_tokens/piece=7.97998
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=64542 obj=60.6321 num_tokens=730928 num_tokens/piece=11.3248
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=64533 obj=60.1345 num_tokens=731803 num_tokens/piece=11.34
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=48395 obj=62.3089 num_tokens=775418 num_tokens/piece=16.0227
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=48393 obj=61.8552 num_tokens=775965 num_tokens/piece=16.0347
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=36293 obj=64.1495 num_tokens=822372 num_tokens/piece=22.6592
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=36293 obj=63.7144 num_tokens=822793 num_tokens/piece=22.6708
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=27218 obj=66.1917 num_tokens=870513 num_tokens/piece=31.983
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=27218 obj=65.7753 num_tokens=871226 num_tokens/piece=32.0092
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=20412 obj=68.3477 num_tokens=920879 num_tokens/piece=45.1146
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=20412 obj=67.9286 num_tokens=921147 num_tokens/piece=45.1277
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=15309 obj=70.7446 num_tokens=974604 num_tokens/piece=63.6622
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=15309 obj=70.2996 num_tokens=975000 num_tokens/piece=63.688
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=11480 obj=73.3361 num_tokens=1034514 num_tokens/piece=90.1145
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=11479 obj=72.8184 num_tokens=1034923 num_tokens/piece=90.1579
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=8609 obj=76.2591 num_tokens=1105069 num_tokens/piece=128.362
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=8609 obj=75.6089 num_tokens=1105469 num_tokens/piece=128.409
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=6456 obj=80.114 num_tokens=1196553 num_tokens/piece=185.34
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=6456 obj=79.1711 num_tokens=1196816 num_tokens/piece=185.38
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=4842 obj=86.2754 num_tokens=1339006 num_tokens/piece=276.54
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=4842 obj=84.6424 num_tokens=1339362 num_tokens/piece=276.613
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=0 size=4657 obj=85.9046 num_tokens=1365198 num_tokens/piece=293.15
unigram_model_trainer.cc(507) LOG(INFO) EM sub_iter=1 size=4657 obj=85.6287 num_tokens=1365198 num_tokens/piece=293.15
trainer_interface.cc(685) LOG(INFO) Saving model: data/token_list/bpe_unigram4234/bpe.model
trainer_interface.cc(697) LOG(INFO) Saving vocabs: data/token_list/bpe_unigram4234/bpe.vocab
2024-08-09T18:52:58 (asr.sh:1809:main) Successfully finished. [elapsed=10s]
第 6-9 阶段:与语言建模相关的阶段
第 10 阶段:ASR 收集统计信息
# 第 10 阶段:ASR 收集统计信息:train_set=dump/raw/train,valid_set=dump/raw/dev
# 此阶段需要设置asr.sh中asr_config=conf/train_asr_conformer.yaml
./asr.sh --stage 10 --stop_stage 10 --train_set train --valid_set dev --test_sets "dev test" --asr_config conf/train_asr_conformer.yaml
2024-08-09T18:56:13 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-09T18:56:13 (asr.sh:564:main) Skipped stages: 9 14 15
2024-08-09T18:56:13 (asr.sh:1189:main) Stage 10: ASR collect stats: train_set=dump/raw/train, valid_set=dump/raw/dev
2024-08-09T18:56:13 (asr.sh:1240:main) Generate 'exp/asr_stats_raw_bpe4234/run.sh'. You can resume the process from stage 10 using this script
2024-08-09T18:56:13 (asr.sh:1244:main) ASR collect-stats started... log: 'exp/asr_stats_raw_bpe4234/logdir/stats.*.log'
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/aggregate_stats_dirs.py --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.1 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.2 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.3 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.4 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.5 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.6 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.7 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.8 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.9 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.10 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.11 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.12 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.13 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.14 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.15 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.16 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.17 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.18 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.19 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.20 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.21 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.22 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.23 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.24 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.25 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.26 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.27 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.28 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.29 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.30 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.31 --input_dir exp/asr_stats_raw_bpe4234/logdir/stats.32 --output_dir exp/asr_stats_raw_bpe4234
2024-08-09T19:01:59 (asr.sh:1809:main) Successfully finished. [elapsed=346s]
第 11 阶段:ASR 训练
# 第 11 阶段:ASR 训练:train_set=dump/raw/train,valid_set=dump/raw/dev
# 更改train_asr_conformer.yaml中max_epoch: 50,其他默认设置
export CUDA_VISIBLE_DEVICES=1,3,4,5
./asr.sh --stage 11 --stop_stage 11 --train_set train --valid_set dev --test_sets "dev test" --asr_config conf/train_asr_conformer.yaml --ngpu 4
2024-08-10T09:27:53 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-10T09:27:53 (asr.sh:564:main) Skipped stages: 9 14 15
2024-08-10T09:27:53 (asr.sh:1308:main) Stage 11: ASR Training: train_set=dump/raw/train, valid_set=dump/raw/dev
2024-08-10T09:27:53 (asr.sh:1407:main) Generate 'exp/asr_train_asr_conformer_raw_bpe4234/run.sh'. You can resume the process from stage 11 using this script
2024-08-10T09:27:53 (asr.sh:1411:main) ASR training started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/train.log'
2024-08-10 09:27:53,791 (launch:94) INFO: /home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/launch.py --cmd 'run.pl --name exp/asr_train_asr_conformer_raw_bpe4234/train.log' --log exp/asr_train_asr_conformer_raw_bpe4234/train.log --ngpu 4 --num_nodes 1 --init_file_prefix exp/asr_train_asr_conformer_raw_bpe4234/.dist_init_ --multiprocessing_distributed true -- python3 -m espnet2.bin.asr_train --use_preprocessor true --bpemodel data/token_list/bpe_unigram4234/bpe.model --token_type bpe --token_list data/token_list/bpe_unigram4234/tokens.txt --non_linguistic_symbols none --cleaner none --g2p none --valid_data_path_and_name_and_type dump/raw/dev/wav.scp,speech,sound --valid_shape_file exp/asr_stats_raw_bpe4234/valid/speech_shape --resume true --ignore_init_mismatch false --fold_length 80000 --output_dir exp/asr_train_asr_conformer_raw_bpe4234 --config conf/train_asr_conformer.yaml --frontend_conf fs=16k --normalize=global_mvn --normalize_conf stats_file=exp/asr_stats_raw_bpe4234/train/feats_stats.npz --train_data_path_and_name_and_type dump/raw/train/wav.scp,speech,sound --train_shape_file exp/asr_stats_raw_bpe4234/train/speech_shape --fold_length 150 --train_data_path_and_name_and_type dump/raw/train/text,text,text --train_shape_file exp/asr_stats_raw_bpe4234/train/text_shape.bpe --valid_data_path_and_name_and_type dump/raw/dev/text,text,text --valid_shape_file exp/asr_stats_raw_bpe4234/valid/text_shape.bpe
2024-08-10 09:27:53,806 (launch:237) INFO: single-node with 4gpu on distributed mode
2024-08-10 09:27:53,809 (launch:348) INFO: log file: exp/asr_train_asr_conformer_raw_bpe4234/train.log
2024-08-10T15:46:41 (asr.sh:1809:main) Successfully finished. [elapsed=22728s]
tensorboard
可视化
由于我使用的是服务器,无法生成链接,同wenet一样,将训练生成的tensorboard文件,拿到本机上运行查看效果。
tensorboard --logdir tensorboard/train/ --port 12598 --bind_all
tensorboard --logdir tensorboard/valid/ --port 12598 --bind_all
第 12 阶段:解码
export CUDA_VISIBLE_DEVICES=1,3,4,5
./asr.sh --use_lm false --gpu_inference true --inference_nj 4 --stage 12 --stop_stage 12 --train_set train --valid_set dev --test_sets "dev test" --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml --ngpu 4
2024-08-11T09:02:43 (asr.sh:283:main) ./asr.sh --use_lm false --gpu_inference true --inference_nj 4 --stage 12 --stop_stage 12 --train_set train --valid_set dev --test_sets dev test --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml --ngpu 4
2024-08-11T09:02:43 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-11T09:02:43 (asr.sh:564:main) Skipped stages: 6 7 8 9 14 15
2024-08-11T09:02:43 (asr.sh:1480:main) Stage 12: Decoding: training_dir=exp/asr_train_asr_conformer_raw_bpe4234
2024-08-11T09:02:43 (asr.sh:1508:main) Generate 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/run.sh'. You can resume the process from stage 12 using this script
2024-08-11T09:02:43 (asr.sh:1573:main) Decoding started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/logdir/asr_inference.*.log'
2024-08-11T11:58:37 (asr.sh:1589:main) Calculating RTF & latency... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/logdir/calculate_rtf.log'
2024-08-11T11:58:40 (asr.sh:1573:main) Decoding started... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/logdir/asr_inference.*.log'
2024-08-11T13:35:20 (asr.sh:1589:main) Calculating RTF & latency... log: 'exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/logdir/calculate_rtf.log'
2024-08-11T13:35:21 (asr.sh:1809:main) Successfully finished. [elapsed=16358s]
第 13 阶段:得分
# Stage 13: score the decoding results. --inference_config uses the same
# conf/ path that was passed to the stage 12 decoding command.
./asr.sh --stage 13 --stop_stage 13 --train_set train --valid_set dev --test_sets "dev test" --use_lm false --asr_exp exp/asr_train_asr_conformer_raw_bpe4234 --inference_config conf/decode_asr_transformer.yaml
2024-08-11T14:41:53 (asr.sh:321:main) Info: The valid_set 'dev' is included in the test_sets. '--eval_valid_set true' is set and 'dev' is removed from the test_sets
2024-08-11T14:41:54 (asr.sh:564:main) Skipped stages: 6 7 8 9 14 15
2024-08-11T14:41:54 (asr.sh:1621:main) Stage 13: Scoring
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
2024-08-11T14:42:36 (asr.sh:1711:main) Write cer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_cer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 14326 205341 | 94.8 5.1 0.1 0.1 5.3 38.9 |
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
2024-08-11T14:43:04 (asr.sh:1711:main) Write wer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_wer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 14326 14326 | 61.1 38.9 0.0 0.0 38.9 38.9 |
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model --cleaner none
2024-08-11T14:43:48 (asr.sh:1711:main) Write ter result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/org/dev/score_ter/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 14326 219662 | 95.2 4.7 0.1 0.1 4.9 38.9 |
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type char --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
2024-08-11T14:44:09 (asr.sh:1711:main) Write cer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_cer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 7176 104765 | 94.5 5.4 0.2 0.1 5.7 40.2 |
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type word --non_linguistic_symbols none --remove_non_linguistic_symbols true --cleaner none
2024-08-11T14:44:29 (asr.sh:1711:main) Write wer result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_wer/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 7176 7176 | 59.8 40.2 0.0 0.0 40.2 40.2 |
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --cleaner none --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model
/home/lnj524/miniconda3/envs/espnet/bin/python3 /s6home/lnj524/module/espnet/espnet2/bin/tokenize_text.py -f 2- --input - --output - --token_type bpe --bpemodel data/token_list/bpe_unigram4234/bpe.model --cleaner none
2024-08-11T14:44:46 (asr.sh:1711:main) Write ter result in exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave/test/score_ter/result.txt
| SPKR | # Snt # Wrd | Corr Sub Del Ins Err S.Err |
| Sum/Avg | 7176 111941 | 94.8 5.0 0.1 0.1 5.3 40.2 |
## exp/asr_train_asr_conformer_raw_bpe4234
### WER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transformer_asr_model_valid.acc.ave/test|7176|7176|59.8|40.2|0.0|0.0|40.2|40.2|
### CER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transformer_asr_model_valid.acc.ave/test|7176|104765|94.5|5.4|0.2|0.1|5.7|40.2|
### TER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|decode_asr_transformer_asr_model_valid.acc.ave/test|7176|111941|94.8|5.0|0.1|0.1|5.3|40.2|
## exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave
### WER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|org/dev|14326|14326|61.1|38.9|0.0|0.0|38.9|38.9|
### CER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|org/dev|14326|205341|94.8|5.1|0.1|0.1|5.3|38.9|
### TER
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
|---|---|---|---|---|---|---|---|---|
|org/dev|14326|219662|95.2|4.7|0.1|0.1|4.9|38.9|
2024-08-11T14:44:51 (asr.sh:1809:main) Successfully finished. [elapsed=178s]
## exp/asr_train_asr_conformer_raw_bpe4234
### WER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 7176 | 59.8 | 40.2 | 0.0 | 0.0 | 40.2 | 40.2 |
### CER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 104765 | 94.5 | 5.4 | 0.2 | 0.1 | 5.7 | 40.2 |
### TER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| decode_asr_transformer_asr_model_valid.acc.ave/test | 7176 | 111941 | 94.8 | 5.0 | 0.1 | 0.1 | 5.3 | 40.2 |
## exp/asr_train_asr_conformer_raw_bpe4234/decode_asr_transformer_asr_model_valid.acc.ave
### WER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| org/dev | 14326 | 14326 | 61.1 | 38.9 | 0.0 | 0.0 | 38.9 | 38.9 |
### CER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| org/dev | 14326 | 205341 | 94.8 | 5.1 | 0.1 | 0.1 | 5.3 | 38.9 |
### TER
| dataset | Snt | Wrd | Corr | Sub | Del | Ins | Err | S.Err |
|---|---|---|---|---|---|---|---|---|
| org/dev | 14326 | 219662 | 95.2 | 4.7 | 0.1 | 0.1 | 4.9 | 38.9 |