kaldi sre16/v1中local/make_mx6.sh

最新推荐文章于 2021-11-19 16:30:02 发布

Grace_yanyanyan

最新推荐文章于 2021-11-19 16:30:02 发布

阅读量367

点赞数

分类专栏： kaldi学习

本文链接：https://blog.csdn.net/yj13811596648/article/details/103344924

版权

kaldi学习专栏收录该内容

11 篇文章 3 订阅

订阅专栏

#!/bin/bash
# Copyright 2017   David Snyder
# Apache 2.0.
#
# This script prepares both the microphone and telephone portions of the
# Mixer 6 corpus.
# In short: the microphone data and the telephone data are merged into one
# data directory; the individual steps are annotated further down.

# Exactly two positional arguments are required.  `-ne` is the numeric
# "not equal" test, so the usage text is shown whenever the count is not 2.
if [[ $# -ne 2 ]]; then
  echo "Usage: $0 <mixer6-speech> <out-dir>"
  echo "e.g.: $0 /export/corpora/LDC/LDC2013S03 data/"
  exit 1
fi

# From here on, stop at the first command that fails.
set -e
in_dir="$1"   # first argument: <mixer6-speech>
out_dir="$2"  # second argument: <out-dir>

# Mic 01 is the lapel mic for the interviewer, so we don't use it.  Mic 02 is
# the lapel mic for the interviewee.  All other mics are placed throughout the
# room.  In addition to mic 01, we omit mics 03 and 14 as they are often
# silent.
# (This is why the list below excludes 01, 03, and 14.)

echo "$0: preparing mic speech (excluding 01, 03, and 14)"

# One call per retained microphone.  The expansions are quoted so that a
# corpus or output path containing whitespace or glob characters is passed to
# the Perl script as a single argument.
for mic in 02 04 05 06 07 08 09 10 11 12 13; do
  local/make_mx6_mic.pl "$in_dir" "$mic" "$out_dir"
done
# NOTE(review): the output directory is presumably created by
# make_mx6_mic.pl itself (this script never creates it) -- confirm there.

# Annotator's notes on local/make_mx6_mic.pl.  That script is not part of this
# file, so treat these as a summary to be confirmed against it:
#   - It takes three arguments: input directory, microphone number and output
#     directory.
#   - Its own description: "Prepares Mixer 6 (LDC2013S03) speech from a
#     specified microphone and downsamples it to 8k."
#   - It derives the spk2utt file from utt2spk.
#   - It calls utils/fix_data_dir.sh, so the total number of utterances may
#     shrink.
#   - It calls utils/validate_data_dir.sh (options of the no-feats / no-text /
#     no-wav / no-spk-sort kind) to check the files in the resulting directory.


# Merge the per-microphone directories of mics 04-13 into a single directory.
# "$out_dir" is quoted to survive whitespace in the path; the braces stay
# outside the quotes so bash still expands them into one argument per mic.
utils/combine_data.sh "$out_dir/mx6_mic_04_to_13" \
  "$out_dir"/mx6_mic_{04,05,06,07,08,09,10,11,12,13}
# Annotator's notes on utils/combine_data.sh (not part of this file; confirm
# against that script):
#   - It needs at least two positional arguments, a destination directory and
#     one or more source directories; options passed with a leading "--" do
#     not count towards that number.
#   - Its own description: "This script combines the data from multiple source
#     directories into a single destination directory."
#   - Usage: combine_data.sh [--extra-files 'file1 file2'] <dest-data-dir> <src-data-dir1> <src-data-dir2> ...
#   - "Note, files that don't appear in all source dirs will not be combined,
#     with the exception of utt2uniq and segments, which are created where
#     necessary."
#   - It also checks the following files for consistent counts:
#     utt2spk utt2lang utt2dur reco2dur feats.scp text cmvn.scp vad.scp
#     reco2file_and_channel wav.scp spk2gender


# Mics 02-13 contain the same content, but recorded from different microphones.
# To get some channel diversity, but not be overwhelmed with duplicated data
# we take a 2k subset from mics 04-13 and combine it with all of mic 02.


echo "$0: selecting a 2k subset of mics 04 through 13 and combining it with mic 02"
# Arguments are <srcdir> <num-utt> <destdir>; paths are quoted so that an
# output directory containing whitespace is not split into several words.
utils/subset_data_dir.sh "$out_dir/mx6_mic_04_to_13" 2000 \
  "$out_dir/mx6_mic_04_to_13_2k"

# Annotator's notes on utils/subset_data_dir.sh (not part of this file;
# confirm against that script):
#   - Besides the three positional arguments used here it accepts further
#     "--" options: a fixed number of utterances per speaker, a number of
#     speakers, the shortest / first / last N utterances, or an explicit list
#     of speakers or utterances.
#   - With no such option, as here, 2000 utterances are picked at random.


# Arguments are <dest-data-dir> <src-data-dir1> <src-data-dir2>: all of mic 02
# plus the 2k subset of mics 04-13 end up in mx6_mic.
utils/combine_data.sh "$out_dir/mx6_mic" "$out_dir/mx6_mic_02" \
  "$out_dir/mx6_mic_04_to_13_2k"

echo "$0: preparing telephone portion"
# Both paths are quoted so that whitespace in the corpus or output location
# does not split them into several arguments.
local/make_mx6_calls.pl "$in_dir" "$out_dir"
# Annotator's notes on local/make_mx6_calls.pl (not part of this file; confirm
# against that script):
#   - Its own description: "Prepares the telephone portion of Mixer 6
#     (LDC2013S03)".
#   - The telephone audio is stored in SPHERE format and is converted with the
#     external sph2pipe tool.
#   - It generates spk2utt automatically.
#   - It finishes by calling fix_data_dir.sh and validate_data_dir.sh to check
#     the generated files.
# The next step reads $out_dir/mx6_calls, so that directory is expected to
# exist once this call returns.

# Report the real destination: the result goes to <out-dir>/mx6, which is
# "data/mx6" only when the script is invoked with "data/" as <out-dir>.
echo "$0: combining mic and telephone speech in $out_dir/mx6"
# Arguments are <dest-data-dir> <src-data-dir1> <src-data-dir2>.
utils/combine_data.sh "$out_dir/mx6" "$out_dir/mx6_mic" "$out_dir/mx6_calls"

# Final clean-up of the combined directory; per the annotator, the total
# number of utterances may shrink in this step.
utils/fix_data_dir.sh "$out_dir/mx6"