#!/bin/bash
# Copyright 2016 Tsinghua University (Author: Dong Wang, Xuewei Zhang). Apache 2.0.
# 2016 LeSpeech (Author: Xingyu Na)
#
# This script prepares the data directory for the thchs30 recipe.
# It reads the corpus and gets wav.scp and transcriptions.
#
# Usage: the script takes two arguments: $H $thchs
#   H     - the current working directory
#   thchs - the path to the data files
# Both positional arguments are required; without them the script would
# otherwise cd to $HOME and create the data tree there.
if [ $# -lt 2 ]; then
  echo "Usage: $0 <work-dir> <corpus-dir>" >&2
  exit 1
fi

dir=$1        # first argument: the working directory
corpus_dir=$2 # second argument: corpus path; train/dev/test live directly under it

# Switch to the working directory; abort rather than create data/ elsewhere.
cd "$dir" || exit 1

echo "creating data/{train,dev,test}"
# Brace expansion must stay on a single line to be expanded by bash.
# Creates data/train, data/dev and data/test (and data/ itself if missing).
mkdir -p data/{train,dev,test}
#create wav.scp, utt2spk.scp, spk2utt.scp, text
(
for x in train dev test; do
echo "cleaning data/$x"
cd $dir/data/