接上回
if [ -f $data/segments ]; then # 检查 data/mfcc/train 下是否存在 segments 文件：存在则执行 then 分支，否则执行 else 分支；本例中该文件不存在，因此实际走的是 else 分支
echo "$0 [info]: segments file exists: using that."
split_segments=""
for n in $(seq $nj); do
split_segments="$split_segments $logdir/segments.$n"
done
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
$cmd JOB=1:$nj $logdir/make_mfcc_${name}.JOB.log \
extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \
compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- \| \
copy-feats --compress=$compress $write_num_frames_opt ark:- \
ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \
|| exit 1;
else
echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." #打印 make_mfcc.sh: [info]: 没有 segments 文件存在：假设 wav.scp 是按语句(utterance)索引的（注意是 utterance，不是说话者 speaker）
split_scps=""
for n in $(seq $nj); do
split_scps="$split_scps $logdir/wav_${name}.$n.scp"
echo "----test: $split_scps"
done #这里应该是切分为4个,每一个都添加到这个变量中,我们注释后面的,加入一些打印运行一下
![](https://i-blog.csdnimg.cn/blog_migrate/e13577e73eda82ef07e71dbb244809a4.png)
我们看到分为了四个
utils/split_scp.pl $scp $split_scps || exit 1; #这里就是将$scp(也就是 data/mfcc/train下的wav.scp)按照 $split_scps切分,也就是切分为上面说到的四个,我们后面在具体的看。我们执行一下这句。我们看到在exp/make_mfcc/train下生成4个文件。
![](https://i-blog.csdnimg.cn/blog_migrate/6825801b869287a58b841d03019fe0a6.png)
可以用对比软件与原来的wav.scp对比一下。后来发现每个文件中是2500条目,一共是10000条目。
# add ,p to the input rspecifier so that we can just skip over
# utterances that have bad wave data. #加入,p
if [ -f $data/segments ]; then # 检查 data/mfcc/train 下是否存在 segments 文件：存在则执行 then 分支，否则执行 else 分支；本例中该文件不存在，因此实际走的是 else 分支
echo "$0 [info]: segments file exists: using that."
split_segments=""
for n in $(seq $nj); do
split_segments="$split_segments $logdir/segments.$n"
done
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
$cmd JOB=1:$nj $logdir/make_mfcc_${name}.JOB.log \
extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \
compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- \| \
copy-feats --compress=$compress $write_num_frames_opt ark:- \
ark,scp:$mfccdir/raw_mfcc_$name.JOB.ark,$mfccdir/raw_mfcc_$name.JOB.scp \
|| exit 1;
else
echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance." #打印 make_mfcc.sh: [info]: 没有 segments 文件存在：假设 wav.scp 是按语句(utterance)索引的（注意是 utterance，不是说话者 speaker）
split_scps=""
for n in $(seq $nj); do
split_scps="$split_scps $logdir/wav_${name}.$n.scp"
echo "----test: $split_scps"
done #这里应该是切分为4个,每一个都添加到这个变量中,我们注释后面的,加入一些打印运行一下
![](https://i-blog.csdnimg.cn/blog_migrate/e13577e73eda82ef07e71dbb244809a4.png)
我们看到分为了四个
utils/split_scp.pl $scp $split_scps || exit 1; #这里就是将$scp(也就是 data/mfcc/train下的wav.scp)按照 $split_scps切分,也就是切分为上面说到的四个,我们后面在具体的看。我们执行一下这句。我们看到在exp/make_mfcc/train下生成4个文件。
![](https://i-blog.csdnimg.cn/blog_migrate/6825801b869287a58b841d03019fe0a6.png)
可以用对比软件与原来的wav.scp对比一下。后来发现每个文件中是2500条目,一共是10000条目。
# add ,p to the input rspecifier so that we can just skip over
# utterances that have bad wave data. #加入,p