- data/test
# things in data/*test* and data/*train*
cmvn.scp # For each speaker, the location (ark file + byte offset) of its CMVN (Cepstral Mean and Variance Normalization) statistics in a binary ark file
feats.scp # For each utterance, the location (ark file + byte offset) of its extracted feature matrix in a binary ark file
spk2utt # Maps each speaker ID to the list of his/her utterance IDs
text # Utterance IDs and their corresponding transcriptions
utt2spk # Maps each utterance ID to its speaker ID
wav.scp # Location of the audio file for every recording
/splitN # A directory used to split the task into N parts; each part contains its own split copies of the files above
- data/lang
# things in data/*lang*, which is a language directory
-
/phones
-
align_lexicon.txt
WORD WORD PRONUNCIATION
e.g.
HI HI HH_B AY_E
-
/tmp
G.fst # Grammar's finite state transducer
L.fst # Lexicon's finite state transducer
L_disambig.fst # Lexicon's finite state transducer, including disambiguation symbols
oov.int # Integer ID of the out-of-vocabulary (OOV) word
oov.txt # The word that all out-of-vocabulary (OOV) words are mapped to (e.g. <UNK>)
phones.txt # Phone symbol table: each phone (as used in the pronunciations of the words in words.txt) with its ID
topo # HMM topology definition for the phones
words.txt # Word symbol table: each word with its ID; the words are those that appear in the text file