TIMIT 原始数据的文件名虽然以 .WAV 结尾,但实际格式并不是标准的 RIFF WAV,而是 NIST SPHERE 格式。可以用 Python 的 sphfile 库把它转换成标准的 wav 文件:
import params as hp
from sphfile import SPHFile
import glob
import os
def _wav_output_path(sph_path):
    """Return the output path for *sph_path*: ``NAME.WAV`` -> ``NAME_.wav``.

    Only the file extension is rewritten, so a ``.WAV`` substring that
    happens to occur in a directory name is left untouched.
    """
    stem, _ext = os.path.splitext(sph_path)
    return stem + "_.wav"


def _convert_split(pattern, label):
    """Convert every ``.WAV`` file matching *pattern* with ``SPHFile``.

    For each match the file is opened with ``SPHFile``, written out via
    ``write_wav`` next to the original (as ``NAME_.wav``), and the
    original is then deleted.

    Args:
        pattern: glob pattern selecting the input files.
        label: text printed after the number of files found.
    """
    # glob matching is case-normalised on case-insensitive platforms
    # (e.g. Windows), so "*.WAV" can also match lower-case ".wav" files such
    # as the "*_.wav" output of an earlier run.  Keep only the exact ".WAV"
    # extension so a file is never overwritten and then removed.
    sph_files = [
        candidate
        for candidate in glob.glob(pattern)
        if os.path.splitext(candidate)[1] == ".WAV"
    ]
    print(len(sph_files), label)
    for sph_path in sph_files:
        sph = SPHFile(sph_path)
        sph.write_wav(filename=_wav_output_path(sph_path))
        # Remove the source only after the converted copy has been written.
        os.remove(sph_path)


if __name__ == "__main__":
    _convert_split(
        'D:/pycharm_proj/corpus/data/lisa/data/timit/raw/TIMIT/TRAIN/*/*/*.WAV',
        "train utterences",
    )
    _convert_split(
        'D:/pycharm_proj/corpus/data/lisa/data/timit/raw/TIMIT/TEST/*/*/*.WAV',
        "test utterences",
    )
    print("Completed")
TIMIT 数据集下载:
链接:https://pan.baidu.com/s/1rE0s4Tc2MonI6lIkGyof-g
提取码:l0ee