最大的语音数据下载网站:
VoxCeleb 说话人识别数据集:无法下载
中国版本的vox能下载:
AISHELL-1 数据集解压方法
$ tar xzf data_aishell.tgz
$ cd data_aishell/wav
$ for tar in *.tar.gz; do tar xvf "$tar"; done
数据的组织形式,以语音识别为例:
{
"dict_filename": "dict.txt",
"dataset":{
"train":[
{
"name": "thchs30_train",
"data_list": "datalist/thchs30/train.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/thchs30/train.syllable.txt"
},
{
"name": "stcmds_train",
"data_list": "datalist/st-cmds/train.wav.txt",
"data_path": "/data/speech_data",
"label_list": "datalist/st-cmds/train.syllable.txt"
},
{
"name": "primewords_train",
"data_list": "datalist/primewords/train.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/primewords/train.syllable.txt"
},
{
"name": "aishell_train",
"data_list": "datalist/aishell/train.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aishell/train.syllable.txt"
},
{
"name": "aidatatang_train",
"data_list": "datalist/aidatatang_lst/train.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aidatatang_lst/train.syllable.txt"
},
{
"name": "magicdata_train",
"data_list": "datalist/magicdata_lst/train.wav.lst",
"data_path": "/data/speech_data/magicdata",
"label_list": "datalist/magicdata_lst/train.syllable.txt"
}
],
"dev":[
{
"name": "thchs30_dev",
"data_list": "datalist/thchs30/cv.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/thchs30/cv.syllable.txt"
},
{
"name": "stcmds_dev",
"data_list": "datalist/st-cmds/dev.wav.txt",
"data_path": "/data/speech_data",
"label_list": "datalist/st-cmds/dev.syllable.txt"
},
{
"name": "primewords_dev",
"data_list": "datalist/primewords/dev.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/primewords/dev.syllable.txt"
},
{
"name": "aishell_dev",
"data_list": "datalist/aishell/dev.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aishell/dev.syllable.txt"
},
{
"name": "aidatatang_dev",
"data_list": "datalist/aidatatang_lst/dev.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aidatatang_lst/dev.syllable.txt"
},
{
"name": "magicdata_dev",
"data_list": "datalist/magicdata_lst/dev.wav.lst",
"data_path": "/data/speech_data/magicdata",
"label_list": "datalist/magicdata_lst/dev.syllable.txt"
}
],
"test":[
{
"name": "thchs30_test",
"data_list": "datalist/thchs30/test.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/thchs30/test.syllable.txt"
},
{
"name": "stcmds_test",
"data_list": "datalist/st-cmds/test.wav.txt",
"data_path": "/data/speech_data",
"label_list": "datalist/st-cmds/test.syllable.txt"
},
{
"name": "primewords_test",
"data_list": "datalist/primewords/test.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/primewords/test.syllable.txt"
},
{
"name": "aishell_test",
"data_list": "datalist/aishell/test.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aishell/test.syllable.txt"
},
{
"name": "aidatatang_test",
"data_list": "datalist/aidatatang_lst/test.wav.lst",
"data_path": "/data/speech_data",
"label_list": "datalist/aidatatang_lst/test.syllable.txt"
},
{
"name": "magicdata_test",
"data_list": "datalist/magicdata_lst/test.wav.lst",
"data_path": "/data/speech_data/magicdata",
"label_list": "datalist/magicdata_lst/test.syllable.txt"
}
]
}
}
图像处理数据集:
常见的深度学习图像处理数据集下载_萌1萌哒小萌萌的博客-CSDN博客_图像识别数据集下载
目前深度学习开源数据集整理_林老头、的博客-CSDN博客_dtu数据集