ADNI Series
1、【ADNI】数据预处理(1)SPM,CAT12
2、【ADNI】数据预处理(2)获取 subject slices
3、【ADNI】数据预处理(3)CNNs
4、【ADNI】数据预处理(4)Get top k slices according to CNNs
5、【ADNI】数据预处理(5)Get top k slices (pMCI_sMCI) according to CNNs
6、【ADNI】数据预处理(6)ADNI_slice_dataloader ||| show image
## rules ##
## old_name = root_path + subject_id + top_k_slices_id
## subject_id: random select
## top_k_slices_id: majority select by CNNs
What you need to prepare:
1) All slices have been placed into the folder of their corresponding subject_id
hcq@research:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ ls
AD_GM_subject_id NC_GM_subject_id
hcq@research:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ tree -L 2
.
├── AD_GM_subject_id
│ ├── 002_S_0619
│ ├── 002_S_0816
│ ├── 002_S_0938
│ ├── 002_S_0955
│ ├── 002_S_1018
│ ├── 003_S_1059
│ ├── 003_S_1257
│ ├── 005_S_0221
│ ├── 005_S_0814
│ ├── 005_S_0929
│ ├── 005_S_1341
│ ├── 006_S_0547
│ ├── 006_S_0653
│ ├── 007_S_0316
│ ├── 007_S_1248
│ ├── 007_S_1304
│ ├── 007_S_1339
│ ├── 009_S_1334
│ ├── 009_S_1354
│ ├── 010_S_0786
│ ├── 010_S_0829
│ ├── 011_S_0003
│ ├── 011_S_0010
│ ├── 011_S_0053
│ ├── 011_S_0183
│ ├── 012_S_0689
│ ├── 012_S_0712
│ ├── 012_S_0720
│ ├── 012_S_0803
│ ├── 013_S_0592
│ ├── 013_S_0699
│ ├── 013_S_0996
│ ├── 013_S_1161
│ ├── 013_S_1205
│ ├── 014_S_0328
│ ├── 014_S_0356
│ ├── 014_S_0357
│ ├── 014_S_1095
│ ├── 016_S_0991
│ ├── 016_S_1263
│ ├── 018_S_0277
│ ├── 018_S_0286
│ ├── 018_S_0335
│ ├── 018_S_0633
│ ├── 018_S_0682
│ ├── 020_S_0213
│ ├── 021_S_0343
│ ├── 021_S_0642
│ ├── 021_S_0753
│ ├── 021_S_1109
│ ├── 022_S_0007
│ ├── 022_S_0129
│ ├── 022_S_0219
│ ├── 022_S_0543
│ ├── 023_S_0083
│ ├── 023_S_0084
│ ├── 023_S_0093
│ ├── 023_S_0139
│ ├── 023_S_0916
│ ├── 023_S_1262
│ ├── 023_S_1289
│ ├── 024_S_1171
│ ├── 024_S_1307
│ ├── 027_S_0404
│ ├── 027_S_0850
│ ├── 027_S_1081
│ ├── 027_S_1082
│ ├── 027_S_1254
│ ├── 027_S_1385
│ ├── 029_S_0836
│ ├── 029_S_0999
│ ├── 029_S_1056
│ ├── 029_S_1184
│ ├── 031_S_0321
│ ├── 031_S_0554
│ ├── 031_S_0773
│ ├── 031_S_1209
│ ├── 032_S_0147
│ ├── 032_S_0400
│ ├── 032_S_1037
│ ├── 032_S_1101
│ ├── 033_S_0724
│ ├── 033_S_0733
│ ├── 033_S_0739
│ ├── 033_S_0888
│ ├── 033_S_0889
│ ├── 033_S_1087
│ ├── 033_S_1281
│ ├── 033_S_1283
│ ├── 033_S_1285
│ ├── 033_S_1308
│ ├── 035_S_0341
│ ├── 036_S_0577
│ ├── 036_S_0759
│ ├── 036_S_0760
│ ├── 036_S_1001
│ ├── 037_S_0627
│ ├── 041_S_1368
│ ├── 041_S_1391
│ ├── 041_S_1435
│ ├── 051_S_1296
│ ├── 053_S_1044
│ ├── 057_S_0474
│ ├── 057_S_1371
│ ├── 057_S_1373
│ ├── 057_S_1379
│ ├── 062_S_0535
│ ├── 062_S_0690
│ ├── 062_S_0730
│ ├── 062_S_0793
│ ├── 067_S_0020
│ ├── 067_S_0029
│ ├── 067_S_0076
│ ├── 067_S_0110
│ ├── 067_S_0812
│ ├── 067_S_0828
│ ├── 067_S_1185
│ ├── 067_S_1253
│ ├── 068_S_0109
│ ├── 073_S_0565
│ ├── 073_S_1207
│ ├── 082_S_1079
│ ├── 082_S_1377
│ ├── 094_S_1027
│ ├── 094_S_1090
│ ├── 094_S_1102
│ ├── 094_S_1164
│ ├── 094_S_1397
│ ├── 094_S_1402
│ ├── 098_S_0149
│ ├── 098_S_0884
│ ├── 099_S_0372
│ ├── 099_S_0470
│ ├── 099_S_0492
│ ├── 099_S_1144
│ ├── 100_S_0743
│ ├── 100_S_0747
│ ├── 100_S_0893
│ ├── 100_S_1062
│ ├── 100_S_1113
│ ├── 109_S_0777
│ ├── 109_S_1157
│ ├── 109_S_1192
│ ├── 114_S_0228
│ ├── 114_S_0374
│ ├── 114_S_0979
│ ├── 116_S_0370
│ ├── 116_S_0392
│ ├── 116_S_0487
│ ├── 116_S_1083
│ ├── 121_S_0953
│ ├── 123_S_0088
│ ├── 123_S_0091
│ ├── 123_S_0094
│ ├── 123_S_0162
│ ├── 126_S_0606
│ ├── 126_S_0784
│ ├── 126_S_0891
│ ├── 126_S_1221
│ ├── 127_S_0431
│ ├── 127_S_0754
│ ├── 127_S_0844
│ ├── 127_S_1382
│ ├── 128_S_0167
│ ├── 128_S_0216
│ ├── 128_S_0266
│ ├── 128_S_0310
│ ├── 128_S_0517
│ ├── 128_S_0528
│ ├── 128_S_0701
│ ├── 128_S_0740
│ ├── 128_S_0805
│ ├── 128_S_1409
│ ├── 128_S_1430
│ ├── 130_S_0956
│ ├── 130_S_1201
│ ├── 130_S_1290
│ ├── 130_S_1337
│ ├── 131_S_0457
│ ├── 131_S_0497
│ ├── 131_S_0691
│ ├── 133_S_1055
│ ├── 133_S_1170
│ ├── 136_S_0194
│ ├── 136_S_0299
│ ├── 136_S_0300
│ ├── 137_S_0366
│ ├── 137_S_0438
│ ├── 137_S_0796
│ ├── 137_S_0841
│ ├── 137_S_1041
│ ├── 141_S_0340
│ ├── 141_S_0696
│ ├── 141_S_0790
│ ├── 141_S_0852
│ ├── 141_S_0853
│ ├── 141_S_1024
│ ├── 141_S_1137
│ └── 141_S_1152
└── NC_GM_subject_id
├── 002_S_0295
├── 002_S_0413
├── 002_S_0559
├── 002_S_0685
├── 002_S_1261
├── 002_S_1280
├── 003_S_0907
├── 003_S_0931
├── 003_S_0981
├── 003_S_1021
├── 005_S_0223
├── 005_S_0553
├── 005_S_0602
├── 005_S_0610
├── 006_S_0484
├── 006_S_0498
├── 006_S_0681
├── 006_S_0731
├── 007_S_0068
├── 007_S_0070
├── 007_S_1206
├── 007_S_1222
├── 009_S_0751
├── 009_S_0842
├── 009_S_0862
├── 010_S_0067
├── 010_S_0419
├── 010_S_0420
├── 010_S_0472
├── 011_S_0002
├── 011_S_0005
├── 011_S_0008
├── 011_S_0016
├── 011_S_0021
├── 011_S_0022
├── 011_S_0023
├── 012_S_0637
├── 012_S_1009
├── 012_S_1133
├── 012_S_1212
├── 013_S_0502
├── 013_S_0575
├── 013_S_1035
├── 013_S_1276
├── 014_S_0519
├── 014_S_0520
├── 014_S_0548
├── 014_S_0558
├── 016_S_0359
├── 016_S_0538
├── 018_S_0043
├── 018_S_0055
├── 018_S_0369
├── 018_S_0425
├── 020_S_0097
├── 020_S_0883
├── 020_S_0899
├── 020_S_1288
├── 021_S_0159
├── 021_S_0337
├── 021_S_0647
├── 021_S_0984
├── 022_S_0014
├── 022_S_0066
├── 022_S_0096
├── 022_S_0130
├── 023_S_0031
├── 023_S_0058
├── 023_S_0061
├── 023_S_0081
├── 023_S_0926
├── 023_S_0963
├── 023_S_1190
├── 023_S_1306
├── 024_S_0985
├── 024_S_1063
├── 027_S_0074
├── 027_S_0118
├── 027_S_0120
├── 027_S_0403
├── 029_S_0824
├── 029_S_0843
├── 029_S_0845
├── 029_S_0866
├── 031_S_0618
├── 032_S_0095
├── 032_S_0479
├── 032_S_0677
├── 032_S_1169
├── 033_S_0516
├── 033_S_0734
├── 033_S_0741
├── 033_S_0920
├── 033_S_0923
├── 033_S_1016
├── 033_S_1086
├── 033_S_1098
├── 035_S_0048
├── 035_S_0156
├── 035_S_0555
├── 036_S_0576
├── 036_S_0672
├── 036_S_0813
├── 036_S_1023
├── 037_S_0303
├── 037_S_0327
├── 037_S_0454
├── 037_S_0467
├── 041_S_0125
├── 041_S_0262
├── 041_S_0898
├── 041_S_1002
├── 051_S_1123
├── 052_S_0951
├── 052_S_1250
├── 052_S_1251
├── 057_S_0643
├── 057_S_0779
├── 057_S_0818
├── 057_S_0934
├── 062_S_0578
├── 062_S_0768
├── 062_S_1099
├── 067_S_0019
├── 067_S_0024
├── 067_S_0056
├── 067_S_0059
├── 067_S_0177
├── 067_S_0257
├── 068_S_0127
├── 068_S_0210
├── 068_S_1191
├── 072_S_0315
├── 073_S_0089
├── 073_S_0311
├── 073_S_0312
├── 073_S_0386
├── 082_S_0304
├── 082_S_0363
├── 082_S_0640
├── 082_S_0761
├── 082_S_1256
├── 094_S_0489
├── 094_S_0526
├── 094_S_0692
├── 094_S_0711
├── 094_S_1241
├── 094_S_1267
├── 098_S_0171
├── 098_S_0172
├── 098_S_0896
├── 099_S_0040
├── 099_S_0090
├── 099_S_0352
├── 099_S_0533
├── 099_S_0534
├── 100_S_0015
├── 100_S_0035
├── 100_S_0047
├── 100_S_0069
├── 100_S_1286
├── 109_S_0840
├── 109_S_0876
├── 109_S_0967
├── 109_S_1013
├── 109_S_1014
├── 114_S_0166
├── 114_S_0173
├── 114_S_0416
├── 114_S_0601
├── 116_S_0360
├── 116_S_0382
├── 116_S_0648
├── 116_S_0657
├── 116_S_1232
├── 116_S_1249
├── 123_S_0072
├── 123_S_0106
├── 123_S_0113
├── 123_S_0298
├── 126_S_0405
├── 126_S_0506
├── 126_S_0605
├── 126_S_0680
├── 127_S_0259
├── 127_S_0260
├── 127_S_0622
├── 127_S_0684
├── 128_S_0229
├── 128_S_0230
├── 128_S_0245
├── 128_S_0272
├── 128_S_0500
├── 128_S_0522
├── 128_S_0545
├── 128_S_0863
├── 128_S_1242
├── 129_S_0778
├── 130_S_0232
├── 130_S_0886
├── 130_S_0969
├── 130_S_1200
├── 131_S_0123
├── 131_S_0319
├── 131_S_0436
├── 131_S_0441
├── 131_S_1301
├── 133_S_0433
├── 133_S_0488
├── 133_S_0493
├── 133_S_0525
├── 136_S_0086
├── 136_S_0184
├── 136_S_0186
├── 136_S_0196
├── 137_S_0283
├── 137_S_0459
├── 137_S_0686
├── 137_S_0972
├── 141_S_0717
├── 141_S_0726
├── 141_S_0767
├── 141_S_0810
├── 141_S_1094
├── 941_S_1194
├── 941_S_1195
├── 941_S_1197
├── 941_S_1202
└── 941_S_1203
430 directories, 0 files
hcq@research:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403/AD_GM_subject_id/002_S_0619$ ls
XSlice YSlice ZSlice
2) top_k_slices.txt, which lists the slices majority-selected by the CNNs (AlexNet).
slice_Z27|||82.29|||96.88
slice_X32|||81.25|||95.74
slice_X74|||81.25|||95.74
slice_Y76|||81.25|||96.88
slice_Z42|||81.25|||96.59
slice_X48|||81.25|||96.02
slice_X43|||80.21|||96.02
slice_Y81|||80.21|||97.16
slice_Y69|||80.21|||96.02
slice_Y64|||80.21|||95.17
slice_Z30|||80.21|||96.88
slice_Y80|||79.17|||96.31
slice_X45|||79.17|||96.88
slice_Z40|||79.17|||96.31
slice_Z29|||79.17|||95.74
slice_X39|||79.17|||96.31
slice_Y62|||79.17|||96.88
slice_Y73|||79.17|||96.88
slice_Z41|||79.17|||96.88
slice_Y77|||79.17|||97.44
Line format: slice_id ||| val_acc ||| train_acc
What you will get:
A prepared train/validation/test folders for training
hcq@research:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01$ tree -L 2
.
├── test
│ ├── AD ## 1089
│ └── NC ## 1287
├── train
│ ├── AD ## 14751
│ └── NC ## 16929
└── validation
├── AD ## 3861
└── NC ## 4455
9 directories, 0 files
Steps:
step1: get the subject ids and partition the subjects into train/val/test folders with the ratio 0.75 : 0.2 : 0.05
step2: according to top_k_silces_id_txt, majority select top k slices
Script:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime
import shutil
import random
from hcq_lib import *
# Fractions of the subjects (per class) assigned to each split.
# NOTE: test_percentage is not read by the partitioning code visible in this
# script; the test split receives whatever is left after train and validation.
train_percentage, val_percentage, test_percentage = 0.75, 0.2, 0.05

# An earlier version hard-coded the per-class list lengths (199 for AD, 230 for
# NC) and drew one random permutation per class at module level.

# Folder holding the subject path lists, top_k_slices.txt and the log folder.
root_txt_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id"
# Folder holding the AD_GM_subject_id / NC_GM_subject_id subject trees.
dataset_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403"

# Text file with one "slice_id|||val_acc|||train_acc" entry per line.
top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")
# Log file passed to hcq_write.
log_path = os.path.join(root_txt_path, "log", "log.txt")
def _read_subject_ids(silce_txt):
    """Return the unique subject ids found in *silce_txt*, in first-seen order."""
    subject_ids = []
    seen = set()  # O(1) membership test; the list keeps the original order
    with open(silce_txt, "r") as path_lines:
        for line in path_lines:
            line = line.replace("\n", "").replace("\r", "")
            if not line.strip():
                # A blank line has no fields; indexing it below would raise
                # IndexError and abort the whole partitioning run.
                continue
            # The subject id is taken as the 4th backslash-separated field
            # (presumably a Windows-style path -- confirm against the
            # *_GM_subject_id_path.txt files).
            subject_id = line.split('\\')[3]
            if subject_id not in seen:
                seen.add(subject_id)
                subject_ids.append(subject_id)
    return subject_ids


def partition_slice_train_val_test(silce_txt, dataset_dir, label):
    """Randomly split the subjects of one class and copy their top-k slices.

    Rules:
        old_name = root_path + subject_id + top_k_slices_id
        subject_id: randomly selected
        top_k_slices_id: majority-selected by the CNNs

    Step 1: collect the subject ids from *silce_txt* and partition them into
    train / validation / test. The first int(n * train_percentage) shuffled
    subjects go to train, the next int(n * val_percentage) to validation and
    every remaining subject to test (ratio 0.75 : 0.2 : remainder with the
    module constants; test_percentage itself is not consulted).

    Step 2: for each split, call move_slice to copy the slices listed in
    top_k_silces_id_txt.

    Args:
        silce_txt: path of the text file listing slice paths for one class.
        dataset_dir: directory handed to move_slice as the root of the
            per-subject folders.
        label: class label (e.g. "AD" / "NC"); used in the log output and
            handed to move_slice.
    """
    ## step1 -- added by hcq 20180404
    subject_id_list = _read_subject_ids(silce_txt)
    len_slice_list = len(subject_id_list)

    # Random permutation of the subject indices; logged so that a run can be
    # inspected afterwards.
    rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
    hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
    hcq_write(log_path, False, False, rondom_list)

    shuffled_subjects = [subject_id_list[idx] for idx in rondom_list]
    train_end = int(len_slice_list * train_percentage)
    val_end = train_end + int(len_slice_list * val_percentage)

    train_subject_id = shuffled_subjects[:train_end]
    val_subject_id = shuffled_subjects[train_end:val_end]
    test_subject_id = shuffled_subjects[val_end:]  # whatever is left over

    hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
    hcq_write(log_path, True, True, "[num_train] {}".format(len(train_subject_id)))
    hcq_write(log_path, True, True, "[num_val] {}".format(len(val_subject_id)))
    hcq_write(log_path, True, True, "[num_test] {}".format(len(test_subject_id)))

    ### step2: according to top_k_silces_id_txt, select the top k slices
    ### added by hcq 20180404
    move_slice(train_subject_id, dataset_dir, "train", label)
    move_slice(val_subject_id, dataset_dir, "validation", label)
    move_slice(test_subject_id, dataset_dir, "test", label)
def move_slice(subject_id_folder_list, dataset_dir, folder_name, label):
    """Copy the top-k slices of each subject into one split/label folder.

    Despite the name, files are copied (shutil.copyfile), not moved. The
    destination folder is
    <root_new_path>/<dataset_name>/<folder_name>/<label> and is created via
    hcq_create_dir. Each copied file is named "<subject_id>_<slice_id>.jpg".

    Args:
        subject_id_folder_list: subject ids whose slices should be copied.
        dataset_dir: directory containing one folder per subject, each with
            XSlice / YSlice / ZSlice sub-folders.
        folder_name: split name used as destination sub-folder
            (the caller passes "train", "validation" or "test").
        label: class label used as the innermost destination folder.

    Raises:
        Whatever open() / shutil.copyfile raise when the top-k list or a
        source slice is missing.
    """
    root_new_path = "/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/"
    dataset_name = "majority_select_slices_folder_01"
    new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
    hcq_create_dir(new_name_path)

    if not subject_id_folder_list:
        # Nothing to copy; also avoids touching the top-k file at all.
        return

    # Parse the top-k list once instead of re-reading it for every subject.
    selected_slices = []  # (axis sub-folder, slice file name)
    with open(top_k_silces_id_txt, "r") as top_k_silces_id_txt_list:
        for item in top_k_silces_id_txt_list:
            item = item.replace("\n", "").replace("\r", "")
            slice_id = item.split('|||')[0] + ".jpg"
            # The axis letter in the slice id selects the sub-folder; checked
            # in X, Y, Z order.
            for axis in ("X", "Y", "Z"):
                if axis in slice_id:
                    selected_slices.append((axis + "Slice", slice_id))
                    break
            # Lines without an axis letter (blank lines, legends) are skipped:
            # previously they reused the source path of the preceding line, or
            # raised NameError when they came first.

    for subject_id in subject_id_folder_list:
        for axis_folder, slice_id in selected_slices:
            old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)
            new_name = os.path.join(new_name_path, subject_id + "_" + slice_id)
            hcq_write(log_path, True, True, new_name)
            shutil.copyfile(old_name, new_name)
if __name__ == "__main__":
    # Per-class text files listing the slice paths (AD first, then NC).
    AD_silce_txt, NC_silce_txt = [
        os.path.join(root_txt_path, txt_name)
        for txt_name in ("AD_GM_subject_id_path.txt", "NC_GM_subject_id_path.txt")
    ]
    print("AD_silce_txt = {}".format(AD_silce_txt))
    print("NC_silce_txt = {}".format(NC_silce_txt))

    # Per-class roots of the subject folders.
    dataset_AD, dataset_NC = [
        os.path.join(dataset_path, class_folder)
        for class_folder in ("AD_GM_subject_id", "NC_GM_subject_id")
    ]

    # Partition and copy each class in turn.
    for class_txt, class_dir, class_label in (
        (AD_silce_txt, dataset_AD, "AD"),
        (NC_silce_txt, dataset_NC, "NC"),
    ):
        partition_slice_train_val_test(class_txt, class_dir, class_label)