切割patch
运行Patch_Generation/gen_patch_noLabel_stride_MultiProcessing_multiScales.py
更改一下配置
slides_folder_dir = '存放wsi的路径位置' #D:\DataSet\CAMELYON16\Original_Image
slide_paths = glob.glob(os.path.join(slides_folder_dir, '*.tif')) # change the suffix 'wsi文件格式' to other if necessary
save_folder_dir = '存放结果的位置' #D:\DataSet\CAMELYON16\process
提取特征&分数据集
运行main_Extract_PerSlide.py
参考配置
--data_dir='存放pkl文件的上一级路径' #D:\DataSet\CAMELYON16\process\10.0
--log_dir='存放结果的位置' #D:\DataSet\CAMELYON16\process
根据csv划分数据集(csv的列名为train、val、test,每一列存放属于该子集的slide名称)
import glob
import os
import pickle
import pandas as pd
# Split per-slide feature pickles into train/val/test bundles according to a
# split csv whose columns are named 'train', 'val' and 'test'.

# Replace with the folder that holds your per-slide .pkl feature files
file_path = 'D:/DataSet/CAMELYON16/process/mDATA_folder'
save_path = 'D:/DataSet/CAMELYON16/process/mDATA_train_folder'
# Replace with the path of your split csv file
df = pd.read_csv('G:/project/CLAM-master/splits/task_1_tumor_vs_normal_100/splits_0.csv')

# One {slide_name: loaded_pickle_content} dict per split.
mDATA_train = {}
mDATA_val = {}
mDATA_test = {}
split_targets = {'train': mDATA_train, 'val': mDATA_val, 'test': mDATA_test}

# Build the set of slide names of every split once, so that each slide is
# matched by exact name. Substring matching (str.contains) could assign a
# slide to the wrong split when one name is a prefix of another.
split_members = {
    column: set(df[column].dropna().astype(str).str.strip())
    for column in df.columns
    if column in split_targets
}

feature_paths = glob.glob(os.path.join(file_path, '*.pkl'))
for slide_file in feature_paths:
    # NOTE: pickle.load executes arbitrary code from the file; only load
    # feature files that you generated yourself.
    with open(slide_file, 'rb') as file:
        data = pickle.load(file)

    # The slide name is the first two '_'-separated tokens of the file name
    # (text before the first '.').
    file_name = os.path.basename(slide_file).split('.')[0]
    slide_name = '_'.join(file_name.split('_')[:2])

    assigned = False
    for column, members in split_members.items():
        if slide_name in members:
            split_targets[column][slide_name] = data
            assigned = True
    if not assigned:
        # Previously such slides were dropped silently.
        print('Warning: {} is not listed in any split column; skipped'.format(slide_name))

os.makedirs(save_path, exist_ok=True)

# Writes train.pkl, val.pkl and test.pkl into save_path.
for split_name, split_data in split_targets.items():
    with open(os.path.join(save_path, split_name + '.pkl'), 'wb') as f:
        pickle.dump(split_data, f)
训练
运行Main_DTFD_MIL.py
参考配置
--mDATA0_dir_train0=D:\DataSet\CAMELYON16\process\mDATA_train_folder\train.pkl
--mDATA0_dir_val0=D:\DataSet\CAMELYON16\process\mDATA_train_folder\val.pkl
--mDATA_dir_test0=D:\DataSet\CAMELYON16\process\mDATA_train_folder\test.pkl
# Official C16 test-set annotation file. It is read with header=None, so its
# columns are addressed by position: reOrganize_mDATA below compares column 0
# against the slide name and reads 'Tumor' / 'Normal' from column 1.
df = pd.read_csv('G:/dataset/C16/reference.csv', header=None)
# Change the dataset loading in the original source to always go through this
# function; the reOrganize_mDATA_test function is no longer used.
def reOrganize_mDATA(mDATA):
    """Convert a ``{slide_name: patch_list}`` mapping into parallel lists.

    The label of a slide is derived from its name: names starting with
    'tumor' get 1 and names starting with 'normal' get 0. Any other name is
    looked up in the module-level ``df`` table (column 0 is compared with the
    slide name, column 1 must be 'Tumor' or 'Normal'); this lookup was added
    on top of the original source so that test slides can be labelled.

    Args:
        mDATA: Mapping from slide name to an iterable of patch records. Each
            record is indexed with ``'feature'`` and that value is passed to
            ``torch.from_numpy``.

    Returns:
        A tuple ``(SlideNames, FeatList, Label)`` of equally long lists, in
        the iteration order of ``mDATA``. ``FeatList[i]`` stacks the patch
        features of slide ``i`` along a new leading dimension
        (numPatch x fs), ``Label[i]`` is 0 or 1.

    Raises:
        RuntimeError: If a slide name has neither known prefix and is not
            listed in ``df`` as 'Tumor' or 'Normal'.
    """
    SlideNames = []
    FeatList = []
    Label = []
    for slide_name, patch_data_list in mDATA.items():
        if slide_name.startswith('tumor'):
            label = 1
        elif slide_name.startswith('normal'):
            label = 0
        else:
            # Look the slide up once. An empty result means the slide is not
            # in the reference table, which must end in the RuntimeError
            # below rather than an IndexError from indexing an empty array.
            matches = df[df[df.columns[0]] == slide_name][df.columns[1]].values
            slide_type = matches[0] if len(matches) > 0 else None
            if slide_type == 'Tumor':
                label = 1
            elif slide_type == 'Normal':
                label = 0
            else:
                raise RuntimeError('Undefined slide type')

        SlideNames.append(slide_name)
        Label.append(label)

        # Stack the per-patch feature vectors: numPatch x fs
        featGroup = torch.stack(
            [torch.from_numpy(tpatch['feature']) for tpatch in patch_data_list],
            dim=0,
        )
        FeatList.append(featGroup)

    return SlideNames, FeatList, Label