一、Monai安装
Step 1: 创建新环境,conda create --name monai_env python=3.10
Step 2: 安装torch gpu, conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
根据电脑的cuda版本在pytorch官网获取相应的命令,若不使用GPU版本的pytorch则直接到Step 3
Step 3: 安装Monai, pip install monai
Step 4: 安装itk, nibabel包,pip install itk
,pip install nibabel
Step 5: 安装matplotlib, jupyter等其它依赖包,pip install matplotlib
,pip install jupyter
二、使用Monai进行数据增强
使用Monai进行数据增强有2种方式:1. 边训练边增强,每个epoch使用的训练数据的数量与增强前相同;2. 保存增强图像后一次性加载全部增强数据和原始数据。
1. 边训练边增强
将数据增强写在train_transforms,则每个epoch加载的数据集都是引入随机变换的。
由于每个epoch的数据不同,因此训练过程震荡较大。
# Training pipeline: deterministic preprocessing first, then random augmentation
# (re-drawn every epoch, so each epoch sees slightly different data).
train_transforms = Compose(
    [
        LoadImaged(keys=["image", "label"]),
        EnsureChannelFirstd(keys=["image", "label"]),
        ScaleIntensityRanged(
            keys=["image"],
            a_min=0,
            a_max=500,
            b_min=0.0,
            b_max=1.0,
            clip=True,  # if clip = True, intensity < a_min, intensity = b_min, intensity > a_max, intensity = b_max
        ),
        # Crop to the bounding box of voxels where image > 0 (single region)
        CropForegroundd(keys=["image", "label"], source_key="image"),
        Orientationd(keys=["image", "label"], axcodes="RAS"),
        # Resample to the target spacing; label uses nearest to stay integer-valued
        Spacingd(keys=["image", "label"], pixdim=(0.9, 0.9, 1.2), mode=("bilinear", "nearest")),
        # ---- data augmentation (random; does not change dataset size) ----
        RandRotate90d(keys=['image', 'label'], prob=0.2),  # random 90-degree rotation of image and label together
        RandAxisFlipd(keys=['image', 'label'], prob=0.2),  # random flip along a random axis
        # BUGFIX: Gaussian noise is an intensity transform and must only touch the
        # image; applying it to the label corrupts the mask with non-integer values.
        RandGaussianNoised(keys=['image'], prob=0.2),
        RandCropByPosNegLabeld(
            keys=["image", "label"],
            label_key="label",          # label used to locate foreground/background voxels
            spatial_size=(96, 96, 96),  # ROI size
            pos=1,                      # P(center on foreground) = pos / (pos + neg)
            neg=1,
            num_samples=8,              # number of crops returned per volume
            image_key="image",          # negatives drawn where label == 0 and image > image_threshold
            image_threshold=0,
        ),
    ]
)
# Validation pipeline: the deterministic preprocessing steps only — no random
# augmentation, so validation metrics are comparable across epochs.
_val_steps = [
    LoadImaged(keys=["image", "label"]),
    EnsureChannelFirstd(keys=["image", "label"]),
    # Map HU window [0, 500] linearly onto [0, 1], clipping values outside it
    ScaleIntensityRanged(keys=["image"], a_min=0, a_max=500, b_min=0.0, b_max=1.0, clip=True),
    # Crop to the bounding box of nonzero image voxels
    CropForegroundd(keys=["image", "label"], source_key="image"),
    Orientationd(keys=["image", "label"], axcodes="RAS"),
    # Resample to target spacing; nearest interpolation keeps the label integer-valued
    Spacingd(keys=["image", "label"], pixdim=(0.9, 0.9, 1.2), mode=("bilinear", "nearest")),
]
val_transforms = Compose(_val_steps)
# Cache every preprocessed volume in RAM (cache_rate=1.0): the deterministic
# transforms run once up front, only the random ones run per epoch.
train_ds = CacheDataset(data=train_files, transform=train_transforms, cache_rate=1.0, num_workers=4)
# train_ds = Dataset(data=train_files, transform=train_transforms)
# use batch_size=2 to load images and use RandCropByPosNegLabeld
# to generate batch_size x num_samples (in train_transforms) images for network training
train_loader = DataLoader(train_ds, batch_size=1, shuffle=True, num_workers=4)
# Validation transforms are fully deterministic, so caching them is safe.
val_ds = CacheDataset(data=val_files, transform=val_transforms, cache_rate=1.0, num_workers=4)
# val_ds = Dataset(data=val_files, transform=val_transforms)
val_loader = DataLoader(val_ds, batch_size=1, num_workers=4)
2. 保存增强图像
需要注意的是由于是随机变换,因此需要检查增强数据和原始数据中是否有相同的数据。
import math

# Offline augmentation pipeline: deterministic preprocessing, then random
# augmentation.  Run it repeatedly (see below) and save each pass to disk to
# build an enlarged dataset.
generate_transforms = Compose(
    [
        LoadImaged(keys=["image", "label"], image_only=False),
        # Required: without an explicit channel dim, Spacingd would treat the
        # first spatial axis as the channel axis and leave it un-resampled.
        EnsureChannelFirstd(keys=["image", "label"]),
        # EnsureTyped(keys=["image", "label"]),
        Spacingd(keys=["image", "label"], pixdim=(0.9, 0.9, 1.2), mode=("bilinear", "nearest")),
        Orientationd(keys=["image", "label"], axcodes='RAS'),
        ScaleIntensityRanged(keys=["image"], a_min=0, a_max=500, b_min=0.0, b_max=1.0, clip=True,),
        # #=====data augmentation ======
        # BUGFIX: spatial transforms must interpolate the label with 'nearest';
        # the default bilinear mode would produce fractional label values.
        RandAffined(keys=['image', 'label'], mode=("bilinear", "nearest"), prob=0.5,
                    translate_range=10, rotate_range=0, shear_range=0),
        RandRotated(keys=['image', 'label'], mode=("bilinear", "nearest"), prob=0.5,
                    range_x=5./180*math.pi, range_y=5./180*math.pi, range_z=5./180*math.pi),  # ranges in radians
        RandGaussianNoised(keys='image', prob=0.5),  # intensity noise: image only
    ])
from monai.transforms import SaveImage

# Run the random pipeline `number_runs` times; each pass writes one augmented
# copy of every patient into its own sub-folder.
output_path = '.\\3Dircadb1\\aug'
number_runs = 8
check_ds = Dataset(data=data_dicts, transform=generate_transforms)
check_loader = DataLoader(check_ds, batch_size=1)
for i in range(number_runs):
    name_folder = 'generated_data_' + str(i)
    # name_folder = 'original'
    output = os.path.join(output_path, name_folder)
    # Start from an empty folder so stale files from a previous run cannot mix in.
    if os.path.exists(output):
        shutil.rmtree(output)
    os.makedirs(output)  # unlike mkdir, also creates missing parent directories
    # Hoisted out of the patient loop: the writer is pure configuration, so one
    # instance per run is enough (the original rebuilt it for every sample).
    saver = SaveImage(squeeze_end_dims=True, output_dir=output, output_postfix="aug" + str(i), resample=False)
    for index, patient in enumerate(check_loader):
        # Each pass through check_loader re-draws the random transforms, so the
        # saved volumes differ between runs.  Output filenames are derived from
        # each input's meta data, so image and label do not collide.
        saver(patient['image'][0])
        saver(patient['label'][0])
        # To save the un-augmented originals instead, use output_postfix="".
三、加载数据训练模型
Monai加载数据可以使用CacheDataset
和Dataset
两种。
1. Dataset
每个epoch加载数据,并根据定义的transform进行变换,耗时相对CacheDataset
较长。
2. CacheDataset
训练前先把训练数据缓存起来,但它的处理会根据transform分为两部分。a. 将transform中非随机的处理(LoadNiftid
,AddChanneld
, Spacingd
, Orientationd
, ScaleIntensityRanged
这些就是每个数据都会执行的 )在训练之前处理完,缓存起来。b,带随机的处理(RandCropByPosNegLabeld
这个就是随机裁剪)需要在每次epoch的时候完成,因为每个数据处理的都不一样。因此在使用CacheDataset
时,定义transforms尽量将非随机处理变换写前面,随机处理变换写后面。
!!!需要特别注意的是若GPU内存较少,数据集较大时使用CacheDataset可能会导致GPU显存不够而报错或者导致kernel crash。
3. DataLoader
# NOTE(review): this repeats the DataLoader setup from section 2; kept here to
# illustrate CacheDataset + DataLoader usage in isolation.
train_ds = CacheDataset(data=train_files, transform=train_transforms, cache_rate=1.0, num_workers=4)
# train_ds = Dataset(data=train_files, transform=train_transforms)
# use batch_size=2 to load images and use RandCropByPosNegLabeld
# to generate batch_size x num_samples (in train_transforms) images for network training
train_loader = DataLoader(train_ds, batch_size=1, shuffle=True, num_workers=4)
若在训练时发生错误Couldn't open shared file mapping: <.........>, error code: <1455>
,是因为显卡太旧或计算压力太大。将DataLoader
中的num_workers
设置为0即可解决。
4. 典型的训练代码
利用pytorch训练的典型代码如下所示:
max_epochs = 600
val_interval = 5          # run validation every 5 epochs
best_metric = -1
best_metric_epoch = -1
epoch_loss_values = []    # mean training loss per epoch
metric_values = []        # mean validation Dice per validation round
# argmax over channels, then one-hot to 2 classes (background, foreground)
post_pred = Compose([AsDiscrete(argmax=True, to_onehot=2)])
post_label = Compose([AsDiscrete(to_onehot=2)])
for epoch in range(max_epochs):
    print("-" * 10)
    print(f"epoch {epoch + 1}/{max_epochs}")
    model.train()
    epoch_loss = 0
    step = 0
    for step, batch in enumerate(train_loader, start=1):
        inputs = batch["image"].to(device)
        labels = batch["label"].to(device)
        labels[labels > 0] = 1          # collapse every foreground class into one
        optimizer.zero_grad()           # clear gradients accumulated last step
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()                 # back-propagate
        optimizer.step()                # update parameters
        loss_value = loss.item()
        epoch_loss += loss_value
        print(f"{step}/{len(train_ds) // train_loader.batch_size}, " f"train_loss: {loss_value:.4f}")
    epoch_loss /= step
    epoch_loss_values.append(epoch_loss)
    print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}")
    if (epoch + 1) % val_interval == 0:
        model.eval()                    # disable dropout, use running BN statistics
        with torch.no_grad():
            for vbatch in val_loader:
                val_inputs = vbatch["image"].to(device)
                val_labels = vbatch["label"].to(device)
                val_labels[val_labels > 0] = 1
                roi_size = (160, 160, 160)
                sw_batch_size = 4
                val_outputs = sliding_window_inference(val_inputs, roi_size, sw_batch_size, model)
                val_outputs = [post_pred(i) for i in decollate_batch(val_outputs)]
                val_labels = [post_label(i) for i in decollate_batch(val_labels)]
                # accumulate the Dice of the current batch into the metric state
                dice_metric(y_pred=val_outputs, y=val_labels)
            # aggregate over the whole validation set, then reset for next round
            metric = dice_metric.aggregate().item()
            dice_metric.reset()
            metric_values.append(metric)
            if metric > best_metric:
                best_metric = metric
                best_metric_epoch = epoch + 1
                # torch.save(model.state_dict(), os.path.join(root_dir, "best_metric_model.pth"))
                torch.save(model.state_dict(), "best_metric_model.pth")
                print("saved new best metric model")
            print(
                f"current epoch: {epoch + 1} current mean dice: {metric:.4f}"
                f"\nbest mean dice: {best_metric:.4f} "
                f"at epoch: {best_metric_epoch}"
            )
关于训练过程中optimizer.zero_grad()
,loss.backward()
和optimizer.step()
三个函数的理解可以参考 https://blog.csdn.net/PanYHHH/article/details/107361827
1. sliding_window_inference
val_outputs = sliding_window_inference(val_inputs, roi_size, sw_batch_size, model)
Monai实现的通过滑动窗口的方式推理获得model预测结果,val_inputs是原始图像大小的输入,val_outputs是与val_inputs输入相对应的model输出结果,如对于一个单类别的3D分割问题,若输入val_inputs的shape是(1, 1, H, W, D)
,则输出val_outputs的shape是(1, 2, H, W, D)
,其中第1个通道是背景类的预测,第2个通道是目标类的预测。
2. decollate_batch
decollate_batch
将"batched" data转变成a list of tensors
https://github.com/Project-MONAI/tutorials/blob/main/modules/decollate_batch.ipynb
四、Inference and save segmentation mask to NIfTI
在医学图像分割任务中需要将分割结果保存成NIfTI格式进行查看,下面的代码在测试集上执行推理,并将推理结果保存成NIfTI格式。
代码中的Invertd可以将分割结果变换为test_org_transforms变换前的图像,即原始图像(包括分辨率Spacing,图像大小shape,方向等)。
需要注意的是下述代码保存得到的分割结果是两个通道的结果,即(2, H, W, D)
,第1个通道是背景的mask,第2个通道是目标类的mask。若想只保存目标类的mask,需要将post_transforms
中的AsDiscreted
中的to_onehot=2
去掉,即AsDiscreted(keys="pred", argmax=True)
。One hot是指将输出类别的概率归一化(针对某一个点的类别预测,预测的全部类别的概率和为1,这样具有最大概率的类别即为该点的label)。
# Collect the test volumes (images only — labels are not needed for inference).
test_images = sorted(glob.glob(os.path.join(".\\3Dircadb1\\test", "Patient_" + "*.nii.gz")))
test_data = [{"image": image} for image in test_images]
# Deterministic preprocessing for inference, restricted to the "image" key.
_test_steps = [
    LoadImaged(keys="image"),
    EnsureChannelFirstd(keys="image"),
    Orientationd(keys=["image"], axcodes="RAS"),
    Spacingd(keys=["image"], pixdim=(0.9, 0.9, 1.2), mode="bilinear"),
    # Map HU window [0, 500] linearly onto [0, 1], clipping values outside it
    ScaleIntensityRanged(keys=["image"], a_min=0, a_max=500, b_min=0.0, b_max=1.0, clip=True),
    CropForegroundd(keys=["image"], source_key="image"),
]
test_org_transforms = Compose(_test_steps)
test_org_ds = Dataset(data=test_data, transform=test_org_transforms)
test_org_loader = DataLoader(test_org_ds, batch_size=1, num_workers=4)
# Post-processing for inference: invert the preprocessing on the prediction so
# it matches the original image geometry, discretize, then write to disk.
post_transforms = Compose(
    [
        Invertd(
            keys="pred",                       # invert the `pred` data field; multiple fields are supported
            transform=test_org_transforms,     # the preprocessing pipeline whose effect is undone
            orig_keys="image",                 # reuse the transform trace recorded for the `image` field
            meta_keys="pred_meta_dict",        # key field to save inverted meta data, every item maps to `keys`
            orig_meta_keys="image_meta_dict",  # use the meta data from the `image` field when inverting
            meta_key_postfix="meta_dict",
            nearest_interp=False,              # keep the preprocessing interpolation mode when inverting
            to_tensor=True,
        ),
        # argmax over channels then one-hot: the saved mask has 2 channels
        # (background, foreground); drop to_onehot=2 to save a single channel
        AsDiscreted(keys="pred", argmax=True, to_onehot=2),
        SaveImaged(keys="pred", meta_keys="pred_meta_dict", output_dir="./out", output_postfix="seg", resample=False),
    ]
)
# Run inference on the test set; post_transforms inverts the preprocessing and
# saves each prediction as NIfTI.  map_location=device makes the checkpoint
# load correctly even when it was saved on a different device (e.g. a GPU
# checkpoint opened on a CPU-only machine).
model.load_state_dict(torch.load("best_metric_model_dice.pth", map_location=device))
model.eval()
with torch.no_grad():
    # Renamed from `test_data` to avoid shadowing the dataset list defined above.
    for batch in test_org_loader:
        test_inputs = batch["image"].to(device)
        roi_size = (160, 160, 160)
        sw_batch_size = 4
        # sliding-window prediction, shape e.g. (1, 2, H, W, D)
        batch["pred"] = sliding_window_inference(test_inputs, roi_size, sw_batch_size, model)
        # decollate to per-sample dicts; post_transforms handles invert + save
        _ = [post_transforms(i) for i in decollate_batch(batch)]