i += 1
else:
patient_dir = os.path.join(root, sub_dir)
ground_paths, inphase_paths = collect_T1_name(patient_dir)
for num in range(len(ground_paths)):
dst_groundpath = os.path.join(dst_TestGround, "T1\_Patient%s\_No%d.png" % (sub_dir, num))
shutil.copy(ground_paths[num], dst_groundpath)
for num in range(len(inphase_paths)):
dst_inphasepath = os.path.join(dst_TestData, "T1\_Patient%s\_No%d.dcm" % (sub_dir, num))
shutil.copy(inphase_paths[num], dst_inphasepath)
i += 1
该段程序的作用就是将训练集中T1/InPhase 的20个病例划分成16个训练集,4个测试集,并重新存储到自定义的文件夹下. 对于图像文件也进行了命名规范,对第i个病人的第j张slice,命名规则为**T1\_Patienti\_Noj.dcm**
## 分离出肝脏
GroundTruth的图像是多器官的,根据灰度范围进行判断。
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200806143226468.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MDAzODc2,size_16,color_FFFFFF,t_70)
从自带的config文件中可以查看灰度范围:
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200806143343610.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MDAzODc2,size_16,color_FFFFFF,t_70)
因为GroundTruth是png格式,我们使用OpenCV做一下简单的阈值处理就可以提取肝脏部分了.
# -*- coding: utf-8 -*-
"""
@ Date: 2020/6/29
@ Author: Xiao Zhuo
@ Brief: Extract liver part from GroundTruth and set white color
@ Filename: extract_only_liver_2.py
"""
import os

import cv2
def makedir(dir):
    """Create directory *dir* if it does not exist, including parents.

    os.makedirs(..., exist_ok=True) replaces the exists()/mkdir pair:
    it also creates missing parent directories and avoids the race
    between the existence check and the creation.
    """
    os.makedirs(dir, exist_ok=True)
def extract_liver(dataset_dir):
    """Binarize every GroundTruth image in *dataset_dir*.

    Pixels whose gray value lies in [55, 70] (the liver label range from
    the dataset's config) become 255, everything else becomes 0.  Results
    are written to a "Liver" subdirectory of *dataset_dir*.
    """
    src_names = os.listdir(dataset_dir)
    # Skip the output folder itself if it survives from a previous run.
    # (The original only removed it when it happened to be listed first.)
    if "Liver" in src_names:
        src_names.remove("Liver")
    dst_dir = os.path.join(dataset_dir, "Liver")
    makedir(dst_dir)
    for name in src_names:
        src_path = os.path.join(dataset_dir, name)
        # Read as grayscale: the masks are single-channel label images
        # (the original's default 3-channel read was noted as a mistake).
        src = cv2.imread(src_path, cv2.IMREAD_GRAYSCALE)
        if src is None:
            continue  # not a readable image file
        # Vectorized replacement for the per-pixel triple loop:
        # inRange yields 255 inside [55, 70] and 0 outside, in one C pass.
        mask = cv2.inRange(src, 55, 70)
        # NOTE(review): the original initialized flag = 1, so every slice
        # was written even when no liver pixel was found; that behavior is
        # preserved here.  Restore the commented "flag = 0" logic if only
        # liver-containing slices should be kept.
        cv2.imwrite(os.path.join(dst_dir, name), mask)
if __name__ == '__main__':
    # Binarize the ground-truth masks of both training and validation sets.
    train_dir = os.path.join("data", "train", "Ground")
    test_dir = os.path.join("data", "val", "Ground")
    extract_liver(train_dir)
    extract_liver(test_dir)
提取后的肝脏二值化掩膜如图所示:
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200806143648297.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MDAzODc2,size_16,color_FFFFFF,t_70)
## 将dicom文件转换为png格式
这一步实际上也可以不做,原因是dicom中的图像数据原本是16位的,若是转换成8位的png格式可能会导致数据精度丢失。使用SimpleITK直接读取Array送入U-net其实就可以运行了.
但我要多此一举的原因是,我想做数据增强. 但是现有的数据增强工具好像不能处理Array或者numpy等格式的数据,自己又没有那个水平重新写一个数据增强的API。没办法,就转换成png简单处理吧.
"""
@file name : conver2png.py
@author : Peter
@date : 2020-07-01
@brief : Convert DICOM files to PNG format
"""
import os

import matplotlib.pyplot as plt
import pydicom
from skimage import img_as_float

# Source folders holding the DICOM slices.
path_1 = "./data/val/Data"
path_2 = "./data/train/Data"
def dicom_2png(orifile, savefile, width, height):
    """Render the pixel data of DICOM file *orifile* as a width x height
    grayscale PNG saved to *savefile*.

    The single brightest gray level (often a padding/overflow value) is
    clipped to 0 before display so it does not dominate the windowing.
    """
    dcm = pydicom.dcmread(orifile)
    imageX = dcm.pixel_array
    pic_max = imageX.max()
    # Guard against a constant-valued image, where the masked .max()
    # below would raise on an empty selection.
    if (imageX < pic_max).any():
        # Second-largest gray level; zero everything brighter than it.
        vmax = imageX[imageX < pic_max].max()
        imageX[imageX > vmax] = 0
    # (The original also zeroed pixels below the minimum — a no-op.)
    # Rescale to a float image in [0, 1] for display.
    image = img_as_float(imageX)
    # figsize is in inches at the default 100 dpi, so width/100 x
    # height/100 inches saves a width x height pixel image.
    fig = plt.figure('adjust_gamma', figsize=(width / 100, height / 100))
    plt.subplots_adjust(top=1, bottom=0, left=0, right=1, hspace=0, wspace=0)
    plt.imshow(image, 'gray')
    plt.axis('off')
    plt.savefig(savefile)
    # Close the figure so repeated calls do not leak open figures.
    plt.close(fig)
if __name__ == '__main__':
    # Convert every slice of the validation and training sets to 8-bit PNG.
    for src_folder, dst_folder in ((path_1, './data/val/Data_8bit'),
                                   (path_2, './data/train/Data_8bit')):
        for name in os.listdir(src_folder):
            dicom_path = os.path.join(src_folder, name)
            png_name = os.path.splitext(name)[0]
            dst_path = os.path.join(dst_folder, png_name + '.png')
            dicom_2png(dicom_path, dst_path, 256, 256)
转换后一目了然,不需要再用MicroDicom去查看
![在这里插入图片描述](https://img-blog.csdnimg.cn/20200806144947553.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM0MDAzODc2,size_16,color_FFFFFF,t_70)
## 数据增强
我使用Augmentor工具.
导入数据增强工具
import Augmentor
确定原始图像存储路径以及掩码文件存储路径
p = Augmentor.Pipeline(“./data/train/Data”)
p.ground_truth(“./data/train/Ground”)
图像旋转: 按照概率0.8执行,最大左旋角度10,最大右旋角度10
p.rotate(probability=0.8, max_left_rotation=10, max_right_rotation=10)
图像左右互换: 按照概率0.5执行
p.flip_left_right(probability=0.5)
图像放大缩小: 按照概率0.3执行,裁剪区域面积为原始图像的0.85倍
p.zoom_random(probability=0.3, percentage_area=0.85)
最终扩充的数据样本数
p.sample(400)
当然,增强的图片还可以重新命个名,按照序号来:
import os

Data_path = "./data/train/Data_aug"
Ground_path = "./data/train/Ground_aug"

# Rename augmented images and masks to Aug_No_<i>.png.  Sort both
# listings first: os.listdir order is arbitrary, and the image / mask
# folders must be numbered in the same order to keep pairs matched.
for folder in (Data_path, Ground_path):
    for i, old_name in enumerate(sorted(os.listdir(folder))):
        used_name = os.path.join(folder, old_name)
        new_name = os.path.join(folder, "Aug_No_%d.png" % i)
        os.rename(used_name, new_name)
网络搭建和训练部分,我使用的是Python3.7 + Pytorch 1.4.0.
## U-net网络搭建
就是经典的网络结构,不过我加了尝试加了几个Dropout层.
"""
@ filename: unet.py
"""
import torch
from torch import nn
class DoubleConv(nn.Module):
    """(Conv3x3 -> BatchNorm -> ReLU) x 2, the basic U-Net building block."""

    def __init__(self, in_ch, out_ch):
        # Was "super(...).init()" — AttributeError; must be __init__().
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True)
        )

    def forward(self, input):
        """Apply both conv-BN-ReLU stages; spatial size is preserved."""
        return self.conv(input)


class Unet(nn.Module):
    """Classic U-Net with four down/up-sampling stages, plus Dropout(0.2)
    after every pooling step and every skip-connection concatenation.

    Returns raw logits (no final Sigmoid); pair with a logits-aware loss
    or apply Sigmoid externally.
    """

    def __init__(self, in_ch, out_ch):
        # Was "super(...).init()" — AttributeError; must be __init__().
        super(Unet, self).__init__()
        # Encoder: channel count doubles at each stage.
        self.conv1 = DoubleConv(in_ch, 64)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = DoubleConv(64, 128)
        self.pool2 = nn.MaxPool2d(2)
        self.conv3 = DoubleConv(128, 256)
        self.pool3 = nn.MaxPool2d(2)
        self.conv4 = DoubleConv(256, 512)
        self.pool4 = nn.MaxPool2d(2)
        self.conv5 = DoubleConv(512, 1024)
        # Decoder: transposed convs upsample x2; each DoubleConv halves
        # channels after concatenation with the matching encoder map.
        self.up6 = nn.ConvTranspose2d(1024, 512, 2, stride=2)
        self.conv6 = DoubleConv(1024, 512)
        self.up7 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.conv7 = DoubleConv(512, 256)
        self.up8 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.conv8 = DoubleConv(256, 128)
        self.up9 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.conv9 = DoubleConv(128, 64)
        # 1x1 conv projects the 64 feature channels to out_ch logits.
        self.conv10 = nn.Conv2d(64, out_ch, 1)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        # Encoder path; keep pre-pool features for the skip connections.
        c1 = self.conv1(x)
        p1 = self.dropout(self.pool1(c1))
        c2 = self.conv2(p1)
        p2 = self.dropout(self.pool2(c2))
        c3 = self.conv3(p2)
        p3 = self.dropout(self.pool3(c3))
        c4 = self.conv4(p3)
        p4 = self.dropout(self.pool4(c4))
        c5 = self.conv5(p4)
        # Decoder path with skip connections (dropout on each merge).
        merge6 = self.dropout(torch.cat([self.up6(c5), c4], dim=1))
        c6 = self.conv6(merge6)
        merge7 = self.dropout(torch.cat([self.up7(c6), c3], dim=1))
        c7 = self.conv7(merge7)
        merge8 = self.dropout(torch.cat([self.up8(c7), c2], dim=1))
        c8 = self.conv8(merge8)
        merge9 = self.dropout(torch.cat([self.up9(c8), c1], dim=1))
        c9 = self.conv9(merge9)
        c10 = self.conv10(c9)
        # out = nn.Sigmoid()(c10)
        return c10
## 自定义Dataset
make\_dataset方法获取原始图像和分割掩膜的图像路径名,LiverDateset类继承torch的数据集类,通过make\_dataset的路径名利用PIL Image库读取文件,并进行transforms变换成归一化的Tensor数据.
"""
@ filename: dataset.py
@ author: Peter Xiao
@ Date: 2020/5/1
@ Brief: Custom liver dataset
"""
import os

import PIL.Image as Image
from torch.utils.data import Dataset
def make_dataset(root):
    """Pair each image under root/Data with its same-named mask under
    root/Ground.

    root is e.g. "./data/train".  Returns a list of
    (image_path, mask_path) tuples, sorted by file name so the dataset
    order is deterministic across platforms and runs.
    """
    ori_path = os.path.join(root, "Data")
    ground_path = os.path.join(root, "Ground")
    # os.listdir order is arbitrary; sort for reproducible epochs.
    names = sorted(os.listdir(ori_path))
    return [(os.path.join(ori_path, name), os.path.join(ground_path, name))
            for name in names]
class LiverDataset(Dataset):
    """Dataset of (image, mask) pairs, each loaded as 8-bit grayscale."""

    def __init__(self, root, transform=None, target_transform=None):
        # root is e.g. "./data/train"; it must contain Data/ and Ground/.
        imgs = make_dataset(root)
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    # The dunder names were mangled to \_\_getitem\_\_ / \_\_len\_\_ by
    # the blog formatting; restored here so the class actually works
    # with DataLoader.
    def __getitem__(self, index):
        x_path, y_path = self.imgs[index]
        # 'L' = single-channel 8-bit; both image and mask are grayscale.
        img_x = Image.open(x_path).convert('L')
        img_y = Image.open(y_path).convert('L')
        if self.transform is not None:
            img_x = self.transform(img_x)
        if self.target_transform is not None:
            img_y = self.target_transform(img_y)
        return img_x, img_y

    def __len__(self):
        return len(self.imgs)
## Main.py
Main文件主要有三个功能,训练、预测(包括生成可视化图像)和计算Dice系数. 主程序利用了argparse模块作命令行,可以自行修改.
这里提醒一点:我训练时使用的GPU是GTX1650,显存4G. batch\_size设在4刚刚好,调大了会爆显存,无法训练. 在实验室的2080Ti上用16的BT训练,占用显存为9.1G,可以根据这个比例结合自己的GPU调整Batch\_size.
"""
@ filename: main.py
@ author: Peter Xiao
@ date: 2020/5/1
@ brief: MR liver segmentation -- training, testing and Dice computation
"""
import torch
import argparse
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch import nn, optim
from torchvision.transforms import transforms
from unet import Unet
from denseunet import DenseUNet_65, DenseUNet_167
from dataset import LiverDataset
from tools.common_tools import transform_invert
# Validate every val_interval epochs.
val_interval = 1

# Use CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Input images: just convert to a [0, 1] float tensor.
x_transforms = transforms.Compose([
    transforms.ToTensor(),
])

# The mask only needs conversion to tensor.
y_transforms = transforms.ToTensor()

# Per-iteration / per-validation loss curves collected during training.
train_curve = list()
valid_curve = list()
def train_model(model, criterion, optimizer, dataload, num_epochs=80):
model_path = “./model/Aug/weights_20.pth”
if os.path.exists(model_path):
model.load_state_dict(torch.load(model_path, map_location=device))
start_epoch = 20
print(‘加载成功!’)
else:
start_epoch = 0
print(‘无保存模型,将从头开始训练!’)
for epoch in range(start_epoch+1, num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs))
print('-' \* 10)
dt_size = len(dataload.dataset)
epoch_loss = 0
step = 0
for x, y in dataload:
step += 1
inputs = x.to(device)
labels = y.to(device)
# zero the parameter gradients
optimizer.zero_grad()
# forward
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
train_curve.append(loss.item())
print("%d/%d,train\_loss:%0.3f" % (step, (dt_size - 1) // dataload.batch_size + 1, loss.item()))
print("epoch %d loss:%0.3f" % (epoch, epoch_loss/step))
if (epoch + 1) % 20 == 0:
torch.save(model.state_dict(), './model/Aug/weights\_%d.pth' % (epoch + 1))
# Validate the model
valid_dataset = LiverDataset("data/val", transform=x_transforms, target_transform=y_transforms)
valid_loader = DataLoader(valid_dataset, batch_size=4, shuffle=True)
if (epoch + 2) % val_interval == 0:
loss_val = 0.
model.eval()
with torch.no_grad():
step_val = 0
网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
== 0:
loss_val = 0.
model.eval()
with torch.no_grad():
step_val = 0
[外链图片转存中…(img-REhTpSYk-1714289423085)]
[外链图片转存中…(img-tsE6fgy3-1714289423085)]
网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走的很快,但一群人才能走的更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!