[深度学习]胶质瘤病灶分割技术文档(不断更新)
参考:
-
https://blog.csdn.net/weixin_42338058/article/details/84190420(python中用于读取医学图像的常用库)
-
https://imageio.readthedocs.io/en/stable/userapi.html (imageioAPI, 用于nii转化为jpg)
1. 数据预处理
1.1 数据处理
原始数据集为nii格式,常用网络需要jpg格式的输入,故首先需要将数据转化为jpg。python用于处理医疗影像图片的库有两个
-
SimpleITK
-
nibabel
SimpleITK 和 Nibabel 的区别:
-
SimpleITK 加载数据是通道在前,如(18,512,512);
-
Nibabel 是 通道在后,如(512,512,18),其中18是图像通道数,即18张图像,可以把nii看成二维图像,也可以看成三维。
-
nibabel加载出来的图像被旋转了90度,横过来了; SimpleITK加载的图片旋转了180°
-
选用SimpleITK提取图片
1.2 遇到的问题
-
问题1 scipy.misc.imsave()弃用
解决方式:scipy.misc.imsave() 在高版本 SciPy 中已被弃用并移除,改用 imageio.imwrite() 代替。
-
问题2转化jpg出现ValueError: Max value == min value, ambiguous given dtype
解决方式:该异常出现在整张切片像素值完全相同(最大值等于最小值)时,此时先把切片转换为 uint8 的常数图像再保存:
if mi == ma:
    # A constant slice cannot be rescaled by imageio ("Max value == min
    # value"), so hand it an explicit uint8 image instead. np.full pins the
    # dtype; multiplying a uint8 array by the NumPy scalar `mi` may promote
    # the result back to the source dtype and raise the same error again.
    DataSlice = np.full(DataSlice.shape, np.clip(mi, 0, 255), dtype=np.uint8)
-
问题3 报错 sitk::ERROR: Unable to determine ImageIO reader for “…/…/RAWDATA/NEW_MultiCenter_Lesion/MS\HS\HSMS003\2D-T2FLAIR_labelnii”
解决方式:该文件名缺少扩展名前的点号(labelnii),SimpleITK 无法据此判断文件格式;将文件后缀改正为 .nii 即可。
-
问题4 读取出来的图片分辨率不一致,共有8种分辨率
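| resolution | (256,204) | (256,228) | (256,256) | (320,270) | (320,320) | (512,408) | (512,512) | (640,640) |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| number | 20 | 48 | 1928 | 521 | 269 | 100 | 2609 | 80 |

解决方式:将8种分辨率的图片分别置于不同文件夹,在后期再决定如何训练。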
-
问题5: 读取的label像素值为0-255,需要将有病灶的区域置为1,其它区域置为0。
解决方式:分3种情况保存
# `.any()` is used instead of `sum(sum(ImgArray)) == 0`: the builtin sum
# accumulates in the array's own uint8 dtype and wraps modulo 256, so a
# non-empty mask could be mistaken for pure background.
if not ImgArray.any():  # background only: save unchanged
    imageio.imwrite(SavePath, ImgArray)
else:
    if ImgArray.max() == 1:  # already a correct 0/1 mask: save unchanged
        imageio.imwrite(SavePath, ImgArray)
    else:  # label stored as 0-255: binarize with threshold 50
        ImgArray[ImgArray[:] < 50] = 0
        ImgArray[ImgArray[:] >= 50] = 1  # lesion -> 1, everything else -> 0
        imageio.imwrite(SavePath, ImgArray)
1.3 数据预处理模块代码
数据预处理模块由4部分构成
-
数据读取
-
将nii文件转化为jpg格式数据并储存
-
数据命名格式:CQMS005_0000.jpg,最后4位为切片在该序列中的序号(从0开始)。data和label名称一致,但放在不同的文件夹
# coding=utf-8
"""Convert every NIfTI volume of the MS dataset into per-slice JPEG files.

For each volume the slices are written as ``<scan>_<index>.jpg`` below
``SaveRootPath/data`` or ``SaveRootPath/label``, and the slice shapes are
appended to ``ImageShape_data.txt`` / ``ImageShape_label.txt``.
"""
import nibabel as nib
import SimpleITK as sitk
import os
import numpy as np
import matplotlib.pyplot as plt

DataRootPath = r'../../RAWDATA/NEW_MultiCenter_Lesion/MS'
SaveRootPath = r'../../RAWDATA/RAWDATA_jpg'


def read_img(path):
    """Load the image at *path* with SimpleITK and return it as an array.

    The array comes from ``sitk.GetArrayFromImage``; the rest of this script
    treats its first axis as the slice index.
    """
    img = sitk.ReadImage(path)
    return sitk.GetArrayFromImage(img)


def show_img(data):
    """Display every slice of *data* (indexed along axis 0) in grayscale."""
    for i in range(data.shape[0]):
        plt.imshow(data[i, :, :], cmap='gray')
        print(i)
        plt.show()


def get_shape(DataPath, SavePath, SaveName, FileName):
    """Append one ``<jpg path> <(H, W)>`` line per slice to ``<FileName>.txt``.

    Args:
        DataPath: path of the volume to inspect.
        SavePath: directory the slice JPEGs are written to; only used to
            build the path recorded in the index.
        SaveName: file-name prefix of the slices.
        FileName: index file name without the ``.txt`` suffix.
    """
    DataVolumn = read_img(DataPath)
    # Every slice of one volume has the same shape, so compute it once.
    ImageShape = (DataVolumn.shape[1], DataVolumn.shape[2])
    # Open the index once per volume instead of once per slice.
    with open('%s.txt' % FileName, 'a') as f:
        for channel in range(DataVolumn.shape[0]):
            SaveNameOutput = SaveName + '_%04d.jpg' % channel
            OutputPath = os.path.join(SavePath, SaveNameOutput)
            f.write(OutputPath + ' ' + str(ImageShape) + '\n')


def nii2jpg(DataPath, SavePath, SaveName):
    """Write every slice of the volume at *DataPath* as a JPEG file.

    Args:
        DataPath: path of the volume to convert.
        SavePath: output directory (created when missing).
        SaveName: file-name prefix; slice ``i`` becomes ``<SaveName>_<i>.jpg``
            with ``i`` zero-padded to four digits.
    """
    import imageio

    os.makedirs(SavePath, exist_ok=True)
    DataVolumn = read_img(DataPath)
    for channel in range(DataVolumn.shape[0]):
        SaveNameOutput = SaveName + '_%04d.jpg' % channel
        OutputPath = os.path.join(SavePath, SaveNameOutput)
        print(OutputPath)
        DataSlice = DataVolumn[channel, :, :]
        mi = np.nanmin(DataSlice)
        ma = np.nanmax(DataSlice)
        if mi == ma:
            # A constant slice makes imageio raise "Max value == min value,
            # ambiguous given dtype". Hand it an explicit uint8 image; np.full
            # pins the dtype, whereas `uint8_array * mi` may be promoted back
            # to the source dtype and fail in the same way.
            DataSlice = np.full(DataSlice.shape, np.clip(mi, 0, 255),
                                dtype=np.uint8)
        imageio.imwrite(OutputPath, DataSlice)


def main():
    """Walk ``DataRootPath/<patient>/<scan>/`` and convert every volume."""
    for PersonName in os.listdir(DataRootPath):
        # A name of 8 or more characters is a garbled directory entry.
        if len(PersonName) >= 8:
            continue
        PersonNamePath = os.path.join(DataRootPath, PersonName)
        for PersonPhotoTime in os.listdir(PersonNamePath):
            # Same guard against garbled names on the scan level.
            if len(PersonPhotoTime) >= 10:
                continue
            PersonPerTimeImagePath = os.path.join(PersonNamePath,
                                                  PersonPhotoTime)
            for FileName in os.listdir(PersonPerTimeImagePath):
                if 'nii' not in FileName:  # skip non-data files
                    continue
                DataPath = os.path.join(PersonPerTimeImagePath, FileName)
                # Files with "label" in their name are masks; the rest are
                # images. Both use the scan folder name as slice prefix.
                kind = 'label' if 'label' in FileName else 'data'
                SavePath = os.path.join(SaveRootPath, kind)
                nii2jpg(DataPath=DataPath, SavePath=SavePath,
                        SaveName=PersonPhotoTime)
                get_shape(DataPath=DataPath, SavePath=SavePath,
                          SaveName=PersonPhotoTime,
                          FileName='ImageShape_' + kind)


if __name__ == '__main__':
    main()
-
-
检查异常项(主要检查同一data和label维度是否匹配的情况)
"""Move data/label slice pairs whose recorded shapes differ out of the dataset."""
import shutil
import os

AbnormalDataRootPath = r'../../RAWDATA/AbnormalData'


def parse_index_line(line):
    """Split one index line ``<path> (H, W)`` into ``(path, '(H, W)')``.

    The shape text itself contains a space, so splitting the line on every
    space would cut it off after the first dimension. The line is therefore
    split at the last ``' ('``.

    Raises:
        ValueError: if the line contains no ``' ('`` separator.
    """
    path, sep, shape = line.rstrip('\r\n').rpartition(' (')
    if not sep:
        raise ValueError('malformed index line: %r' % (line,))
    return path, '(' + shape


def move_abnormal_pairs(data_index='ImageShape_data.txt',
                        label_index='ImageShape_label.txt',
                        abnormal_root=AbnormalDataRootPath):
    """Move every pair with differing data/label shapes to *abnormal_root*.

    The two index files are read in lockstep; line ``i`` of one is taken to
    describe the same slice as line ``i`` of the other.

    Args:
        data_index: index file listing the data slices.
        label_index: index file listing the label slices.
        abnormal_root: directory receiving the ``data`` and ``label``
            sub-folders with the mismatching files.

    Returns:
        The number of pairs that were moved.
    """
    data_dst = os.path.join(abnormal_root, 'data')
    label_dst = os.path.join(abnormal_root, 'label')
    moved = 0
    with open(data_index, 'r') as f1, open(label_index, 'r') as f2:
        for DataLine, LabelLine in zip(f1, f2):
            DataPath, DataLineShape = parse_index_line(DataLine)
            LabelPath, LabelLineShape = parse_index_line(LabelLine)
            print(DataLineShape, LabelLineShape)
            if DataLineShape == LabelLineShape:
                continue
            # Without an existing target directory shutil.move would rename
            # the file to "data"/"label" instead of moving it into a folder.
            os.makedirs(data_dst, exist_ok=True)
            os.makedirs(label_dst, exist_ok=True)
            shutil.move(DataPath, data_dst)
            shutil.move(LabelPath, label_dst)
            moved += 1
    return moved


if __name__ == '__main__':
    move_abnormal_pairs()
-
resolution classify 将jpg图片按照不同分辨率置于不同的文件夹
"""Sort the converted slices into one sub-folder per image resolution."""
import os
import shutil


def _parse_index_line(line):
    """Split one index line ``<path> (H, W)`` into ``(path, '(H, W)')``.

    The shape text contains a space, so the line is split at the last
    ``' ('`` rather than on every space.
    """
    path, sep, shape = line.rstrip('\r\n').rpartition(' (')
    if not sep:
        raise ValueError('malformed index line: %r' % (line,))
    return path, '(' + shape


def main(data_index='ImageShape_data.txt', label_index='ImageShape_label.txt'):
    """Move each data/label pair into ``Resolution_<shape>`` sub-folders.

    The two index files are read in lockstep. A pair whose data and label
    shapes differ is skipped; every other pair is moved into a folder named
    ``Resolution_(H, W)`` created next to the file's current location.

    Args:
        data_index: index file listing the data slices.
        label_index: index file listing the label slices.
    """
    created = set()
    with open(data_index, 'r') as f1, open(label_index, 'r') as f2:
        for DataLine, LabelLine in zip(f1, f2):
            DataRawPath, DataLineShape = _parse_index_line(DataLine)
            LabelRawPath, LabelLineShape = _parse_index_line(LabelLine)
            if LabelLineShape != DataLineShape:
                # data and label disagree on the shape: leave the pair alone
                continue
            NewPathFile = 'Resolution_' + LabelLineShape
            DataMovePath = os.path.join(os.path.dirname(DataRawPath),
                                        NewPathFile)
            LabelMovePath = os.path.join(os.path.dirname(LabelRawPath),
                                         NewPathFile)
            for folder in (DataMovePath, LabelMovePath):
                if folder not in created:
                    os.makedirs(folder, exist_ok=True)
                    created.add(folder)
            shutil.move(DataRawPath, DataMovePath)
            shutil.move(LabelRawPath, LabelMovePath)


if __name__ == '__main__':
    main()
-
将jpg格式的label转化为baseline的png格式,并对label的像素值进行处理,有病灶的区域置为1,其他区域置为0。
"""Convert the JPEG label slices of one resolution folder to 0/1 PNG masks."""
import os

ImageRootPath = r'..\..\RAWDATA\RAWDATA_jpg\label\Resolution_(640, 640)'
SaveRootPath = r'..\..\RAWDATA\RAWDATA_jpg\label\Resolution_(640, 640)\label_png'


def binarize_label(ImgArray, threshold=50):
    """Return *ImgArray* as a mask with lesion pixels 1 and the rest 0.

    Three cases are distinguished:
      * no non-zero pixel (background only): returned unchanged;
      * maximum equal to 1 (already a correct mask): returned unchanged;
      * otherwise (label stored as 0-255): pixels >= *threshold* become 1,
        all others 0, keeping the input dtype.

    ``ImgArray.any()`` replaces ``sum(sum(ImgArray)) == 0``: the builtin sum
    accumulates in the array's own uint8 dtype and wraps modulo 256, so a
    non-empty mask could be mistaken for pure background.
    """
    if not ImgArray.any():
        return ImgArray
    if ImgArray.max() == 1:
        return ImgArray
    return (ImgArray >= threshold).astype(ImgArray.dtype)


def jpg2png(ImagePath, SavePath):
    """Read the label image at *ImagePath*, binarize it, write it to *SavePath*."""
    # Imported here so that binarize_label can be used without imageio.
    import imageio

    ImgArray = imageio.imread(ImagePath)
    imageio.imwrite(SavePath, binarize_label(ImgArray))


if __name__ == '__main__':
    ImageNames = os.listdir(ImageRootPath)
    os.makedirs(SaveRootPath, exist_ok=True)
    print('{:_^50}'.format('start transform'))
    for ImageName in ImageNames:
        if 'jpg' not in ImageName:
            continue
        ImagePath = os.path.join(ImageRootPath, ImageName)
        ImageNameNew = ImageName.split('.')[0] + '.png'  # same stem, png suffix
        SavePath = os.path.join(SaveRootPath, ImageNameNew)
        jpg2png(ImagePath=ImagePath, SavePath=SavePath)
    print('{:_^4}'.format('Successful!'))
2 模型训练
2.1 baseline
采用MIT开源全卷积神经网络(FCN)进行baseline训练。程序已跑通,下周使用公司GPU完整运行。
MIT开源全卷积神经网络(FCN)源码地址:
[外链图片转存失败(img-yprT6Xr3-1563698300993)(C:\Users\eveadam\AppData\Roaming\Typora\typora-user-images\1563697418017.png)]