给大家放几个我常用的代码,包括:批量修改文件名、归一化标签、常用医学图像数据预处理、按文件名后缀移动文件、对齐nii格式数据原点、打印标签类别数、打印图像尺寸、打印图像spacing(均针对nii.gz格式数据)。
1、批量修改文件名
import os
def rename_and_move_files(source_folder, destination_folder):
    """Move every file in ``source_folder`` to ``destination_folder`` as ``volume_<n>.nii.gz``.

    Files are numbered from 1 in sorted file-name order so that repeated runs
    on the same input produce the same numbering (``os.listdir`` returns
    entries in arbitrary order). Sub-directories of ``source_folder`` are
    left untouched.

    Args:
        source_folder: Folder whose files are renamed and moved.
        destination_folder: Folder that receives the renamed files; it is
            created if it does not exist.

    Raises:
        FileExistsError: If a target name already exists in
            ``destination_folder`` and is not the file being moved. Nothing
            is overwritten.
    """
    import shutil  # local import: the surrounding snippet only imports ``os``

    # Make sure the destination folder exists.
    os.makedirs(destination_folder, exist_ok=True)

    # Only regular files, in a reproducible order.
    files = sorted(
        name for name in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, name))
    )

    for index, file in enumerate(files, start=1):
        original_path = os.path.join(source_folder, file)
        new_name = f'volume_{index}.nii.gz'
        new_path = os.path.join(destination_folder, new_name)

        # Refuse to clobber a different, already existing file.
        if os.path.exists(new_path) and not os.path.samefile(original_path, new_path):
            raise FileExistsError(f'Target already exists: {new_path}')

        # shutil.move also works when source and destination are on
        # different drives, where os.rename raises OSError.
        shutil.move(original_path, new_path)
        print(f'Renamed and moved: {original_path} to {new_path}')
# Source and destination folders. Raw strings keep the Windows backslashes
# literal instead of relying on unrecognized escape sequences such as '\D',
# which newer Python versions warn about.
source_folder = r'D:\Datas\BraTS-TCGA-GBM\L_name'
destination_folder = r'D:\Datas\BraTS-TCGA-GBM\L'
# Rename the files and move them into the destination folder.
rename_and_move_files(source_folder, destination_folder)
2、归一化标签
import os
import SimpleITK as sitk
import numpy as np
def normalize_nii_gz_files(folder_path):
    """Min-max normalize every ``.nii.gz`` image in ``folder_path`` and overwrite it.

    Each image is rescaled as ``(x - min) / (max - min)``. The result is
    written back to the same file name with the geometry information copied
    from the input image.

    An image whose values are all identical (``max == min``) is written as
    all zeros instead of dividing by zero.

    Args:
        folder_path: Folder containing the ``.nii.gz`` files to normalize.
    """
    # Only the .nii.gz files in the folder are processed.
    files = [f for f in os.listdir(folder_path) if f.endswith('.nii.gz')]

    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        # Load the image and get its voxel data as a numpy array.
        image = sitk.ReadImage(file_path)
        image_data = sitk.GetArrayFromImage(image)

        # Integer arrays are promoted to float64 first so that
        # ``image_data - min`` cannot wrap around in the integer type.
        # The division produced float64 for integer input anyway.
        if not np.issubdtype(image_data.dtype, np.floating):
            image_data = image_data.astype(np.float64)

        data_min = np.min(image_data)
        value_range = np.max(image_data) - data_min
        if value_range > 0:
            normalized_data = (image_data - data_min) / value_range
        else:
            # Constant image: avoid 0/0.
            normalized_data = np.zeros_like(image_data)

        # Wrap the normalized array in an image that has the input's geometry.
        normalized_image = sitk.GetImageFromArray(normalized_data)
        normalized_image.CopyInformation(image)

        # Save over the original file.
        normalized_file_path = os.path.join(folder_path, file_name)
        sitk.WriteImage(normalized_image, normalized_file_path)
        print(f"{file_name}: Normalized and saved as {normalized_file_path}")
# Folder whose .nii.gz files are normalized and saved back under the same names.
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_preprocess\val\mask'
# Run the normalization over every .nii.gz file in that folder.
normalize_nii_gz_files(folder_path=folder_path)
3、常用医学图像数据预处理
import numpy as np
import os
import SimpleITK as sitk
import random
from scipy import ndimage
from os.path import join
import config
class LITS_preprocess:
    """Clip, resample and crop CT/label volume pairs and write train/val index files.

    Raw volumes are read from ``<raw_dataset_path>/ct`` and
    ``<raw_dataset_path>/label``; processed volumes are written to the same
    sub-folder names under ``fixed_dataset_path``. The label file for a CT
    file is found by replacing ``volume`` with ``segmentation`` in its name.
    """

    def __init__(self, raw_dataset_path,fixed_dataset_path, args):
        """Store the dataset paths and copy the settings from ``args``.

        ``args`` must provide ``n_labels``, ``upper``, ``lower``,
        ``expand_slice``, ``min_slices``, ``xy_down_scale``,
        ``slice_down_scale`` and ``valid_rate``.
        """
        self.raw_root_path = raw_dataset_path
        self.fixed_path = fixed_dataset_path
        # Number of segmentation classes (2 = liver only, 3 = liver and tumor).
        self.classes = args.n_labels
        # Intensity clipping thresholds applied to the CT array.
        self.upper = args.upper
        self.lower = args.lower
        # Number of slices to expand outward on each side of the labelled range.
        self.expand_slice = args.expand_slice
        # Minimum number of slices a cropped sample must keep.
        self.size = args.min_slices
        # In-plane zoom factor, and target spacing along the slice axis.
        self.xy_down_scale = args.xy_down_scale
        self.slice_down_scale = args.slice_down_scale
        # Fraction of samples assigned to the validation list.
        self.valid_rate = args.valid_rate

    def fix_data(self):
        """Process every CT/label pair of the raw dataset and save the results.

        Samples for which :meth:`process` returns ``(None, None)`` are skipped.
        """
        # Create the output folders. ``exist_ok`` also covers the case where
        # the root folder exists but the sub-folders do not.
        os.makedirs(join(self.fixed_path, 'ct'), exist_ok=True)
        os.makedirs(join(self.fixed_path, 'label'), exist_ok=True)

        file_list = os.listdir(join(self.raw_root_path,'ct'))
        Numbers = len(file_list)
        print('Total numbers of samples is :',Numbers)
        for i, ct_file in enumerate(file_list):
            print("==== {} | {}/{} ====".format(ct_file, i+1,Numbers))
            seg_file = ct_file.replace('volume', 'segmentation')
            ct_path = os.path.join(self.raw_root_path, 'ct', ct_file)
            seg_path = os.path.join(self.raw_root_path, 'label', seg_file)
            new_ct, new_seg = self.process(ct_path, seg_path, classes = self.classes)
            # Identity check: ``!=`` would be dispatched to the image objects.
            if new_ct is not None and new_seg is not None:
                sitk.WriteImage(new_ct, os.path.join(self.fixed_path, 'ct', ct_file))
                sitk.WriteImage(new_seg, os.path.join(self.fixed_path, 'label', seg_file))

    def process(self, ct_path, seg_path, classes=None):
        """Clip, resample and crop one CT/label pair.

        Args:
            ct_path: Path of the CT volume (read as int16).
            seg_path: Path of the label volume (read as int8).
            classes: When equal to 2, all non-zero labels are merged into 1.

        Returns:
            ``(new_ct, new_seg)`` as SimpleITK images, or ``(None, None)``
            when the label has no foreground or the cropped range has fewer
            than ``self.size`` slices.
        """
        ct = sitk.ReadImage(ct_path, sitk.sitkInt16)
        ct_array = sitk.GetArrayFromImage(ct)
        seg = sitk.ReadImage(seg_path, sitk.sitkInt8)
        seg_array = sitk.GetArrayFromImage(seg)
        print("Ori shape:",ct_array.shape, seg_array.shape)

        if classes==2:
            # Merge the liver and liver-tumor labels of the ground truth into one.
            seg_array[seg_array > 0] = 1

        # Clip intensities that fall outside the thresholds.
        ct_array[ct_array > self.upper] = self.upper
        ct_array[ct_array < self.lower] = self.lower

        # Resample: scale x and y by xy_down_scale and bring the slice-axis
        # spacing to slice_down_scale. Labels use order=0 (nearest neighbour)
        # so no new label values are introduced.
        zoom_factors = (ct.GetSpacing()[-1] / self.slice_down_scale, self.xy_down_scale, self.xy_down_scale)
        ct_array = ndimage.zoom(ct_array, zoom_factors, order=3)
        seg_array = ndimage.zoom(seg_array, zoom_factors, order=0)

        # Find the first and last slice that contain any label.
        z = np.any(seg_array, axis=(1, 2))
        labelled_slices = np.where(z)[0]
        if labelled_slices.size == 0:
            # No foreground at all: indexing [0, -1] would raise IndexError.
            return None,None
        start_slice, end_slice = labelled_slices[[0, -1]]

        # Expand by expand_slice slices in both directions, clamped to the volume.
        start_slice = max(start_slice - self.expand_slice, 0)
        end_slice = min(end_slice + self.expand_slice, seg_array.shape[0] - 1)
        print("Cut out range:",str(start_slice) + '--' + str(end_slice))

        # Give up on samples with fewer than `size` slices left.
        if end_slice - start_slice + 1 < self.size:
            return None,None

        # Keep only the selected slice range.
        ct_array = ct_array[start_slice:end_slice + 1, :, :]
        seg_array = seg_array[start_slice:end_slice + 1, :, :]
        print("Preprocessed shape:",ct_array.shape,seg_array.shape)

        # Spacing grows by 1 / xy_down_scale in-plane. Dividing directly
        # (instead of multiplying by int(1 / scale)) stays correct for factors
        # whose reciprocal is not an integer and never yields a zero spacing.
        new_spacing = (
            ct.GetSpacing()[0] / self.xy_down_scale,
            ct.GetSpacing()[1] / self.xy_down_scale,
            float(self.slice_down_scale),
        )

        # NOTE(review): the origin is copied from the input even though leading
        # slices may have been cropped - confirm whether downstream code relies
        # on physical coordinates.
        new_ct = sitk.GetImageFromArray(ct_array)
        new_ct.SetDirection(ct.GetDirection())
        new_ct.SetOrigin(ct.GetOrigin())
        new_ct.SetSpacing(new_spacing)

        new_seg = sitk.GetImageFromArray(seg_array)
        new_seg.SetDirection(ct.GetDirection())
        new_seg.SetOrigin(ct.GetOrigin())
        new_seg.SetSpacing(new_spacing)
        return new_ct, new_seg

    def write_train_val_name_list(self):
        """Shuffle the processed samples and write the train/val index files.

        The first ``int(n * (1 - valid_rate))`` shuffled samples go to
        ``train_path_list.txt``; all remaining samples go to
        ``val_path_list.txt``.
        """
        data_name_list = os.listdir(join(self.fixed_path, "ct"))
        data_num = len(data_name_list)
        print('the fixed dataset total numbers of samples is :', data_num)

        random.shuffle(data_name_list)

        assert self.valid_rate < 1.0
        split_index = int(data_num*(1-self.valid_rate))
        train_name_list = data_name_list[:split_index]
        # Slice to the end of the list: computing the end index as
        # n * ((1 - r) + r) can round below n (e.g. r = 0.3) and drop a sample.
        val_name_list = data_name_list[split_index:]

        self.write_name_list(train_name_list, "train_path_list.txt")
        self.write_name_list(val_name_list, "val_path_list.txt")

    def write_name_list(self, name_list, file_name):
        """Write one ``<ct_path> <label_path>`` line per sample to ``file_name``.

        The file is created inside ``self.fixed_path``.
        """
        # ``with`` closes the file even if a write fails.
        with open(join(self.fixed_path, file_name), 'w') as f:
            for name in name_list:
                ct_path = os.path.join(self.fixed_path, 'ct', name)
                seg_path = os.path.join(self.fixed_path, 'label', name.replace('volume', 'segmentation'))
                f.write(ct_path + ' ' + seg_path + "\n")
if __name__ == '__main__':
    # Root of the raw dataset and root that receives the processed dataset.
    raw_dataset_path = r'D:/qinchendong/changhai_hospital'
    fixed_dataset_path = r'D:/qinchendong/changhai_hosptial_fixed'
    args = config.args
    tool = LITS_preprocess(
        raw_dataset_path=raw_dataset_path,
        fixed_dataset_path=fixed_dataset_path,
        args=args,
    )
    # Step 1: process the raw volumes and save them.
    tool.fix_data()
    # Step 2: write the train/val index txt files.
    tool.write_train_val_name_list()
4、移动文件(按文件名后缀把各子文件夹中的文件移动到对应的目标文件夹)
import os
import shutil
def move_files(source_folder, t1_destination, t2_destination, manual_destination):
    """Sort files from the sub-folders of ``source_folder`` into three folders by suffix.

    Every direct sub-folder of ``source_folder`` is scanned. Files ending in
    ``t1.nii.gz``, ``t2.nii.gz`` or ``ManuallyCorrected.nii.gz`` are moved to
    the matching destination; all other files stay where they are. Entries
    of ``source_folder`` that are not folders are ignored.

    Args:
        source_folder: Folder whose sub-folders are scanned.
        t1_destination: Target folder for ``*t1.nii.gz`` files.
        t2_destination: Target folder for ``*t2.nii.gz`` files.
        manual_destination: Target folder for ``*ManuallyCorrected.nii.gz`` files.

    Destination folders are created on demand; without this the move fails
    when a destination does not exist yet.
    """
    # Suffix -> destination, checked in this order; the first match wins.
    routes = (
        ('t1.nii.gz', t1_destination),
        ('t2.nii.gz', t2_destination),
        ('ManuallyCorrected.nii.gz', manual_destination),
    )

    for foldername in os.listdir(source_folder):
        folder_path = os.path.join(source_folder, foldername)

        # Only sub-folders are scanned.
        if not os.path.isdir(folder_path):
            continue

        for file in os.listdir(folder_path):
            file_path = os.path.join(folder_path, file)
            for suffix, destination in routes:
                if file.endswith(suffix):
                    os.makedirs(destination, exist_ok=True)
                    shutil.move(file_path, os.path.join(destination, file))
                    break
# Source folder and the three destination folders. Raw strings keep the
# Windows backslashes literal instead of relying on unrecognized escape
# sequences such as '\D' or '\P', which newer Python versions warn about.
source_folder = r'D:\Datas\BraTS-TCGA-GBM\Pre-operative_TCGA_GBM_NIfTI_and_Segmentations'
t1_destination = r'D:\Datas\BraTS-TCGA-GBM\H'
t2_destination = r'D:\Datas\BraTS-TCGA-GBM\L'
manual_destination = r'D:\Datas\BraTS-TCGA-GBM\mask'
# Move the files.
move_files(source_folder, t1_destination, t2_destination, manual_destination)
5、对齐nii格式数据原点
import SimpleITK as sitk
# Load the two NIfTI images: image1 is the one to modify, image2 is the reference.
image1, image2 = (
    sitk.ReadImage(path)
    for path in (
        'D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/L/volume_1.nii.gz',
        'D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/H//volume_1.nii.gz',
    )
)
# Take the origin of the reference image ...
origin2 = image2.GetOrigin()
# ... and assign it to image1 (only the origin is changed).
image1.SetOrigin(origin2)
# Write the re-anchored image to a new file.
sitk.WriteImage(image1, 'D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/aligned_image1.nii.gz')
6、打印标签类别数
import os
import SimpleITK as sitk
def print_label_classes(folder_path):
    """Print how many distinct label values each ``.nii.gz`` file in a folder contains.

    Args:
        folder_path: Folder containing the ``.nii.gz`` label files.
    """
    import numpy as np  # local import: this snippet only imports os and SimpleITK

    # Only the .nii.gz files in the folder are inspected.
    files = [f for f in os.listdir(folder_path) if f.endswith('.nii.gz')]

    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        # Load the label image.
        label_image = sitk.ReadImage(file_path)

        # np.unique finds the distinct values in compiled code; building a
        # Python set from every voxel is far slower on full volumes.
        unique_labels = np.unique(sitk.GetArrayViewFromImage(label_image))

        # Report the number of distinct values (background included).
        num_classes = len(unique_labels)
        print(f"{file_name}: {num_classes} classes")
# Folder containing the .nii.gz label files to inspect.
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_preprocess\train_sup_100\mask'
# Print the number of label values found in each file.
print_label_classes(folder_path=folder_path)
7、打印图像尺寸
import os
import nibabel as nib
def get_nii_gz_dimensions(folder_path):
    """Print the array dimensions of every ``.nii.gz`` file in a folder.

    Images of any rank are supported; a 3D image prints as ``X x Y x Z``
    and a 4D image simply prints a fourth number.

    Args:
        folder_path: Folder containing the ``.nii.gz`` files.
    """
    # Regular files only; sub-folders are skipped.
    files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]

    for file_name in files:
        if not file_name.endswith('.nii.gz'):
            continue
        file_path = os.path.join(folder_path, file_name)
        image = nib.load(file_path)
        # Join however many axes the image has instead of unpacking exactly
        # three, which raises ValueError for non-3D images.
        dimensions = ' x '.join(str(size) for size in image.shape)
        print(f"{file_name}: Dimensions - {dimensions}")
# Folder containing the .nii.gz files whose dimensions are printed.
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_unpreprocess\mask'
# Print the dimensions of each .nii.gz file.
get_nii_gz_dimensions(folder_path=folder_path)
8、打印图像spacing
import os
import SimpleITK as sitk
def print_spacing_of_nii_gz_files(folder_path):
    """Print the spacing SimpleITK reports for each ``.nii.gz`` file in a folder.

    Args:
        folder_path: Folder containing the ``.nii.gz`` files.
    """
    # Collect the matching file names first, then visit them in that order.
    nii_names = list(filter(lambda entry: entry.endswith('.nii.gz'), os.listdir(folder_path)))

    for file_name in nii_names:
        # Read the image and query its spacing.
        loaded = sitk.ReadImage(os.path.join(folder_path, file_name))
        spacing = loaded.GetSpacing()
        print(f"{file_name}: Spacing - {spacing}")
# Folder containing the .nii.gz files whose spacing is printed.
folder_path = 'D:/qinchendong/XNet-main/dataset/BraTS/muti-model-BraTS_t1_t2/BraTS_preprocess/train_sup_100/mask'
# Print the spacing of each .nii.gz file.
print_spacing_of_nii_gz_files(folder_path=folder_path)