批量修改文件名

最新推荐文章于 2025-03-10 11:06:45 发布

Quintin Dong

最新推荐文章于 2025-03-10 11:06:45 发布

阅读量950

点赞数 3

文章标签： python

本文链接：https://blog.csdn.net/weixin_50641682/article/details/136987213

版权

给大家放几个我常用的代码，包括批量修改文件名、归一化标签、常用医学图像数据预处理、移动文件夹、对齐 nii 格式数据原点、打印标签类别数、打印图像尺寸、打印图像 spacing（nii.gz 格式数据）。

1、批量修改文件名

import os

def rename_and_move_files(source_folder, destination_folder):
    """Move each file of ``source_folder`` into ``destination_folder`` as ``volume_<n>.nii.gz``.

    Files are taken in sorted (lexicographic) name order and numbered from 1,
    so the old-name -> new-name mapping is reproducible between runs;
    ``os.listdir`` alone returns entries in arbitrary order.
    Sub-directories of ``source_folder`` are left untouched.

    Args:
        source_folder: Directory whose files are renamed and moved.
        destination_folder: Directory receiving the renamed files. It is
            created if it does not exist.

    Raises:
        FileExistsError: If a target name is already taken by a different
            file in ``destination_folder``. Nothing is overwritten.
    """
    # Local import: the surrounding script only imports ``os``.
    import shutil

    os.makedirs(destination_folder, exist_ok=True)

    # Only regular files are renamed; sorting fixes the numbering order.
    files = sorted(
        name for name in os.listdir(source_folder)
        if os.path.isfile(os.path.join(source_folder, name))
    )

    for index, file in enumerate(files, start=1):
        original_path = os.path.join(source_folder, file)
        new_name = f'volume_{index}.nii.gz'
        new_path = os.path.join(destination_folder, new_name)

        # Refuse to clobber an existing volume (a plain rename silently
        # overwrites on POSIX). Renaming a file onto itself is harmless.
        if os.path.exists(new_path) and not os.path.samefile(original_path, new_path):
            raise FileExistsError(f'Target already exists: {new_path}')

        # shutil.move also works when source and destination are on
        # different drives/filesystems, where os.rename fails.
        shutil.move(original_path, new_path)
        print(f'Renamed and moved: {original_path} to {new_path}')

# Source and destination folders. Raw strings keep the Windows backslashes
# literal instead of relying on unrecognised escape sequences such as "\D",
# which newer Python versions warn about.
source_folder = r'D:\Datas\BraTS-TCGA-GBM\L_name'
destination_folder = r'D:\Datas\BraTS-TCGA-GBM\L'

# Rename the files and move them to the destination folder.
rename_and_move_files(source_folder, destination_folder)

2、归一化标签

import os
import SimpleITK as sitk
import numpy as np

def normalize_nii_gz_files(folder_path):
    """Min-max normalise every ``.nii.gz`` image in ``folder_path``, in place.

    Each image is rescaled to the range [0, 1] with
    ``(x - min) / (max - min)`` and written back over the original file,
    keeping the original image's geometry via ``CopyInformation``.
    An image whose voxels all share one value (for example an empty mask)
    is written as all zeros instead of dividing by zero.

    Args:
        folder_path: Directory containing the ``.nii.gz`` files. Other files
            are ignored.
    """
    files = [f for f in os.listdir(folder_path) if f.endswith('.nii.gz')]

    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        image = sitk.ReadImage(file_path)
        image_data = sitk.GetArrayFromImage(image)

        # Python floats avoid wrap-around when max - min does not fit the
        # image's integer type, and make the subtraction produce floats.
        data_min = float(np.min(image_data))
        value_range = float(np.max(image_data)) - data_min

        shifted = image_data - data_min
        if value_range == 0:
            # Constant image: ``shifted`` is already all zeros; dividing
            # would produce NaN.
            normalized_data = shifted
        else:
            normalized_data = shifted / value_range

        # Rebuild an image from the array and restore origin/spacing/direction.
        normalized_image = sitk.GetImageFromArray(normalized_data)
        normalized_image.CopyInformation(image)

        # The result overwrites the input file.
        normalized_file_path = file_path
        sitk.WriteImage(normalized_image, normalized_file_path)
        print(f"{file_name}: Normalized and saved as {normalized_file_path}")

# Folder holding the .nii.gz files to normalise (hard-coded local path).
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_preprocess\val\mask'

# Normalise every .nii.gz file in that folder.
normalize_nii_gz_files(folder_path)

3、常用医学图像数据预处理

import numpy as np
import os
import SimpleITK as sitk
import random
from scipy import ndimage
from os.path import join
import config

class LITS_preprocess:
    """Clip, resample and crop CT volumes together with their label volumes.

    Input volumes are read from ``<raw_dataset_path>/ct`` and the matching
    labels from ``<raw_dataset_path>/label``; the label file name is the CT
    file name with ``volume`` replaced by ``segmentation``. Results are
    written to ``<fixed_dataset_path>/ct`` and ``<fixed_dataset_path>/label``.
    """

    def __init__(self, raw_dataset_path,fixed_dataset_path, args):
        """Store the dataset locations and the preprocessing settings.

        Args:
            raw_dataset_path: Root folder of the raw dataset.
            fixed_dataset_path: Root folder for the preprocessed dataset.
            args: Settings object providing ``n_labels``, ``upper``,
                ``lower``, ``expand_slice``, ``min_slices``,
                ``xy_down_scale``, ``slice_down_scale`` and ``valid_rate``.
        """
        self.raw_root_path = raw_dataset_path
        self.fixed_path = fixed_dataset_path
        # Number of segmentation classes (2: liver only; 3: liver and tumour).
        self.classes = args.n_labels
        # Intensity window: values outside [lower, upper] are clipped.
        self.upper = args.upper
        self.lower = args.lower
        # Number of extra slices kept on each side of the labelled range.
        self.expand_slice = args.expand_slice
        # Minimum number of slices a cropped sample must have.
        self.size = args.min_slices
        # Zoom factor applied to both in-plane axes.
        self.xy_down_scale = args.xy_down_scale
        # Target spacing along the slice axis.
        self.slice_down_scale = args.slice_down_scale

        # Fraction of the samples assigned to the validation list.
        self.valid_rate = args.valid_rate

    def fix_data(self):
        """Preprocess every CT/label pair and save the accepted ones."""
        # exist_ok also repairs the case where fixed_path already exists but
        # one of its sub-directories does not.
        os.makedirs(join(self.fixed_path, 'ct'), exist_ok=True)
        os.makedirs(join(self.fixed_path, 'label'), exist_ok=True)

        file_list = os.listdir(join(self.raw_root_path,'ct'))
        Numbers = len(file_list)
        print('Total numbers of samples is :',Numbers)
        for i, ct_file in enumerate(file_list):
            print("==== {} | {}/{} ====".format(ct_file, i+1,Numbers))
            seg_file = ct_file.replace('volume', 'segmentation')
            ct_path = os.path.join(self.raw_root_path, 'ct', ct_file)
            seg_path = os.path.join(self.raw_root_path, 'label', seg_file)
            new_ct, new_seg = self.process(ct_path, seg_path, classes = self.classes)
            # process() returns (None, None) for rejected samples; identity
            # comparison avoids the images' overloaded comparison operators.
            if new_ct is not None and new_seg is not None:
                sitk.WriteImage(new_ct, os.path.join(self.fixed_path, 'ct', ct_file))
                sitk.WriteImage(new_seg, os.path.join(self.fixed_path, 'label', seg_file))

    def process(self, ct_path, seg_path, classes=None):
        """Clip, resample and crop one CT volume and its label volume.

        Args:
            ct_path: Path of the CT volume.
            seg_path: Path of the label volume.
            classes: When 2, all non-zero labels are merged into label 1.

        Returns:
            ``(new_ct, new_seg)`` as SimpleITK images, or ``(None, None)``
            when the label has no foreground or the cropped range has fewer
            than ``self.size`` slices.
        """
        ct = sitk.ReadImage(ct_path, sitk.sitkInt16)
        ct_array = sitk.GetArrayFromImage(ct)
        seg = sitk.ReadImage(seg_path, sitk.sitkInt8)
        seg_array = sitk.GetArrayFromImage(seg)

        print("Ori shape:",ct_array.shape, seg_array.shape)
        if classes==2:
            # Merge the liver and tumour labels into a single foreground label.
            seg_array[seg_array > 0] = 1
        # Clip intensities to the configured window.
        ct_array[ct_array > self.upper] = self.upper
        ct_array[ct_array < self.lower] = self.lower

        # Resample: in-plane axes by xy_down_scale, slice axis so that its
        # spacing becomes slice_down_scale. Labels use order=0 so that no
        # new label values are interpolated.
        zoom_factors = (ct.GetSpacing()[-1] / self.slice_down_scale, self.xy_down_scale, self.xy_down_scale)
        ct_array = ndimage.zoom(ct_array, zoom_factors, order=3)
        seg_array = ndimage.zoom(seg_array, zoom_factors, order=0)

        # Find the first and last slice that contain any label.
        z = np.any(seg_array, axis=(1, 2))
        labelled_slices = np.where(z)[0]
        if labelled_slices.size == 0:
            # Nothing to crop around; indexing [0, -1] would raise IndexError.
            print("No labelled slice found, give up the sample")
            return None,None
        start_slice = int(labelled_slices[0])
        end_slice = int(labelled_slices[-1])

        # Expand the range by expand_slice on both sides, clamped to the volume.
        start_slice = max(start_slice - self.expand_slice, 0)
        end_slice = min(end_slice + self.expand_slice, seg_array.shape[0] - 1)

        print("Cut out range:",str(start_slice) + '--' + str(end_slice))
        # Reject samples that are left with fewer than ``size`` slices.
        if end_slice - start_slice + 1 < self.size:
            return None,None

        # Keep only the selected slice range.
        ct_array = ct_array[start_slice:end_slice + 1, :, :]
        seg_array = seg_array[start_slice:end_slice + 1, :, :]
        print("Preprocessed shape:",ct_array.shape,seg_array.shape)

        # Zooming an axis by a factor s divides its spacing by s. The
        # previous int(1 / s) truncated for factors such as 0.75 and became
        # 0 when upsampling.
        # NOTE(review): the origin is copied unchanged even though leading
        # slices may have been cropped - confirm whether that is intended.
        new_spacing = (
            ct.GetSpacing()[0] / self.xy_down_scale,
            ct.GetSpacing()[1] / self.xy_down_scale,
            self.slice_down_scale,
        )

        new_ct = sitk.GetImageFromArray(ct_array)
        new_ct.SetDirection(ct.GetDirection())
        new_ct.SetOrigin(ct.GetOrigin())
        new_ct.SetSpacing(new_spacing)

        new_seg = sitk.GetImageFromArray(seg_array)
        new_seg.SetDirection(ct.GetDirection())
        new_seg.SetOrigin(ct.GetOrigin())
        new_seg.SetSpacing(new_spacing)
        return new_ct, new_seg

    def write_train_val_name_list(self):
        """Shuffle the preprocessed samples and write the train/val list files."""
        data_name_list = os.listdir(join(self.fixed_path, "ct"))
        data_num = len(data_name_list)
        print('the fixed dataset total numbers of samples is :', data_num)
        random.shuffle(data_name_list)

        assert self.valid_rate < 1.0
        # Everything after the training part is validation, so no sample can
        # be dropped by floating-point rounding of the end index.
        split_index = int(data_num*(1-self.valid_rate))
        train_name_list = data_name_list[:split_index]
        val_name_list = data_name_list[split_index:]

        self.write_name_list(train_name_list, "train_path_list.txt")
        self.write_name_list(val_name_list, "val_path_list.txt")

    def write_name_list(self, name_list, file_name):
        """Write one ``<ct path> <label path>`` line per sample.

        Args:
            name_list: CT file names located in ``<fixed_path>/ct``.
            file_name: Name of the list file created inside ``fixed_path``.
        """
        # The context manager closes the file even if a write fails.
        with open(join(self.fixed_path, file_name), 'w') as f:
            for name in name_list:
                ct_path = os.path.join(self.fixed_path, 'ct', name)
                seg_path = os.path.join(self.fixed_path, 'label', name.replace('volume', 'segmentation'))
                f.write(ct_path + ' ' + seg_path + "\n")

# Script entry point: preprocess the raw dataset, then write the index files.
if __name__ == '__main__':
    # Hard-coded local input and output dataset folders.
    raw_dataset_path = r'D:/qinchendong/changhai_hospital'
    fixed_dataset_path = r'D:/qinchendong/changhai_hosptial_fixed'

    # Settings come from the project-local ``config`` module.
    args = config.args 
    tool = LITS_preprocess(raw_dataset_path,fixed_dataset_path, args)
    tool.fix_data()                            # crop the raw images and save them
    tool.write_train_val_name_list()      # create the index txt files

4、移动文件夹

import os
import shutil

def move_files(source_folder, t1_destination, t2_destination, manual_destination):
    """Sort the files of each sub-folder of ``source_folder`` by name suffix.

    Only the direct sub-folders of ``source_folder`` are scanned. A file is
    moved according to the first matching suffix:

    * ``t1.nii.gz`` -> ``t1_destination``
    * ``t2.nii.gz`` -> ``t2_destination``
    * ``ManuallyCorrected.nii.gz`` -> ``manual_destination``

    Files matching none of the suffixes stay where they are.

    Args:
        source_folder: Folder containing one sub-folder per case.
        t1_destination: Target folder for files ending in ``t1.nii.gz``.
        t2_destination: Target folder for files ending in ``t2.nii.gz``.
        manual_destination: Target folder for files ending in
            ``ManuallyCorrected.nii.gz``.
    """
    # (suffix, destination) pairs, checked in this order.
    routes = (
        ('t1.nii.gz', t1_destination),
        ('t2.nii.gz', t2_destination),
        ('ManuallyCorrected.nii.gz', manual_destination),
    )

    # Moving into a folder that does not exist fails, so create the targets
    # up front.
    for _, destination in routes:
        os.makedirs(destination, exist_ok=True)

    for foldername in os.listdir(source_folder):
        folder_path = os.path.join(source_folder, foldername)

        # Skip plain files lying directly in source_folder.
        if not os.path.isdir(folder_path):
            continue

        for file in os.listdir(folder_path):
            for suffix, destination in routes:
                if file.endswith(suffix):
                    shutil.move(os.path.join(folder_path, file), os.path.join(destination, file))
                    break

# Source folder and target folders. Raw strings keep the Windows backslashes
# literal instead of relying on unrecognised escape sequences such as "\D"
# or "\m", which newer Python versions warn about.
source_folder = r'D:\Datas\BraTS-TCGA-GBM\Pre-operative_TCGA_GBM_NIfTI_and_Segmentations'
t1_destination = r'D:\Datas\BraTS-TCGA-GBM\H'
t2_destination = r'D:\Datas\BraTS-TCGA-GBM\L'
manual_destination = r'D:\Datas\BraTS-TCGA-GBM\mask'

# Move the files into their target folders.
move_files(source_folder, t1_destination, t2_destination, manual_destination)

5、对齐nii格式数据原点

import SimpleITK as sitk

# Read the two NIfTI images (hard-coded local paths).
image1 = sitk.ReadImage('D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/L/volume_1.nii.gz')
image2 = sitk.ReadImage('D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/H//volume_1.nii.gz')

# Take the origin of image2.
origin2 = image2.GetOrigin()

# Give image1 the same origin as image2. Only the origin is changed here;
# spacing, direction and voxel data of image1 are not touched.
image1.SetOrigin(origin2)

# Save the re-aligned image under a new file name.
sitk.WriteImage(image1, 'D:/qinchendong/XNet-main/dataset/pancreas_demo/train_sup_100/aligned_image1.nii.gz')

6、打印标签类别数

import os
import SimpleITK as sitk

def print_label_classes(folder_path):
    """Print how many distinct label values each ``.nii.gz`` file contains.

    One line ``<file name>: <count> classes`` is printed per file. The count
    includes every distinct voxel value, background included.

    Args:
        folder_path: Directory containing the label files. Files without the
            ``.nii.gz`` suffix are ignored.
    """
    # Local import: this script section only imports ``os`` and SimpleITK.
    import numpy as np

    files = [f for f in os.listdir(folder_path) if f.endswith('.nii.gz')]

    for file_name in files:
        file_path = os.path.join(folder_path, file_name)

        label_image = sitk.ReadImage(file_path)

        # np.unique works on the whole array in native code; building a
        # Python set from a flattened copy boxes every voxel individually.
        unique_labels = np.unique(sitk.GetArrayViewFromImage(label_image))

        num_classes = len(unique_labels)
        print(f"{file_name}: {num_classes} classes")

# Folder holding the label files to inspect (hard-coded local path).
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_preprocess\train_sup_100\mask'

# Print the number of label classes of every .nii.gz file in that folder.
print_label_classes(folder_path)

7、打印图像尺寸

import os
import nibabel as nib

def get_nii_gz_dimensions(folder_path):
    """Print the array shape of every ``.nii.gz`` file in ``folder_path``.

    One line ``<file name>: Dimensions - <d1> x <d2> x ...`` is printed per
    file. Images with any number of dimensions are supported; unpacking the
    shape into exactly three values would fail on, for example, a 4D image.

    Args:
        folder_path: Directory to scan. Sub-directories and files without
            the ``.nii.gz`` suffix are ignored.
    """
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        if not (os.path.isfile(file_path) and file_name.endswith('.nii.gz')):
            continue

        image = nib.load(file_path)
        dimensions = ' x '.join(str(size) for size in image.shape)
        print(f"{file_name}: Dimensions - {dimensions}")

# Folder holding the images to inspect (hard-coded local path).
folder_path = r'D:\qinchendong\XNet-main\dataset\BraTS\muti-model-BraTS_t1_t2\BraTS_unpreprocess\mask'

# Print the dimensions of every .nii.gz file in that folder.
get_nii_gz_dimensions(folder_path)

8、打印图像spacing

import os
import SimpleITK as sitk

def print_spacing_of_nii_gz_files(folder_path):
    """Print the spacing reported by SimpleITK for each ``.nii.gz`` file.

    One line ``<file name>: Spacing - <spacing>`` is printed per file.

    Args:
        folder_path: Directory to scan. Entries without the ``.nii.gz``
            suffix are ignored.
    """
    # Collect the matching names first, then handle them one by one.
    nii_names = []
    for entry in os.listdir(folder_path):
        if entry.endswith('.nii.gz'):
            nii_names.append(entry)

    for file_name in nii_names:
        volume = sitk.ReadImage(os.path.join(folder_path, file_name))
        print(f"{file_name}: Spacing - {volume.GetSpacing()}")

# Folder holding the images to inspect (hard-coded local path).
folder_path = 'D:/qinchendong/XNet-main/dataset/BraTS/muti-model-BraTS_t1_t2/BraTS_preprocess/train_sup_100/mask'

# Print the spacing of every .nii.gz file in that folder.
print_spacing_of_nii_gz_files(folder_path)