# Task of this script: extract the largest ROI from the mask and map it to the original DCE volume, taking that slice plus the slices above and below it (three in total). Strip the surrounding zero pixels, then bilinearly resize to 224x224. Map the mask to the original T2 series as well to obtain a 224x224 image (mask and T2 differ in size; the mask is resized to the T2 size with nearest-neighbour interpolation). Also fetch this patient's features from the CSV, and save the image data plus the CSV features into one h5 file so they can be fused in the network.
import glob
from random import random
import pandas as pd
from skimage import exposure
import h5py
import SimpleITK as sitk
from PIL import Image
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import os
import numpy as np
import pydicom
import nibabel as nib
import random
"""
这个脚本只能处理良性文件夹或恶性文件夹,需要运行两次,良性一次,恶性一次,另外需要修改label为0或1
可以将良性或恶性文件夹下的DCE和T2的数据,CSV的数据还有label数据保存到h5文件中
image_DCE_path文件夹存储着所有DCE序列原始图像的nii文件
image_T2_path文件夹存储着所有T2序列原始图像的nii文件
label_path文件夹存储着所有掩膜图像的nii文件
save_dir文件夹存储着所有处理后的图像h5文件
csv_file 为pyradiomics特征提取后的csv文件路径,注意要删除里面没意义的列
name_labbel = 0 为良性或恶性的标签
一定要删除csv中字符串部分,不然会报错
"""
# Folder layout for the benign class.
image_benign_DCE_path = r'C:\Users\Administrator\Desktop\Breast\benign\DCE'  # DCE-series nii volumes
image_benign_T2_path = r'C:\Users\Administrator\Desktop\Breast\benign\T2'  # T2-series nii volumes
label_benign_path = r'C:\Users\Administrator\Desktop\Breast\benign_label'  # mask nii files
# Path of the pyradiomics feature CSV for benign cases (string columns must be removed).
csv_benign_file = r"C:\Users\Administrator\Desktop\Breast\benign.csv"
label0 = 0  # class label stored for benign cases
# Folder layout for the malignant class.
image_malignant_DCE_path = r'C:\Users\Administrator\Desktop\Breast\malignant\DCE'
image_malignant_T2_path = r'C:\Users\Administrator\Desktop\Breast\malignant\T2'
label_malignant_path = r'C:\Users\Administrator\Desktop\Breast\malignant_label'
csv_malignant_file = r"C:\Users\Administrator\Desktop\Breast\malignant.csv"  # pyradiomics feature CSV (malignant)
label1 = 1  # class label stored for malignant cases
save_dir = r'C:\Users\Administrator\Desktop\Breast\data'  # output root for the processed h5 files
# Crop a volume to the tight bounding box of its non-zero voxels.
def trim_image(image):
    """Remove the all-zero border of an image volume.

    Args:
        image: array-like volume (for this script: slices x H x W).

    Returns:
        np.ndarray cropped to the tight bounding box of the non-zero
        voxels along every axis.

    Raises:
        ValueError: if the volume contains no non-zero voxel — there is
            no bounding box, and the original ``np.min`` on an empty
            index array would fail with an opaque message.
    """
    image_array = np.asarray(image)
    nonzero_indices = np.nonzero(image_array)
    if nonzero_indices[0].size == 0:
        raise ValueError("trim_image: image contains no non-zero voxels")
    # One slice per axis, from the first to the last non-zero index.
    bounding_box = tuple(slice(idx.min(), idx.max() + 1) for idx in nonzero_indices)
    return image_array[bounding_box]
# Data preprocessing: window/level transform followed by per-slice
# histogram equalization.
def preprocess_data(image_array, window_width=1000, window_center=500):
    """Apply a window/level intensity transform, then equalize each slice.

    Args:
        image_array: raw voxel data read from the original nii file
            (slices along axis 0).
        window_width: window width of the intensity transform.
        window_center: window center (level) of the intensity transform.

    Returns:
        np.ndarray stacking the histogram-equalized slices.
    """
    # Map [center - width/2, center + width/2] linearly onto [0, 1],
    # clipping everything outside the window.
    lower_bound = float(window_center) - 0.5 * float(window_width)
    scaled = (image_array - lower_bound) / float(window_width)
    scaled = np.clip(scaled, 0, 1)
    quantized = (scaled * 255).astype('uint8')
    # Equalize each slice independently and stack the results.
    return np.array([exposure.equalize_hist(sl) for sl in quantized])
def concat_image(label_path, image_path, label=False):
    """Build a 3-slice ROI stack around the largest mask slice, resized to 224x224.

    Reads the mask and the original volume, finds the slice with the most
    mask pixels, multiplies that mask into the slice and its two
    neighbours, crops the zero border away and bilinearly resizes the
    stack to 224x224.

    Args:
        label_path: path of the mask nii file.
        image_path: path of the original (DCE or T2) nii file.
        label: when True, first resample the mask (nearest neighbour) to
            the in-plane size of the original volume — used for T2,
            whose size differs from the mask.

    Returns:
        np.ndarray of shape (3, 224, 224), dtype uint8.

    Raises:
        ValueError: if the mask contains no non-zero pixel (previously
            this crashed with ``label_array[None]`` → TypeError).
    """
    image_label = sitk.ReadImage(label_path)
    image_origin = sitk.ReadImage(image_path)
    origin_array = sitk.GetArrayFromImage(image_origin)
    label_array = sitk.GetArrayFromImage(image_label)
    # In-plane size of the original volume.
    origin_size = origin_array[0, :, :].shape
    if label:
        # Mask and T2 differ in size: resample the mask with nearest
        # neighbour so it overlays the original volume exactly.
        label_array = F.interpolate(
            torch.tensor(label_array, dtype=torch.float32).unsqueeze(0),
            size=origin_size, mode='nearest').squeeze().numpy().astype(np.uint8)
    # Windowing/equalization step intentionally disabled:
    # origin_array = preprocess_data(origin_array)
    # Pick the slice with the largest ROI (most non-zero mask pixels).
    counts = [np.count_nonzero(label_array[i]) for i in range(label_array.shape[0])]
    max_nonzero_index = int(np.argmax(counts))
    if counts[max_nonzero_index] == 0:
        raise ValueError("mask '{}' contains no non-zero pixels".format(label_path))
    # Clamp the neighbour indices: the original code wrapped to slice -1
    # when the ROI was on the first slice and raised IndexError on the last.
    prev_idx = max(max_nonzero_index - 1, 0)
    next_idx = min(max_nonzero_index + 1, label_array.shape[0] - 1)
    roi_array = np.array([label_array[max_nonzero_index] * origin_array[prev_idx],
                          label_array[max_nonzero_index] * origin_array[max_nonzero_index],
                          label_array[max_nonzero_index] * origin_array[next_idx]])
    finish_array = trim_image(roi_array).astype(np.float64)
    image_tensor = torch.tensor(finish_array, dtype=torch.float32).unsqueeze(0)
    # Target output size; bilinear with corner alignment.
    target_height, target_width = 224, 224
    output_bilinear_corners_True = F.interpolate(image_tensor, size=(target_height, target_width), mode='bilinear',
                                                 align_corners=True)
    # NOTE(review): the uint8 cast wraps intensities above 255 — the
    # windowing step that would bound them is commented out; confirm.
    return output_bilinear_corners_True.squeeze().numpy().astype(np.uint8)
def get_features_by_name(name, csv_file):
    """Look up one patient's radiomics feature row in the CSV by name.

    Args:
        name: patient name to look up in the 'Name' column.
        csv_file: path of the pyradiomics feature CSV; every column after
            'Name' must be numeric (drop meaningless string columns first).

    Returns:
        A single-column pandas DataFrame with every value after the
        'Name' column of the first matching row, or None (after printing
        a message) when the name is absent.
    """
    df = pd.read_csv(csv_file)
    # Single scan of the 'Name' column (the original scanned it twice:
    # once for membership, once more for the index).
    matches = df.index[df['Name'] == name]
    if len(matches) == 0:
        print(f"Name '{name}' not found in CSV file.")
        return None
    # Keep everything after the 'Name' column of the first matching row.
    features = df.iloc[matches[0], 1:].tolist()
    return pd.DataFrame(features)
def save_data(image_DCE_path, image_T2_path, label_path, label, csv_file):
    """Process every matched patient and write images + CSV features to h5.

    For each mask file whose patient name ('<name>-...') has a matching
    DCE volume, the 3-slice DCE and T2 ROI stacks, the CSV feature vector
    and the class label are written into one h5 file, randomly assigned
    to the train (70%) / valid (20%) / test (10%) split under the
    module-level ``save_dir``.

    Args:
        image_DCE_path: folder holding the DCE nii volumes.
        image_T2_path: folder holding the T2 nii volumes.
        label_path: folder holding the mask nii files.
        label: class label to store (0 = benign, 1 = malignant).
        csv_file: path of the pyradiomics feature CSV.
    """
    r_num = 0
    # Patient names present in the DCE folder, extension stripped.
    # (The original also ran glob.glob here and threw the result away.)
    image_DCE_files = [x.split('.')[0] for x in os.listdir(image_DCE_path)]
    label_files = os.listdir(label_path)
    for name_label in label_files:
        name = name_label.split('-')[0]
        if name not in image_DCE_files:
            continue
        label_path1 = os.path.join(label_path, name_label)
        # NOTE(review): 'name' has its extension stripped, so these paths
        # carry no '.nii' suffix — confirm the volume files really are
        # stored without an extension.
        DCE_path = os.path.join(image_DCE_path, name)
        T2_path = os.path.join(image_T2_path, name)
        # Build the 3-slice ROI stacks for both sequences.
        image_DCE = concat_image(label_path1, DCE_path)
        image_T2 = concat_image(label_path1, T2_path, label=True)
        name_labbel = name.split('.')[0]
        csv_data = get_features_by_name(name_labbel, csv_file)
        # Random 70/20/10 split (randint(1, 100) is always >= 1, so the
        # original 'R >= 0' test was redundant).
        R = random.randint(1, 100)
        if R <= 70:
            subset = "train"
        elif R <= 90:
            subset = "valid"
        else:
            subset = "test"
        out_dir = save_dir + "/" + subset
        os.makedirs(out_dir, exist_ok=True)
        # 'with' guarantees the h5 file is closed even if a write fails.
        with h5py.File(out_dir + '/{}_{}.h5'.format(name_labbel, r_num), 'w') as f:
            f.create_dataset('data_DCE', data=image_DCE, compression="gzip")
            f.create_dataset('data_T2', data=image_T2, compression="gzip")
            f.create_dataset('csv', data=csv_data)
            f.create_dataset('label', data=label)
        r_num += 1
        print("process {} uid = {} label={}".format(r_num, name_labbel, label))
# Run the pipeline once per class; label0/label1 become the stored class label.
save_data(image_benign_DCE_path,image_benign_T2_path,label_benign_path,label0,csv_benign_file)
save_data(image_malignant_DCE_path,image_malignant_T2_path,label_malignant_path,label1,csv_malignant_file)