数据预处理prepare_data

MR乳腺图像的预处理:

单序列的数据预处理:

# 作者:  王文江
# 开发日期: 2023/12/12
import random
import os
import h5py
import pydicom
import numpy as np
import glob
from skimage import exposure

label_set = ["预后良好", "预后不良"]

class MedicalImageDeal:
    def __init__(self, img, path, percent=1):
        self.img = img
        self.percent = percent
        self.p = path

    @property
    def valid_img(self):
        cdf = exposure.cumulative_distribution(self.img)
        if len(cdf[0]) == 0:
            watershed = self.img.min()
        else:
            watershed = cdf[1][cdf[0] >= self.percent][0]
        return np.clip(self.img, self.img.min(), watershed)

    @property
    def norm_img(self):
        return ((self.img - self.img.min()) / (self.img.max() - self.img.min())).astype(np.float32)

    @property
    def width_center1(self):
        image = pydicom.dcmread(self.p).pixel_array
        cdf = exposure.cumulative_distribution(image)
        if len(cdf[0]) == 0:
            return np.zeros_like(image)

        cdf_min = np.min(cdf[1])
        a = np.diff(cdf)
        if len(a) == 0:
            return np.zeros_like(image)
        aa = a[0][500 - cdf_min:1000]
        if len(aa) < 3:
            return np.zeros_like(image)

        aa_max = np.max(aa)
        xx = []
        for i in range(aa.shape[0]):
            if aa[i] > aa_max * 0.005:
                xx.append(i + 500)
        if len(xx) < 3:
            return np.zeros_like(image)

        ds = np.clip(image, xx[0], xx[len(xx) - 3])
        max_val = np.max(ds)
        min_val = np.min(ds)
        if max_val == min_val:
            return np.zeros_like(ds)
        ds = (ds - min_val) / (max_val - min_val)
        return ds
def generate_data(root_data, index, save_dir):
    low_energy_fold = os.listdir(os.path.join(root_data, label_set[index], "T1"))
    r_num = 0
    missing_pixel_data_files = []

    for patient_name in low_energy_fold:
        low_energy_list = glob.glob(os.path.join(root_data, label_set[index], "T1", patient_name, "*.dcm"))
        low_energy_list = sorted(low_energy_list)

        for low_energy_file in low_energy_list:
            dicom_data = pydicom.dcmread(low_energy_file)

            # 检查DICOM文件是否包含像素数据
            if hasattr(dicom_data, 'PixelData') and dicom_data.PixelData is not None:
                image = dicom_data.pixel_array
                low_energy_image = MedicalImageDeal(image, low_energy_file).valid_img

                R = random.randint(1, 100)
                if R >= 0 and R <= 70:
                    subdir = "train"
                elif R > 70 and R <= 90:
                    subdir = "valid"
                else:
                    subdir = "test"

                # 创建子目录(如果不存在)
                subdir_path = os.path.join(save_dir, subdir)
                os.makedirs(subdir_path, exist_ok=True)

                file_name = '{}_{}.h5'.format(patient_name, r_num)
                file_path = os.path.join(subdir_path, file_name)
                with h5py.File(file_path, 'w') as f:
                    f.create_dataset('LOW_ENERGY', data=low_energy_image, compression="gzip")
                    f.create_dataset('label', data=index)

                r_num += 1
                print("Processed {} - UID: {} - Label: {}".format(r_num, patient_name, label_set[index]))
            else:
                missing_pixel_data_files.append(low_energy_file)

    if missing_pixel_data_files:
        print("The following DICOM files are missing pixel data and were skipped:")
        for missing_file in missing_pixel_data_files:
            print(missing_file)


root_data = r"H:\王文江DL"
save_dir = r"I:\代码\MR_fussion\MF\h5py"

# 读取dMMR数据
generate_data(root_data, 0, save_dir)
# 读取pMMR数据
generate_data(root_data, 1, save_dir)

多序列融合的数据预处理:

import random
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import nibabel as nib
import SimpleITK as sitk
import math
import glob
import os
import h5py
import pydicom
import cv2
from skimage.measure import label
# import gdcm

label_set = ["良性","恶性"]

def label_to_num(label):
    return label_set.index(label)
def num_to_label(n):
    return label_set[n]

import random

class MedicalImageDeal(object):
    def __init__(self, img, path, percent=1):
        self.img = img
        self.percent = percent
        self.p = path

    @property
    def valid_img(self):
        from skimage import exposure
        cdf = exposure.cumulative_distribution(self.img)
        watershed = cdf[1][cdf[0] >= self.percent][0]
        return np.clip(self.img, self.img.min(), watershed)

    @property
    def norm_img(self):
        return ((self.img - self.img.min()) / (self.img.max() - self.img.min())).astype(np.float32)

    @property
    def width_center1(self):
        from skimage import exposure
        image = pydicom.read_file(self.p).pixel_array
        cdf = exposure.cumulative_distribution(image)
        cdf_min = np.min(cdf[1])
        a = np.diff(cdf)
        aa = a[0][500 - cdf_min:1000]
        aa_max = np.max(aa)
        xx = []
        for i in range(aa.shape[0]):
            if aa[i] > aa_max * 0.005:
                xx.append(i + 500)
        ds = np.clip(image, xx[0], xx[len(xx) - 3])
        max = np.max(ds)
        min = np.min(ds)
        ds = (ds-min)/(max-min)
        return ds

def generate_data(root_data,index,save_dir):
    low_energy_fold = os.listdir(os.path.join(root_data,label_set[index],"T1"))
    high_energy_fold = os.listdir(os.path.join(root_data,label_set[index],"T2"))
    enhance_fold = os.listdir(os.path.join(root_data,label_set[index],"enhance"))
    
    patient_names = low_energy_fold
    images = []
    labs = []
    n = 0
    for i in range(len(patient_names)):  # len(patient_names)
        low_energy_list = glob.glob(os.path.join(root_data,label_set[index],"T1",patient_names[i],"*.dcm"))
        high_energy_list = glob.glob(os.path.join(root_data,label_set[index],"T2",patient_names[i],"*.dcm"))
        enhance_list = glob.glob(os.path.join(root_data,label_set[index],"enhance",patient_names[i],"*.dcm"))
        
        low_energy_list = sorted(low_energy_list)
        high_energy_list = sorted(high_energy_list)
        enhance_list = sorted(enhance_list)
        
        for j in range(len(low_energy_list)):
            item = {"low_energy":low_energy_list[j],
                    "high_energy":high_energy_list[j],
                    "enhance":enhance_list[j]}
            images.append(item)
            labs.append(index)
        r_num = 0
    for image_file,lab in zip(images,labs):
        (flepath, flename) = os.path.split(image_file["low_energy"])
        (rename, suffix) = os.path.splitext(flename)
        uid = rename
        low_energy_image = MedicalImageDeal(pydicom.dcmread(image_file["low_energy"]),
                                    image_file["low_energy"]).width_center1
        high_energy_image = MedicalImageDeal(pydicom.dcmread(image_file["high_energy"]),
                                    image_file["high_energy"]).width_center1
        enhance_image = MedicalImageDeal(pydicom.dcmread(image_file["enhance"]),
                                    image_file["enhance"]).width_center1
        R = random.randint(1, 100)
        if R >= 0 and R <= 70:
            f = h5py.File(save_dir+"/train" + '/{}_{}.h5'.format(uid, r_num), 'w')
        elif R > 70 and R <= 90:
            f = h5py.File(save_dir+"/valid" + '/{}_{}.h5'.format(uid, r_num), 'w')
        else:
            f = h5py.File(save_dir+"/test" + '/{}_{}.h5'.format(uid, r_num), 'w')
        f.create_dataset('LOW_ENERGY', data=low_energy_image, compression="gzip")
        f.create_dataset('HIGH_ENERGY', data=high_energy_image, compression="gzip")
        f.create_dataset('ENHANCE', data=enhance_image, compression="gzip")
        f.create_dataset('label', data=lab)
        f.close()
        r_num += 1
        print("process {} uid = {} label={}".format(r_num, uid,lab))

root_data = r"F:\300多的乳腺MR图像"
save_dir = r"F:\乳腺癌数据代码\MF\h5py"

# 读取恶性数据
generate_data(root_data,0,save_dir)
# 读取良性数据
generate_data(root_data,1,save_dir)

以上是处理MR乳腺癌的预处理过程。用于CT的肺并不适用,现在修改了代码,用于处理肺CT肺窗的图像预处理。加入了均值化。去掉了width_center1。

肺CT肺窗图像的预处理

import random
from skimage import transform
import numpy as np
import matplotlib.pyplot as plt
import nibabel as nib
import SimpleITK as sitk
import math
import glob
import os
import h5py
import pydicom
import cv2
from skimage.measure import label

# import gdcm

label_set = ["预后良好", "预后不良"]


def label_to_num(label):
    return label_set.index(label)


def num_to_label(n):
    return label_set[n]


import random


class MedicalImageDeal(object):
    def __init__(self, img, path, percent=1):
        self.img = img
        self.percent = percent
        self.p = path

    @property
    def valid_img(self):
        from skimage import exposure
        cdf = exposure.cumulative_distribution(self.img)
        watershed = cdf[1][cdf[0] >= self.percent][0]
        return np.clip(self.img, self.img.min(), watershed)

    @property
    def norm_img(self):
        return ((self.img - self.img.min()) / (self.img.max() - self.img.min())).astype(np.float32)


    @property
    def Histogram_equalization(self):
        from skimage import exposure
        # Read the image
        img = pydicom.dcmread(self.p).pixel_array
        # Perform histogram equalization
        img_res = exposure.equalize_hist(img)
        return img_res

def generate_data(root_data, index, save_dir):
    low_energy_fold = os.listdir(os.path.join(root_data, label_set[index], "T1"))
    high_energy_fold = os.listdir(os.path.join(root_data, label_set[index], "T2"))
    enhance_fold = os.listdir(os.path.join(root_data, label_set[index], "enhance"))

    patient_names = low_energy_fold
    images = []
    labs = []
    n = 0
    for i in range(len(patient_names)):  # len(patient_names)
        low_energy_list = glob.glob(os.path.join(root_data, label_set[index], "T1", patient_names[i], "*.dcm"))
        high_energy_list = glob.glob(os.path.join(root_data, label_set[index], "T2", patient_names[i], "*.dcm"))
        enhance_list = glob.glob(os.path.join(root_data, label_set[index], "enhance", patient_names[i], "*.dcm"))

        low_energy_list = sorted(low_energy_list)
        high_energy_list = sorted(high_energy_list)
        enhance_list = sorted(enhance_list)

        for j in range(len(low_energy_list)):
            item = {"low_energy": low_energy_list[j],
                    "high_energy": high_energy_list[j],
                    "enhance": enhance_list[j]}
            images.append(item)
            labs.append(index)
        r_num = 0
    for image_file, lab in zip(images, labs):
        (flepath, flename) = os.path.split(image_file["low_energy"])
        (rename, suffix) = os.path.splitext(flename)
        uid = rename
        low_energy_image = MedicalImageDeal(pydicom.dcmread(image_file["low_energy"]),
                                            image_file["low_energy"]).Histogram_equalization
        high_energy_image = MedicalImageDeal(pydicom.dcmread(image_file["high_energy"]),
                                             image_file["high_energy"]).Histogram_equalization
        enhance_image = MedicalImageDeal(pydicom.dcmread(image_file["enhance"]),
                                         image_file["enhance"]).Histogram_equalization
        R = random.randint(1, 100)
        if R >= 0 and R <= 70:
            f = h5py.File(save_dir + "/train" + '/{}_{}.h5'.format(uid, r_num), 'w')
        elif R > 70 and R <= 90:
            f = h5py.File(save_dir + "/valid" + '/{}_{}.h5'.format(uid, r_num), 'w')
        else:
            f = h5py.File(save_dir + "/test" + '/{}_{}.h5'.format(uid, r_num), 'w')
        f.create_dataset('LOW_ENERGY', data=low_energy_image, compression="gzip")
        f.create_dataset('HIGH_ENERGY', data=high_energy_image, compression="gzip")
        f.create_dataset('ENHANCE', data=enhance_image, compression="gzip")
        f.create_dataset('label', data=lab)
        f.close()
        r_num += 1
        print("process {} uid = {} label={}".format(r_num, uid, lab))


root_data = r"F:\王文江DL"
save_dir = r"F:\h5py"

# 读取恶性数据
generate_data(root_data, 0, save_dir)
# 读取良性数据
generate_data(root_data, 1, save_dir)

  • 8
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值