luna16肺部CT数据集预处理（根据标注信息生成掩膜）

颀羽

已于 2022-03-25 20:16:02 修改

阅读量3.2k

点赞数 3

分类专栏： CV 文章标签： python

于 2022-03-25 20:03:41 首次发布

本文链接：https://blog.csdn.net/m0_57527312/article/details/123744157

版权

CV 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

in:（luna16）

*.mhd

*.raw

标注文件：annotations.csv

out:

大小为512*512，只包含一张scan的图片和掩膜

from __future__ import print_function, division
import SimpleITK as sitk
import numpy as np
import csv
from glob import glob
import os
import pandas as pd
try:
    from tqdm import tqdm # long waits are not fun
except:
    print('TQDM does make much nicer wait bars...')
    tqdm = lambda x: x

#Some helper functions

def make_mask(center,diam,z,width,height,spacing,origin):
    '''
Center : centers of circles px -- list of coordinates x,y,z
diam : diameters of circles px -- diameter
widthXheight : pixel dim of image
spacing = mm/px conversion rate np array x,y,z
origin = x,y,z mm np.array
z = z position of slice in world coordinates mm
    '''
    mask = np.zeros([height,width]) # 0's everywhere except nodule swapping x,y to match img
    #convert to nodule space from world coordinates

    # Defining the voxel range in which the nodule falls
    v_center = (center-origin)/spacing
    v_diam = int(diam/spacing[0]+5)
    v_xmin = np.max([0,int(v_center[0]-v_diam)-5])
    v_xmax = np.min([width-1,int(v_center[0]+v_diam)+5])
    v_ymin = np.max([0,int(v_center[1]-v_diam)-5])
    v_ymax = np.min([height-1,int(v_center[1]+v_diam)+5])

    v_xrange = range(v_xmin,v_xmax+1)
    v_yrange = range(v_ymin,v_ymax+1)

    # Convert back to world coordinates for distance calculation
    x_data = [x*spacing[0]+origin[0] for x in range(width)]
    y_data = [x*spacing[1]+origin[1] for x in range(height)]

    # Fill in 1 within sphere around nodule
    for v_x in v_xrange:
        for v_y in v_yrange:
            p_x = spacing[0]*v_x + origin[0]
            p_y = spacing[1]*v_y + origin[1]
            if np.linalg.norm(center-np.array([p_x,p_y,z]))<=diam:
                mask[int((p_y-origin[1])/spacing[1]),int((p_x-origin[0])/spacing[0])] = 1.0
    return(mask)

def matrix2int16(matrix):
    '''
matrix must be a numpy array NXN
Returns uint16 version
    '''
    m_min= np.min(matrix)
    m_max= np.max(matrix)
    matrix = matrix-m_min
    return(np.array(np.rint( (matrix-m_min)/float(m_max-m_min) * 65535.0),dtype=np.uint16))

############
#
# Getting list of image files
luna_path = "C:/Users/jiaop/Desktop/luna16_dataset/"
luna_subset_path = luna_path+"set1/"
output_path = luna_path+"output_set1/"
file_list=glob(luna_subset_path+"*.mhd")


#####################
#
# Helper function to get rows in data frame associated
# with each file
def get_filename(file_list, case):
    for f in file_list:
        if case in f:
            return(f)
#
# The locations of the nodes
df_node = pd.read_csv(luna_path+"annotations.csv")
df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(file_list, file_name))
df_node = df_node.dropna()

#####
#
# Looping over the image files
#
for fcount, img_file in enumerate(tqdm(file_list)):
    mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
    if mini_df.shape[0]>0: # some files may not have a nodule--skipping those
        # load the data once
        itk_img = sitk.ReadImage(img_file)
        img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
        num_z, height, width = img_array.shape        #heightXwidth constitute the transverse plane
        origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
        spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
        # go through all nodes (why just the biggest?)
        for node_idx, cur_row in mini_df.iterrows():
            node_x = cur_row["coordX"]
            node_y = cur_row["coordY"]
            node_z = cur_row["coordZ"]
            diam = cur_row["diameter_mm"]
            # just keep 3 slices
            # imgs = np.ndarray([3,height,width],dtype=np.float32)
            # masks = np.ndarray([3,height,width],dtype=np.uint8)
            # # try 1 slice
            # imgs = np.ndarray([1, height, width], dtype=np.float32)
            # masks = np.ndarray([1,height,width],dtype=np.uint8)#原版

            imgs = np.ndarray([height, width], dtype=np.float32)
            masks = np.ndarray([height, width], dtype=np.uint8)  # 原版
            # masks = np.ndarray([1, height, width], dtype=np.float32)#修改
            center = np.array([node_x, node_y, node_z])   # nodule center
            v_center = np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
            # print(np.arange(int(v_center[2])-1,
            #                  int(v_center[2])+2).clip(0, num_z-1))
            # for i, i_z in enumerate(np.arange(int(v_center[2])-1,
            #                  int(v_center[2])+2).clip(0, num_z-1)): # clip prevents going out of bounds in Z
            #     print('i=')
            #     print(i)
            #     print('i_z=')
            #     print(i_z)
            #     mask = make_mask(center, diam, i_z*spacing[2]+origin[2],
            #                      width, height, spacing, origin)
            #     masks[i] = mask
            #     imgs[i] = img_array[i_z]
            for i, i_z in enumerate(np.arange(int(v_center[2])-1,
                             int(v_center[2])+2).clip(0, num_z-1)): # clip prevents going out of bounds in Z
                print('i=')
                print(i)
                print('i_z=')
                i_z=i_z+1
                print(i_z)

                mask = make_mask(center, diam, i_z*spacing[2]+origin[2],
                                 width, height, spacing, origin)
                # masks[i] = mask
                # imgs[i] = img_array[i_z]
                masks= mask
                imgs = img_array[i_z]

                break
            np.save(os.path.join(output_path,"images_%04d_%04d.npy" % (fcount, node_idx)),imgs)
            np.save(os.path.join(output_path,"masks_%04d_%04d.npy" % (fcount, node_idx)),masks)
            print(imgs.shape)

查看生成的npy文件：

import numpy as np

from matplotlib import pyplot as plt

working_path = "C:/Users/jiaop/Desktop/luna16_dataset/output_set1/"

imgs = np.load(working_path + 'masks_0008_0105.npy')

print(imgs.shape)

plt.imshow(imgs, cmap='gray')#灰度图展示

plt.show()