1. 文件格式
先看看文件夹下的数据文件, .mhd 和 .raw 是成对出现的.
import os
root = os.path.join(os.getcwd(),'data\\chestCT_round1_train_part1\\train_part1')
paths = os.listdir(root)
print(paths)
[‘396905.mhd’, ‘396905.raw’, ‘496305.mhd’, ‘496305.raw’, ‘522159.mhd’, ‘522159.raw’, …]
2. simpleITK 包读取 mhd 文件
import SimpleITK as sitk
import matplotlib.pyplot as plt
%matplotlib inline
for path in paths:
if path.find('mhd')>=0:
data =sitk.ReadImage(os.path.join(root,path))
print(data)
sitk.ReadImage(mhd_path) 读取到的数据
Image (000002497CF17750)
RTTI typeinfo: class itk::Image<short,3>
Reference Count: 1
Modified Time: 10251
Debug: Off
Object Name:
Observers:
none
Source: (none)
Source output name: (none)
Release Data: Off
Data Released: False
Global Release Data: Off
PipelineMTime: 10224
UpdateMTime: 10250
RealTimeStamp: 0 seconds
LargestPossibleRegion:
Dimension: 3
Index: [0, 0, 0]
Size: [512, 512, 57]
BufferedRegion:
Dimension: 3
Index: [0, 0, 0]
Size: [512, 512, 57]
RequestedRegion:
Dimension: 3
Index: [0, 0, 0]
Size: [512, 512, 57]
Spacing: [0.59375, 0.59375, 5]
Origin: [-172, -122.6, -265.75]
Direction:
1 0 0
0 1 0
0 0 1
IndexToPointMatrix:
0.59375 0 0
0 0.59375 0
0 0 5
PointToIndexMatrix:
1.68421 0 0
0 1.68421 0
0 0 0.2
Inverse Direction:
1 0 0
0 1 0
0 0 1
PixelContainer:
ImportImageContainer (000002497CEFDE20)
RTTI typeinfo: class itk::ImportImageContainer<unsigned __int64,short>
Reference Count: 1
Modified Time: 10247
Debug: Off
Object Name:
Observers:
none
Pointer: 000002490BAC3040
Container manages memory: true
Size: 14942208
Capacity: 14942208
其中比较有用的信息是 spacing, 代表相邻体素的距离(mm),可以用它来估算连通域的体积。
3. 显示每个切片
spacing = data.GetSpacing()
scan = sitk.GetArrayFromImage(data)
print('spacing: ', spacing)
print('# slice: ', len(scan))
plot_ct_scan(scan)
spacing: (0.59375, 0.59375, 5.0)
#slice: 57
关键代码
def plot_ct_scan(scan, num_column=4, jump=1):
num_slices = len(scan)
num_row = (num_slices//jump + num_column - 1) // num_column
f, plots = plt.subplots(num_row, num_column, figsize=(num_column*5, num_row*5))
for i in range(0, num_row*num_column):
plot = plots[i % num_column] if num_row == 1 else plots[i // num_column, i % num_column]
plot.axis('off')
if i < num_slices//jump:
plot.imshow(scan[i*jump], cmap=plt.cm.bone)
4. 肺部分割
首先自然想到 阈值分割,看看 HU 值分布,最小值是边角的黑色区域,
而且肺部和它周边区域可以用阈值 -400 分开。
from skimage.segmentation import clear_border
from skimage.measure import label,regionprops, perimeter
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
from skimage.filters import roberts, sobel
from scipy import ndimage as ndi
import scipy.ndimage
def get_segmented_lungs(im, spacing, threshold=-400):
'''
This funtion segments the lungs from the given 2D slice.
'''
'''
Step 1: Convert into a binary image.
'''
binary = im < threshold
'''
Step 2: Remove the blobs connected to the border of the image.
'''
cleared = clear_border(binary)
'''
Step 3: Label the image.
'''
label_image = label(cleared)
'''
Step 4: Keep the labels with 2 largest areas.
'''
areas = [r.area for r in regionprops(label_image)]
areas.sort()
if len(areas) > 2:
for region in regionprops(label_image):
if region.area < areas[-2]:
for coordinates in region.coords:
label_image[coordinates[0], coordinates[1]] = 0
binary = label_image > 0
'''
Step 5: Erosion operation with a disk of radius 2. This operation is
seperate the lung nodules attached to the blood vessels.
'''
selem = disk(2)
binary = binary_erosion(binary, selem)
'''
Step 6: Closure operation with a disk of radius 10. This operation is
to keep nodules attached to the lung wall.
'''
selem = disk(10)
binary = binary_closing(binary, selem)
edges = roberts(binary)
binary = ndi.binary_fill_holes(edges)
return binary
path = paths[0]
data = sitk.ReadImage(os.path.join(root,path))
spacing = data.GetSpacing()
scan = sitk.GetArrayFromImage(data)
print(scan.shape[0])
mask = np.array([get_segmented_lungs(slice.copy(), spacing) for slice in scan])
scan[~mask] = 0
plot_ct_scan(scan, jump=1)
分割结果
优化:
from skimage import measure
def extract_main(mask, spacing, vol_limit=[0.68, 8.2]):
voxel_vol = spacing[0]*spacing[1]*spacing[2]
label = measure.label(mask, connectivity=1)
properties = measure.regionprops(label)
for prop in properties:
if prop.area * voxel_vol < vol_limit[0] * 1e6 or prop.area * voxel_vol > vol_limit[1] * 1e6:
mask[label == prop.label] = 0
return mask
mask = extract_main(mask, spacing)
scan[~mask] = 0
plot_ct_scan(scan, jump=1)
结果
5. 3D 可视化
import numpy as np
from skimage import measure, feature
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
def plot_3d(image, threshold=-400):
# Position the scan upright,
# so the head of the patient would be at the top facing the camera
p = image.transpose(2,1,0)
# p = p[:,:,::-1]
verts,faces = measure.marching_cubes_classic(p, threshold)
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
# Fancy indexing: `verts[faces]` to generate a collection of triangles
mesh = Poly3DCollection(verts[faces], alpha=0.1)
face_color = [0.5, 0.5, 1]
mesh.set_facecolor(face_color)
ax.add_collection3d(mesh)
ax.set_xlim(0, p.shape[0])
ax.set_ylim(0, p.shape[1])
ax.set_zlim(0, p.shape[2])
plt.show()
plot_3d(scan)
结果