Kaggle Data Science Bowl 2017: Preprocessing DICOM CT Scan Data with Python

import dicom # for reading dicom files (the pre-1.0 pydicom package; newer versions are imported as pydicom)
import os # for doing directory operations 
import pandas as pd # for some simple data analysis (right now, just to load in the labels data and quickly reference it)

# Change this to wherever you are storing your data:
# IF YOU ARE FOLLOWING ON KAGGLE, YOU CAN ONLY PLAY WITH THE SAMPLE DATA, WHICH IS MUCH SMALLER

data_dir = '../input/sample_images/'
patients = os.listdir(data_dir)
labels_df = pd.read_csv('../input/stage1_labels.csv', index_col=0)
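# stage1_labels.csv maps each patient id to a 0/1 'cancer' label; index_col=0
# makes the patient id the index so we can look labels up by patient below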

labels_df.head()
for patient in patients[:1]:
    label = labels_df.get_value(patient, 'cancer')  # note: get_value was removed in pandas 1.0+; on newer pandas use labels_df.at[patient, 'cancer']
    path = data_dir + patient

    # a couple great 1-liners from: https://www.kaggle.com/gzuidhof/data-science-bowl-2017/full-preprocessing-tutorial
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]

    # sort this patient's slices by the z-coordinate in ImagePositionPatient so
    # they are stacked in the correct scan order -- a consistent ordering is
    # what lets us later assemble the slices into a 3-D volume (each x here is
    # one DICOM slice object with many header attributes)
    slices.sort(key=lambda x: int(x.ImagePositionPatient[2]))
    print(len(slices),label)
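    # printing the first slice dumps its full DICOM header: patient/scan
    # metadata such as ImagePositionPatient, plus a reference to the pixel data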
    print(slices[0])
import cv2
import numpy as np
import matplotlib.pyplot as plt  # needed for the plotting below (plt.figure / plt.show)

# The raw slices are 512 x 512 pixels, which is larger than we need; here we
# simply demonstrate resizing each slice down to 150 x 150.
IMG_PX_SIZE = 150

for patient in patients[:1]:
    label = labels_df.get_value(patient, 'cancer')
    path = data_dir + patient
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    slices.sort(key=lambda x: int(x.ImagePositionPatient[2]))
# If you need several figures at once, figure() can take an integer id; if a
# Figure with that id already exists, matplotlib reuses it as the current
# figure instead of creating a new one.
    fig = plt.figure(1)
    for num, each_slice in enumerate(slices[:12]):
        y = fig.add_subplot(3, 4, num + 1)
        # OpenCV resizes the slice's pixel array to the target size
        new_img = cv2.resize(np.array(each_slice.pixel_array), (IMG_PX_SIZE, IMG_PX_SIZE))
        y.imshow(new_img)
    plt.show()
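# Aside (not from the original tutorial): once every slice of a patient has
# been resized, the whole scan can be stacked into one 3-D numpy volume of
# shape (depth, height, width) -- the kind of array a 3-D convnet ultimately
# expects. A minimal sketch using the variables defined above:
resized = [cv2.resize(np.array(s.pixel_array), (IMG_PX_SIZE, IMG_PX_SIZE)) for s in slices]
volume = np.stack(resized)
print(volume.shape)  # (number of slices for this patient, 150, 150)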
# The next step is to handle depth: different patients have different numbers of
# slices (len(slices) differs), so we need to make the depth uniform. We can do
# this by downsampling, since a 3-D convolutional network (like most algorithms)
# expects all of its inputs to have the same dimensions.
import math
def chunks(l, n):
    # Credit: Ned Batchelder
    # Link: http://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def mean(l):
    return sum(l) / len(l)
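
# A tiny worked example of what these two helpers do together (illustrative
# values only): chunks() splits a list into consecutive pieces of size n, and
# mapping mean() over zip(*chunk) averages the corresponding entries of the
# items in a chunk -- the same trick is used below to average groups of slices
# pixel-by-pixel.
print(list(chunks(list(range(7)), 3)))          # [[0, 1, 2], [3, 4, 5], [6]]
print(list(map(mean, zip(*[[1, 2], [3, 4]]))))  # [2.0, 3.0]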

IMG_PX_SIZE = 150
HM_SLICES = 20

data_dir = '../input/sample_images/'
patients = os.listdir(data_dir)
labels_df = pd.read_csv('../input/stage1_labels.csv', index_col=0)

for patient in patients[:10]:
    try:
        label = labels_df.get_value(patient, 'cancer')
        path = data_dir + patient
        slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
        slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))
        new_slices = []
        slices = [cv2.resize(np.array(each_slice.pixel_array),(IMG_PX_SIZE,IMG_PX_SIZE)) for each_slice in slices]
        # math.ceil() rounds up to the nearest integer, e.g. math.ceil(137 / 20) == 7
        chunk_sizes = math.ceil(len(slices) / HM_SLICES)
        for slice_chunk in chunks(slices, chunk_sizes):
            # zip(*slice_chunk) lines up the corresponding rows of every slice
            # in the chunk, and mean() averages them, so each chunk of slices
            # collapses into a single averaged slice
            slice_chunk = list(map(mean, zip(*slice_chunk)))
            new_slices.append(slice_chunk)

        print(len(slices), len(new_slices))
    except:
        # some patients don't have labels, so we'll just pass on this for now
        pass
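
# Note: because chunk_sizes comes from math.ceil, the chunking above can yield
# slightly more or fewer than HM_SLICES averaged slices (e.g. 19 or 21), so a
# real pipeline still has to force the depth to exactly HM_SLICES before
# feeding a 3-D network. A minimal sketch of one possible policy (repeat the
# last slice when short, fold the extras into the last kept slice when long);
# this exact policy is an assumption, not part of the code above:
def normalize_depth(new_slices, hm_slices=HM_SLICES):
    while len(new_slices) < hm_slices:
        # too few chunks: duplicate the last averaged slice
        new_slices.append(new_slices[-1])
    while len(new_slices) > hm_slices:
        # too many chunks: average the extra chunk into the last kept one
        extra = new_slices.pop()
        new_slices[hm_slices - 1] = list(map(mean, zip(new_slices[hm_slices - 1], extra)))
    return new_slices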