在处理LIDC-IDRI时,处理到后期发现一个问题:在每一个病例中,切片文件命名的编号,跟dicom中给出的切片编号不一致。这在处理数据时,无形中增加了一些难度,尤其是在运行别人的代码时,这个问题尤为突出。经过不断的搜索,最终在官网的一个页面上发现了这个问题的一种解决方式,先附上官网的链接:
https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=24283641
如何下载LIDC-IDRI数据就不多说了,很多博客中都讲得很细致,下面具体地描述一下我遇到的问题。
从NBIA上下载下来数据后,是这样的
打开第一个病例,如图所示:
其中问题出在,此时的第一个文件对应的切片的标号为:80
但是其实我想要的效果是这样的,标号为000.dcm的文件里存放的是切片号为1的切片:
二、解决
# noinspection PyPep8Naming
import SimpleITK as sitk
import glob as gb
import os
import shutil
def safe_sitk_read(img_list, *args, **kwargs):
    """Return the DICOM files of a series ordered by slice z position.

    Only the first series found in the directory is used. Every file is
    opened individually to read its image origin, which is much slower than
    trusting the file-name order but does not depend on how the files are
    named. (Original author's rationale: the default reader only looks at
    images 0 and 1 to determine slice thickness, and the files are often not
    correctly sorted alphabetically.)

    :param img_list: path of the directory holding the DICOM series (despite
        the name, this is a directory path, not a list)
    :param args: accepted for call compatibility; unused
    :param kwargs: accepted for call compatibility; unused
    :return: list of file paths sorted by ascending third (z) component of
        each file's image origin
    """
    reader = sitk.ImageSeriesReader()
    # Use the directory that was passed in, not the module-level x_path,
    # so the function works for any series directory.
    series_ids = reader.GetGDCMSeriesIDs(img_list)
    dicom_names = reader.GetGDCMSeriesFileNames(img_list, series_ids[0])
    reader.SetFileNames(dicom_names)
    image_list = reader.GetFileNames()
    # Pair every file with its origin so the paths can be sorted by z.
    pimg_list = [(sitk.ReadImage(x).GetOrigin(), x) for x in image_list]
    s_img_list = [path for _, path in sorted(pimg_list, key=lambda x: x[0][2])]
    print('s_img_list', s_img_list)
    return s_img_list
# One case downloaded from NBIA: the directory that holds the series' .dcm files.
x_path = 'E:/qiepianmingming/LIDC-IDRI-0002/01-01-2000-98329/3000522-04919/'
# Arbitrarily chosen output prefix; the trailing '0' becomes the first
# character of every generated file name.
jieguo_path = 'E:/qiepianmingming/LIDC-IDRI-0002/1/0'

z_sort = safe_sitk_read(x_path)

# shutil.copy does not create missing directories, so make sure the folder
# part of the output prefix exists before copying.
os.makedirs(os.path.dirname(jieguo_path), exist_ok=True)

# z_sort is ascending in z. Numbers are handed out from len(z_sort) down to 1,
# so the file with the lowest z gets the highest number.
i = len(z_sort) + 1
for img in z_sort:
    i -= 1
    # Prefix + number padded to at least three digits,
    # e.g. '.../1/0' + '007' + '.dcm' -> '.../1/0007.dcm'.
    dst = jieguo_path + str(i).zfill(3) + '.dcm'
    shutil.copy(img, dst)
有待解决的问题是,将所有的数据都进行转化
补充
通过一个程序实现所有文件夹内的dcm图片按编号排列
# noinspection PyPep8Naming
import SimpleITK as sitk
import glob as gb
import os
import shutil
# Per the original note: the slice number of each .dcm image is reflected in
# its z position, so the files are sorted along the z axis to recover the
# numbering.
def safe_sitk_read(img_list, *args, **kwargs):
    """List the DICOM files of the first series in a directory, sorted by z.

    Each file is read on its own to obtain its image origin, and the file
    paths are then ordered by the third component of that origin. The
    original author's note says this is done because the default reader only
    looks at images 0 and 1 to determine slice thickness and the files are
    often not correctly sorted alphabetically; the price is that it is much
    slower.

    :param img_list: directory containing the DICOM series (a path, not a list)
    :param args: unused
    :param kwargs: unused
    :return: list of file paths in ascending order of origin z
    """
    series_reader = sitk.ImageSeriesReader()
    series_ids = series_reader.GetGDCMSeriesIDs(img_list)
    first_series_files = series_reader.GetGDCMSeriesFileNames(img_list, series_ids[0])
    series_reader.SetFileNames(first_series_files)

    # Collect (origin, path) for every file of the series.
    origin_and_path = []
    for file_name in series_reader.GetFileNames():
        origin_and_path.append((sitk.ReadImage(file_name).GetOrigin(), file_name))

    def z_of(entry):
        # entry is (origin, path); origin[2] is the third origin component.
        return entry[0][2]

    # list.sort is stable, so files with equal z keep their reader order.
    origin_and_path.sort(key=z_of)
    return [file_name for _origin, file_name in origin_and_path]
# x_path is the input prefix. Patient folders are named LIDC-IDRI-0001 ...
# LIDC-IDRI-0133; each contains a study folder, which contains a series
# folder holding all the .dcm files of that case.
x_path = r'E:\zonghe_feijiejie\LIDC-IDRI\LIDC-IDRI-0'
# Output prefix. To mirror the input layout, everything below
# E:\zonghe_feijiejie\shunxu_LIDC-IDRI is generated by this script.
jieguo_path = r'E:\zonghe_feijiejie\shunxu_LIDC-IDRI\LIDC-IDRI-0'

# Only patients 1..3 are processed here; widen the range to convert more.
for i in range(1, 4):
    print('i', i)

    # Both prefixes already end in '0', so a three-digit suffix yields the
    # four-digit patient id (e.g. 'LIDC-IDRI-0' + '001').
    patient_suffix = str(i).zfill(3)
    dst = x_path + patient_suffix
    jie = jieguo_path + patient_suffix

    # Full series path, e.g.
    # E:\zonghe_feijiejie\LIDC-IDRI\LIDC-IDRI-0001\<study uid>\<series uid>
    # Only the first study and the first series listed in each folder are used.
    study_name = os.listdir(dst)[0]
    study1 = os.path.join(dst, study_name)
    series_name = os.listdir(study1)[0]
    series1 = os.path.join(study1, series_name)
    path = series1
    print("path", path)

    # Read and sort the series once; this opens every file and is slow.
    z_sort = safe_sitk_read(path)

    # Mirror the study/series folder names under the output patient folder.
    study2 = os.path.join(jie, study_name)
    series2 = os.path.join(study2, series_name)
    # exist_ok lets the script be re-run without failing on existing folders.
    os.makedirs(series2, exist_ok=True)

    # Store the renumbered copies. Per the original note, z_sort has the
    # last-numbered slice first, so numbers are assigned counting down from
    # len(z_sort) to 1.
    j = len(z_sort)
    for img in z_sort:
        # '0' + number padded to at least three digits, e.g. 0007.dcm, 0123.dcm
        cunchu = os.path.join(series2, '0' + str(j).zfill(3) + '.dcm')
        shutil.copy(img, cunchu)
        j -= 1