第一步 数据集介绍 准备工作
文件夹6GCCS 原始dicom 数据
每一个代表一个患者
这里代表六个切面
上面为dicom数据,下面是处理过程中需要舍弃的
用来当做三个切面的存储主目录
患者的划分:patienti_split.ipynb
患者名单 GCCS名单.xlsx 如下图所示
以ID后两位作为患者的唯一标识。钙化指数和冠脉狭窄程度是需要预测的标签;狭窄程度每十分为一个等级(经过尝试,无法直接预测出),目前只针对钙化指数进行预测。
import os
import SimpleITK as sitk
import cv2
from tqdm import tqdm
import pandas as pd
pandas 处理表格数据
# Load the patient roster. engine='openpyxl' is passed explicitly because the
# file is an .xlsx workbook.
patient_data = pd.read_excel(
    '/home/tlz/GCCS/6GCCS/GCCS 名单.xlsx',
    engine='openpyxl',
)
# .iloc selects by position: index 2 is the third column of the sheet.
PATIENT_ID = patient_data.iloc[:, 2]
# .loc selects by label: the '钙化指数' (calcification index) column.
PATIENT_GCCS = patient_data.loc[:, '钙化指数']
# Put the two columns side by side and order the rows by calcification index,
# because patients are split according to that score.
PATIENT_INFORMATION = pd.concat(
    [PATIENT_ID, PATIENT_GCCS],
    axis=1,
).sort_values('钙化指数')
# One DataFrame per score value (1, 2, 3 and 4).
PATIEN_1SCORE, PATIEN_2SCORE, PATIEN_3SCORE, PATIEN_4SCORE = (
    PATIENT_INFORMATION[PATIENT_INFORMATION['钙化指数'] == score]
    for score in (1, 2, 3, 4)
)
iloc函数用于按位置进行索引
loc 可以用列名进行裁剪
划分成 四个dataframe之后,按照比例划分训练集测试集
注意 为了避免图片的高度相似性,必须从源头上 按患者进行划分
# Validation patients: the leading rows of every score group, stacked in
# score order.
# NOTE(review): the quotas below add up to at most 9 + 6 + 3 + 2 = 20 rows,
# while the accompanying notes mention 19 validation patients -- confirm
# which figure is intended.
_validation_quota = [
    (PATIEN_1SCORE, 9),
    (PATIEN_2SCORE, 6),
    (PATIEN_3SCORE, 3),
    (PATIEN_4SCORE, 2),
]
PATIENT_VALIDATION = pd.concat(
    [group.iloc[:count] for group, count in _validation_quota]
)
# Bare expression: shows the frame when this runs as a notebook cell.
PATIENT_VALIDATION
55名患者
验证集:19 1 分-9、2分-6 3分-3 4分-1
训练集:36 1 分-17、2分-12 3分-6 4分-2
后期出现新的Excel文件,只有静态的检测分(之前的总评分为静态加动态,由于动态评分不稳定,目前只做静态钙化检测)
与前文处理方式相似
import os
from shutil import copy, rmtree
import random
import pandas as pd
# Load the label workbook and keep two columns: the one at position 2 and the
# one labelled 'Total'.
patient_data = pd.read_excel(
    '/home/tlz/GCCS/JMY-GCCS-LABEL.xlsx',
    engine='openpyxl',
)
PATIENT_ID = patient_data.iloc[:, 2]
PATIENT_GCCS = patient_data.loc[:, 'Total']
# Side-by-side table of both columns, ordered by the 'Total' score.
PATIENT_INFORMATION = pd.concat(
    [PATIENT_ID, PATIENT_GCCS],
    axis=1,
).sort_values('Total')
# One DataFrame per 'Total' value (0, 1 and 2).
PATIEN_0SCORE, PATIEN_1SCORE, PATIEN_2SCORE = (
    PATIENT_INFORMATION[PATIENT_INFORMATION['Total'] == score]
    for score in (0, 1, 2)
)
resize_dicom2jpg.py 解析dicom帧
import os
import SimpleITK as sitk
import cv2
from tqdm import tqdm
import pandas as pd
def avi2jpg(avi_dir, save_dir):
    """Dump every frame of a video file to numbered JPEG images.

    Frames are written to ``save_dir`` as ``000.jpg``, ``001.jpg``, ... in the
    order ``cv2.VideoCapture.read`` delivers them; reading stops at the first
    unsuccessful ``read``.

    Args:
        avi_dir: Path handed to ``cv2.VideoCapture`` (a video file path,
            despite the ``_dir`` suffix).
        save_dir: Directory that receives the JPEG files. It is created when
            it does not exist yet.
    """
    # cv2.imwrite reports failure through its return value only, so a missing
    # output directory would otherwise drop every frame without any error.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    video = cv2.VideoCapture(avi_dir)
    try:
        frame_index = 0
        while True:
            ret, frame = video.read()
            if not ret:
                break
            img_name = '{:0>3d}.jpg'.format(frame_index)
            cv2.imwrite(os.path.join(save_dir, img_name), frame)
            frame_index += 1
    finally:
        # Release the capture even when reading or writing raises.
        video.release()
        cv2.destroyAllWindows()
def dicom2array(dir):
    """Read the image file at ``dir`` with SimpleITK and return it as an array.

    Args:
        dir: Path of the file to read (a file path, despite the name).

    Returns:
        Whatever ``sitk.GetArrayFromImage`` produces for the loaded image.
    """
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(dir)
    return sitk.GetArrayFromImage(file_reader.Execute())
def dicom2jpg(file_dir, save_dir, id, score):
    """Write every frame of one DICOM file to ``save_dir`` as a JPEG.

    Output files are named ``<id>_<frame index, 4 digits>.jpg``.

    Args:
        file_dir: Path of the DICOM file to decode.
        save_dir: Directory the JPEG frames are written into.
        id: Patient identifier used as the file-name prefix.
        score: Patient label. Unused by the current naming scheme; the
            alternative scheme noted below would put it in front of the id.
    """
    frames = dicom2array(file_dir)
    frame_total = frames.shape[0]
    for frame_no in range(frame_total):
        # Alternative naming that also encodes the label:
        #   '{}_{}_{:0>4d}.jpg'.format(score, id, frame_no)
        out_name = '{!s}_{:0>4d}.jpg'.format(id, frame_no)
        # The four-index slice assumes a 4-D array with frames on axis 0.
        cv2.imwrite(os.path.join(save_dir, out_name), frames[frame_no, :, :, :])
src_dir = '/home/tlz/GCCS/6GCCS'
# Output directory -- change it when exporting another view or split.
save_dir = '/home/tlz/GCCS/Data_SAX-BAS'
# save_dir = '/home/tlz/GCCS/Data/test_patient'
# save_dir = '/home/tlz/GCCS/Data/train_patient'
# save_dir = '/home/tlz/GCCS/Data/validation_patient'
PATIENT_LIST = PATIENT_INFORMATION
# PATIENT_LIST = PATIENT_TEST
# PATIENT_LIST = PATIENT_TRAIN
# PATIENT_LIST = PATIENT_VALIDATION

# Name of the view sub-folder to export; edit this to switch views.
view_name = "SAX-BAS"

# cv2.imwrite (used by dicom2jpg) does not create directories, so make sure
# the target exists before any frame is written.
os.makedirs(save_dir, exist_ok=True)

for root, dirs, files in os.walk(src_dir, topdown=False):
    path_parts = root.split('/')
    if path_parts[-1] != view_name:
        continue

    # Characters 3-4 of the parent folder name are compared with the last two
    # characters of the ID column (column 0) of PATIENT_LIST.
    patient_id = path_parts[-2][3:5]

    # The lookup only depends on the folder, so do it once per folder rather
    # than once per file.
    for listed_id, listed_score in zip(PATIENT_LIST.iloc[:, 0],
                                       PATIENT_LIST.iloc[:, 1]):
        if listed_id[-2:] == patient_id:
            score = listed_score
            break
    else:
        # Patient not present in PATIENT_LIST: skip the whole folder.
        # NOTE(review): this follows the for/else reading of the original
        # `else: continue` -- confirm that unmatched patients must be skipped.
        continue

    for file in tqdm(files):
        # Skip the DICOMDIR index and any file whose name ends in 'g'
        # (e.g. already exported .jpg/.png images).
        if file == 'DICOMDIR' or file.endswith('g'):
            continue
        file_dir = os.path.join(root, file)
        # The output naming rule lives in dicom2jpg and can be changed there.
        dicom2jpg(file_dir, save_dir, patient_id, score)
数据集划分
每一个文件夹 里包含该切面的一系列文件
Data_A3C为解析出dicom 的原始帧
Data_resize 为对解析后的帧缩放到统一大小
Resize 图像函数到一个大小(包括之前所做的尝试)
mouse_event.py 鼠标点击,确定坐标以及 RGB 的值
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import numpy as np
def click_event(event, x, y, flags, param):
    """Mouse callback that prints the clicked position or pixel colour.

    * Left button down: prints ``x , y``.
    * Right button down: prints the channel 0, 1 and 2 values (named blue,
      green and red here) of the module-level ``img`` at the clicked pixel.

    Args:
        event: OpenCV mouse event code.
        x: Column of the mouse position.
        y: Row of the mouse position.
        flags: Unused; part of the callback signature.
        param: Unused; part of the callback signature.
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        print(x, ',', y)
    elif event == cv2.EVENT_RBUTTONDOWN:
        # Image arrays are indexed [row, column], i.e. [y, x].
        blue = img[y, x, 0]
        green = img[y, x, 1]
        red = img[y, x, 2]
        print(blue, ',', green, ',', red)
    # To draw the value onto the image instead of printing it, use
    # cv2.putText(img, text, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1,
    # (255, 255, 0), 2) followed by cv2.imshow('image', img).
# Sample frame of the larger resolution (708 x 1016, presumably height x width).
# A raw string keeps the Windows path free of invalid escape sequences such as
# "\g" and "\D"; its value is unchanged.
img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data\01_0000.jpg"
# Sample frame of the smaller resolution (434 x 636):
# img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data\54_0001.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: {}".format(img_path))

# Per-pixel sum of the three channels. Summing in float64 avoids the wrap-around
# that adding the uint8 channel values directly would produce.
imgrgb_array = img.sum(axis=2, dtype=np.float64)

cv2.imshow('img', img)
# NOTE(review): cv2.imshow scales floating-point images by 255, so every pixel
# whose sum is >= 1 is shown as white -- effectively a mask of the non-black
# area. Confirm that this is the intended visualisation.
cv2.imshow('imgrgb', imgrgb_array)
cv2.setMouseCallback('img', click_event)
cv2.waitKey(0)
cv2.destroyAllWindows()
rescaleimg.py 对图像进行resize 因为原始图像具有两个分辨率,最后采取直接进行缩放到小的那个尺寸
from operator import le
import numpy as np
import cv2, os, json, shutil
from copy import deepcopy
from os import path as osp
from PIL import Image
from numpy.lib.arraypad import pad
def CropBackground(src, target):
    """Crop a rectangular region out of an image array.

    Works for arrays indexed ``[row, column]`` as well as
    ``[row, column, channel]``; axes after the first two are kept whole.

    Args:
        src: Image array to crop.
        target: Indexable with four entries
            ``(row_start, row_stop, col_start, col_stop)``; stops are
            exclusive, as in normal slicing.

    Returns:
        ``src[row_start:row_stop, col_start:col_stop]``.
    """
    # Leaving the trailing axes implicit gives the same result as an explicit
    # ``:`` for colour images and additionally accepts 2-D (grayscale) input.
    return src[target[0]:target[1], target[2]:target[3]]
#############################################################################################################
if __name__ == '__main__':
    # Directory with the extracted frames (raw strings avoid invalid escape
    # sequences such as "\g" and "\D" in the Windows paths; values unchanged).
    img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data"
    # Directory that receives the uniformly sized frames.
    resize_path = r"F:\tlz\gccs_data\Data_LVLAX_resize"

    # The two resolutions handled below, as (width, height) -- the order used
    # by both PIL's Image.size and cv2.resize's dsize argument.
    small_size = (636, 434)
    large_size = (1016, 708)

    # cv2.imwrite does not create missing directories.
    os.makedirs(resize_path, exist_ok=True)

    # Group the image file names by resolution. setdefault() creates the list
    # and appends in one step, so the first file of every resolution is kept
    # (creating the list in a separate step would lose that first file).
    files_imgsize = {}
    for file in os.listdir(img_path):
        if not file.endswith('g'):
            continue
        # Only the size is needed; close the file handle right away.
        with Image.open(os.path.join(img_path, file)) as img:
            size = img.size
        files_imgsize.setdefault(size, []).append(file)

    # Frames that already have the target size are re-written unchanged.
    # .get(..., []) tolerates a folder that contains no frame of that size.
    for file in files_imgsize.get(small_size, []):
        label_img = cv2.imread(os.path.join(img_path, file))
        # Optional manual background crop for these frames (disabled):
        #   label_img = CropBackground(label_img, target=[1, 428, 40, 592])
        resizeimg_path = os.path.join(resize_path, file)
        print(resizeimg_path)
        cv2.imwrite(resizeimg_path, label_img)

    # Larger frames are scaled down to the smaller resolution.
    for file in files_imgsize.get(large_size, []):
        label_img = cv2.imread(os.path.join(img_path, file))
        # Optional manual background crop for these frames (disabled):
        #   label_img = CropBackground(label_img, target=[43, 193, 68, 938])
        label_img = cv2.resize(label_img, small_size)
        resizeimg_path = os.path.join(resize_path, file)
        cv2.imwrite(resizeimg_path, label_img)
# Folder with the resized frames of the view being split (pick one).
# files_path = '/home/tlz/GCCS/Data_A3C_ALL/Data_A3C_resize'
# files_path = '/home/tlz/GCCS/Data_AOSAX_ALL/Data_AOSAX_resize'
files_path = '/home/tlz/GCCS/Data_LVLAX_ALL/Data_LVLAX_resize'
# Explicit check instead of `assert`, which is stripped under `python -O`.
if not os.path.exists(files_path):
    raise FileNotFoundError("path: '{}' does not exist.".format(files_path))
# File names without extension, in sorted order.
files_name = sorted(file.split(".")[0] for file in os.listdir(files_path))

# Folder that receives train.txt / test.txt (pick the matching one).
# store_path = '/home/tlz/GCCS/Data_A3C_ALL'
# store_path = '/home/tlz/GCCS/Data_AOSAX_ALL'
store_path = '/home/tlz/GCCS/Data_LVLAX_ALL'


def _id_suffixes(patients):
    """Return the set of last-two-character ID suffixes in column 0."""
    return {patient_id[-2:] for patient_id in patients.iloc[:, 0]}


# A file belongs to a patient when its first two characters equal the last two
# characters of the patient's ID. Sets make that an O(1) membership test and
# list each file at most once per output.
test_ids = _id_suffixes(PATIENT_TEST)
# Training and validation patients are written to the same list.
train_validation_ids = _id_suffixes(PATIENT_TRAIN) | _id_suffixes(PATIENT_VALIDATION)

test_files = [file for file in files_name if file[:2] in test_ids]
train_validation_files = [
    file for file in files_name if file[:2] in train_validation_ids
]

try:
    # Mode "x" refuses to overwrite existing lists; the `with` block makes
    # sure both files are flushed and closed.
    with open(os.path.join(store_path, "train.txt"), "x") as train_f, \
            open(os.path.join(store_path, "test.txt"), "x") as eval_f:
        train_f.write("\n".join(train_validation_files))
        eval_f.write("\n".join(test_files))
except FileExistsError as e:
    print(e)
    raise SystemExit(1)
额外补充一下opencv 一些算子的尝试
import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
# Raw strings keep the Windows paths free of invalid escape sequences such as
# "\g", "\D" and "\d"; the resulting path values are unchanged.
img_path = r'F:\tlz\gccs_data\Data_LVLAX\Data\06_0000.jpg'
img = cv.imread(img_path)
if img is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: {}".format(img_path))

# Canny: poor result on this data, parts of the boundary are lost.
edges = cv.Canny(img, 0, 280)

# Smoothing filters that were tried:
# kernel = np.ones((5,5),np.float32)/25
# dst =cv.filter2D(img,-1,kernel)
# cv.imwrite('2d_convolution.jpg',dst)
# blur = cv.blur(img,(5,5))
# blur = cv.GaussianBlur(img,(5,5),0)
# cv.imwrite('gauss_blurring.jpg',blur)
# blur = cv.bilateralFilter(img,9,75,75)
# median = cv.medianBlur(img,5)

# Sobel: works reasonably well.
# laplacian = cv.Laplacian(img,cv.CV_64F)
# sobelx = cv.Sobel(img,cv.CV_64F,1,0,ksize=5)
# sobely = cv.Sobel(img,cv.CV_64F,0,1,ksize=5)
# cv.imshow("sobelx",sobelx)
# cv.imshow("sobely",sobely)
# cv.imshow('sobelx+sobely',sobelx+sobely)
# edges =cv.Canny(sobely.astype(np.uint8),0,1)

cv.imshow("edges", edges)
cv.imwrite(r'F:\tlz\gccs_data\data_argumention\edges1.jpg', edges)

# Harris corner detection:
# gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# gray = np.float32(gray)
# dst = cv.cornerHarris(gray,10,3,0.04)
# dst = cv.dilate(dst,None)
# # Threshold for an optimal value, it may vary depending on the image.
# img[dst>0.01*dst.max()]=[0,0,255]
# cv.imshow('dst',img)

# goodFeaturesToTrack corner detection:
# gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# corners = cv.goodFeaturesToTrack(gray,25,0.01,10)
# corners = np.int0(corners)
# for i in corners:
#     x,y = i.ravel()
#     cv.circle(img,(x,y),3,255,-1)
# plt.imshow(img),plt.show()

cv.waitKey(0)
cv.destroyAllWindows()