钙化项目整理流程 1-dicom 解析帧和数据集划分

第一步 数据集介绍 准备工作

文件夹6GCCS 原始dicom 数据
在这里插入图片描述
每一个代表一个患者
在这里插入图片描述
这里代表六个切面

在这里插入图片描述
上面为dicom数据,下面是处理过程中需要舍弃的
在这里插入图片描述
用来当做三个切面的存储主目录

患者的划分:patienti_split.ipynb

患者名单 GCCS名单.xlsx 如下图所示
在这里插入图片描述

在这里插入图片描述
以ID后两位 作为患者的唯一标识,钙化指数和冠脉狭窄程度是需要预测的标签,狭窄程度每十分一个程度(经过尝试,无法直接预测出),目前针对钙化指数进行预测

import os
import SimpleITK as sitk
import cv2
from tqdm import  tqdm
import pandas as pd

pandas 处理表格数据

# Load the patient roster; the .xlsx workbook is read through the openpyxl engine.
patient_data = pd.read_excel(
    '/home/tlz/GCCS/6GCCS/GCCS 名单.xlsx',
    engine='openpyxl',
)
# iloc selects by position: index 2 is the third column, kept as the patient ID.
PATIENT_ID = patient_data.iloc[:, 2]
# loc selects by label: the calcification-index column.
PATIENT_GCCS = patient_data.loc[:, '钙化指数']
# Put both columns side by side and order the rows by calcification index,
# because patients are split according to that score.
PATIENT_INFORMATION = pd.concat(
    [PATIENT_ID, PATIENT_GCCS],
    axis=1,
).sort_values('钙化指数')
# One frame per calcification score (1 to 4).
PATIEN_1SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['钙化指数'] == 1]
PATIEN_2SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['钙化指数'] == 2]
PATIEN_3SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['钙化指数'] == 3]
PATIEN_4SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['钙化指数'] == 4]

iloc函数用于按位置进行索引
在这里插入图片描述
loc 可以用列名进行裁剪
在这里插入图片描述
划分成 四个dataframe之后,按照比例划分训练集测试集
注意 为了避免图片的高度相似性,必须从源头上 按患者进行划分

# Validation patients: the leading rows of every score group
# (9 with score 1, 6 with score 2, 3 with score 3, 2 with score 4).
# NOTE(review): the accompanying notes list only one score-4 patient for the
# validation set, while two are taken here -- confirm which is intended.
PATIENT_VALIDATION = pd.concat(
    [
        PATIEN_1SCORE.head(9),
        PATIEN_2SCORE.head(6),
        PATIEN_3SCORE.head(3),
        PATIEN_4SCORE.head(2),
    ]
)
# Bare expression: shows the frame when run as a notebook cell.
PATIENT_VALIDATION

55名患者
验证集:19 1 分-9、2分-6 3分-3 4分-1
训练集:36 1 分-17、2分-12 3分-6 4分-2

后期出现了新的Excel文件,其中只有静态的检测分(之前的总评分为静态加动态;由于动态评分不稳定,目前只做静态钙化检测)
与前文处理方式相似

import os
from shutil import copy, rmtree
import random
import pandas  as pd


# Load the label workbook through the openpyxl engine.
patient_data = pd.read_excel(
    '/home/tlz/GCCS/JMY-GCCS-LABEL.xlsx',
    engine='openpyxl',
)
# Column at position 2 (the third column) is kept as the patient ID,
# the 'Total' column as the score.
PATIENT_ID = patient_data.iloc[:, 2]
PATIENT_GCCS = patient_data.loc[:, 'Total']
# Combine both columns and order the rows by the 'Total' score.
PATIENT_INFORMATION = pd.concat(
    [PATIENT_ID, PATIENT_GCCS],
    axis=1,
).sort_values('Total')
# One frame per 'Total' value (0, 1 and 2).
PATIEN_0SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['Total'] == 0]
PATIEN_1SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['Total'] == 1]
PATIEN_2SCORE = PATIENT_INFORMATION.loc[PATIENT_INFORMATION['Total'] == 2]

resize_dicom2jpg.py 解析dicom帧

import os
import SimpleITK as sitk
import cv2
from tqdm import  tqdm
import pandas as pd
def avi2jpg(avi_dir, save_dir):
    """Dump every frame of a video file to numbered JPEG files.

    Args:
        avi_dir: Path of the video handed to ``cv2.VideoCapture``.
        save_dir: Directory the frames are written into as ``000.jpg``,
            ``001.jpg``, ... in the order they are read.
    """
    capture = cv2.VideoCapture(avi_dir)
    frame_index = 0
    while True:
        ok, frame = capture.read()
        if not ok:
            # Stop at the first read that does not deliver a frame.
            break
        target = os.path.join(save_dir, '{:0>3d}.jpg'.format(frame_index))
        cv2.imwrite(target, frame)
        frame_index += 1
    capture.release()
    cv2.destroyAllWindows()

def dicom2array(dir):
    """Read one image file with SimpleITK and return its pixels as an array.

    Args:
        dir: Path of the file to read (a DICOM file in this pipeline).

    Returns:
        The array produced by ``sitk.GetArrayFromImage`` for the loaded image.
    """
    file_reader = sitk.ImageFileReader()
    file_reader.SetFileName(dir)
    return sitk.GetArrayFromImage(file_reader.Execute())

def dicom2jpg(file_dir, save_dir, id, score):
    """Export every frame of a multi-frame DICOM file as a JPEG.

    Frames are named ``<id>_<frame index zero-padded to 4 digits>.jpg``.

    Args:
        file_dir: Path of the DICOM file.
        save_dir: Directory receiving the JPEG files.
        id: Patient identifier used as the file-name prefix.
        score: Patient score; accepted but not used by the current naming rule.
    """
    frames = dicom2array(file_dir)
    frame_count = frames.shape[0]
    for index in range(frame_count):
        # Four indices: the array is treated as a 4-D stack whose first axis is the frame.
        frame = frames[index, :, :, :]
        # Alternative naming that also embeds the score:
        # name = str(score) + '_' + str(id) + '_' + '{:0>4d}.jpg'.format(index)
        name = '_'.join([str(id), '{:0>4d}.jpg'.format(index)])
        # NOTE(review): cv2.imwrite interprets 3-channel data as BGR; if the reader
        # delivers RGB frames the red and blue channels end up swapped -- confirm.
        cv2.imwrite(os.path.join(save_dir, name), frame)
# Root of the raw DICOM tree that is walked below.
src_dir = '/home/tlz/GCCS/6GCCS'
# Output directory -- change it together with PATIENT_LIST below.
save_dir = '/home/tlz/GCCS/Data_SAX-BAS'
# save_dir = '/home/tlz/GCCS/Data/test_patient'
# save_dir = '/home/tlz/GCCS/Data/train_patient'
# save_dir = '/home/tlz/GCCS/Data/validation_patient'

# Patients whose frames are exported; pick the subset matching save_dir.
PATIENT_LIST = PATIENT_INFORMATION
# PATIENT_LIST = PATIENT_TEST
# PATIENT_LIST = PATIENT_TRAIN
# PATIENT_LIST = PATIENT_VALIDATION

# Map the last two characters of every listed patient ID to that patient's score.
# setdefault keeps the first occurrence, i.e. the first matching row wins.
patient_scores = {}
for listed_id, listed_score in zip(PATIENT_LIST.iloc[:, 0], PATIENT_LIST.iloc[:, 1]):
    patient_scores.setdefault(listed_id[-2:], listed_score)

for root, dirs, files in os.walk(src_dir, topdown=False):
    root_LIST = root.split('/')
    # Change this folder name to export a different view.
    if root_LIST[-1] != "SAX-BAS":
        continue

    # The patient folder is the parent of the view folder; characters 3-4 of its
    # name are compared with the last two characters of the listed IDs.
    patient_id = root_LIST[-2][3:5]
    if patient_id not in patient_scores:
        # Without this guard the export would either crash on an undefined ID or
        # silently reuse the ID and score of the previously processed patient.
        print("skip '{}': patient '{}' is not in PATIENT_LIST".format(root, patient_id))
        continue
    score = patient_scores[patient_id]

    for file in tqdm(files):
        # Skip the DICOMDIR index and any file whose name ends with 'g'.
        if file == 'DICOMDIR' or file[-1] == 'g':
            continue
        file_dir = os.path.join(root, file)
        # The output naming rule lives in dicom2jpg and can be changed there.
        dicom2jpg(file_dir, save_dir, patient_id, score)

数据集划分

在这里插入图片描述
每一个文件夹 里包含该切面的一系列文件

在这里插入图片描述
Data_A3C为解析出dicom 的原始帧

Data_resize 为对解析后的帧缩放到统一大小

Resize 图像函数到一个大小(包括之前所做的尝试)

mouse_event.py 鼠标点击 确定坐标 以及 rgb 的值

from PIL import Image
import matplotlib.pyplot as plt 
import cv2
import numpy as np
def click_event(event, x, y, flags, param):
    """Mouse callback: print the clicked position or the clicked pixel's colour.

    A left click prints ``x , y``. A right click prints the blue, green and red
    values of the module-level ``img`` at that position.

    Args:
        event: OpenCV mouse event code.
        x: Horizontal pointer position (used as the column index).
        y: Vertical pointer position (used as the row index).
        flags: Unused; part of the callback signature.
        param: Unused; part of the callback signature.
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        print(x, ',', y)
    elif event == cv2.EVENT_RBUTTONDOWN:
        # img is indexed [row, column, channel]; channels 0, 1, 2 are read as
        # blue, green and red.
        blue = img[y, x, 0]
        green = img[y, x, 1]
        red = img[y, x, 2]
        print(blue, ',', green, ',', red)
    # To overlay the printed text on the image, draw it with cv2.putText and
    # refresh the window with cv2.imshow.
        
# Sample frame, 708*1016.
# Raw string: same path as before, without relying on invalid escapes such as \g and \D.
img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data\01_0000.jpg"
# Sample frame, 434*636.
# img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data\54_0001.jpg"

img = cv2.imread(img_path)
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: '{}'".format(img_path))

# Per-pixel sum of the three channels. Accumulating in float64 avoids the
# wrap-around that adding uint8 scalars produces once the sum exceeds 255.
imgrgb_array = img.sum(axis=2, dtype=np.float64)

cv2.imshow('img', img)
# imshow scales floating-point images by 255, so every pixel with a sum of 1 or
# more is drawn white: this window works as a "non-black pixel" mask.
cv2.imshow('imgrgb', imgrgb_array)
cv2.setMouseCallback('img', click_event)
cv2.waitKey(0)
cv2.destroyAllWindows()

rescaleimg.py 对图像进行resize 因为原始图像具有两个分辨率,最后采取直接进行缩放到小的那个尺寸

from operator import le
import numpy as np
import cv2, os, json, shutil
from copy import deepcopy
from os import path as osp
from PIL import Image
from numpy.lib.arraypad import pad



def CropBackground(src, target):
    """Return the sub-image of ``src`` bounded by ``target``.

    Args:
        src: Array indexed as [row, column, channel].
        target: Sequence ``[row_start, row_stop, col_start, col_stop]``;
            the stop values are exclusive, as in ordinary slicing.

    Returns:
        ``src[row_start:row_stop, col_start:col_stop, :]``.
    """
    rows = slice(target[0], target[1])
    cols = slice(target[2], target[3])
    return src[rows, cols, :]




#############################################################################################################


def group_files_by_size(sized_files):
    """Group file names by image size.

    Args:
        sized_files: Iterable of ``(file_name, size)`` pairs; ``size`` must be
            hashable, e.g. the ``(width, height)`` tuple of ``PIL.Image.size``.

    Returns:
        Dict mapping every size to the list of file names having that size, in
        first-seen order and without duplicates. The first file of a size is
        included too (the earlier inline logic created the list but forgot to
        add that file).
    """
    grouped = {}
    for file_name, size in sized_files:
        names = grouped.setdefault(size, [])
        if file_name not in names:
            names.append(file_name)
    return grouped


if __name__ == '__main__':
    # Directory holding the extracted frames.
    # Raw strings: same paths as before, without invalid escapes such as \g and \D.
    img_path = r"F:\tlz\gccs_data\Data_LVLAX\Data"
    # Directory receiving the frames at the unified size.
    resize_path = r"F:\tlz\gccs_data\Data_LVLAX_resize"
    # The two source resolutions as (width, height); everything ends up at the smaller one.
    small_size = (636, 434)
    large_size = (1016, 708)

    # cv2.imwrite only returns False when the target directory is missing, so
    # make sure it exists before writing.
    os.makedirs(resize_path, exist_ok=True)

    # Collect (file, size) for every file whose name ends with 'g'.
    sized_files = []
    for file in os.listdir(img_path):
        if file[-1] == 'g':
            # The context manager closes the image file after its size is read.
            with Image.open(os.path.join(img_path, file)) as img:
                sized_files.append((file, img.size))
    files_imgsize = group_files_by_size(sized_files)

    # Frames already at the target size are read and written back unchanged in size.
    for file in files_imgsize.get(small_size, []):
        label_img = cv2.imread(os.path.join(img_path, file))
        # Earlier attempt: crop the background with a hand-picked box for 434*636 frames.
        # crophw = [1, 428, 40, 592]
        # label_img = CropBackground(label_img, target=crophw)
        resizeimg_path = os.path.join(resize_path, file)
        print(resizeimg_path)
        cv2.imwrite(resizeimg_path, label_img)

    # Larger frames are scaled down to the target size.
    for file in files_imgsize.get(large_size, []):
        label_img = cv2.imread(os.path.join(img_path, file))
        # Earlier attempt: crop the background with a hand-picked box for 708*1016 frames.
        # crophw = [43, 193, 68, 938]
        # label_img = CropBackground(label_img, target=crophw)
        label_img = cv2.resize(label_img, small_size)
        resizeimg_path = os.path.join(resize_path, file)
        cv2.imwrite(resizeimg_path, label_img)
       

# Directory of resized frames for the view being split (switch per view).
# files_path = '/home/tlz/GCCS/Data_A3C_ALL/Data_A3C_resize'
# files_path = '/home/tlz/GCCS/Data_AOSAX_ALL/Data_AOSAX_resize'

files_path = '/home/tlz/GCCS/Data_LVLAX_ALL/Data_LVLAX_resize'
# Explicit check instead of assert, which is stripped when Python runs with -O.
if not os.path.exists(files_path):
    raise FileNotFoundError("path: '{}' does not exist.".format(files_path))
# Frame names without extension, sorted.
files_name = sorted([file.split(".")[0] for file in os.listdir(files_path)])
# Directory receiving train.txt / test.txt (switch per view).
# store_path = '/home/tlz/GCCS/Data_A3C_ALL'
# store_path = '/home/tlz/GCCS/Data_AOSAX_ALL'
store_path = '/home/tlz/GCCS/Data_LVLAX_ALL'


def _id_suffixes(patients):
    """Return the set of two-character ID suffixes found in the first column of ``patients``."""
    return {patient_id[-2:] for patient_id in patients.iloc[:, 0]}


# PATIENT_TEST / PATIENT_TRAIN / PATIENT_VALIDATION must already be defined by
# the patient-split step. Training and validation patients share one list.
test_ids = _id_suffixes(PATIENT_TEST)
train_validation_ids = _id_suffixes(PATIENT_TRAIN) | _id_suffixes(PATIENT_VALIDATION)

# A frame belongs to a patient when its first two characters equal the ID suffix.
# Set membership lists every frame once, even if several rows share a suffix.
test_files = [file for file in files_name if file[:2] in test_ids]
# validation_files = []
train_validation_files = [file for file in files_name if file[:2] in train_validation_ids]

try:
    # Mode "x" refuses to overwrite an existing split; the with-block closes
    # (and therefore flushes) both files.
    with open(os.path.join(store_path, "train.txt"), "x") as train_f, \
            open(os.path.join(store_path, "test.txt"), "x") as eval_f:
        train_f.write("\n".join(train_validation_files))
        eval_f.write("\n".join(test_files))
except FileExistsError as e:
    print(e)
    raise SystemExit(1)

额外补充一下opencv 一些算子的尝试


import cv2 as cv
import numpy as np
from matplotlib import pyplot as plt
# Raw string: same path as before, without invalid escapes such as \g and \D.
img_path = r'F:\tlz\gccs_data\Data_LVLAX\Data\06_0000.jpg'
img = cv.imread(img_path)
if img is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("cannot read image: '{}'".format(img_path))
# Canny with thresholds 0 / 280: poor result, parts of the boundary are lost.
edges = cv.Canny(img, 0, 280)


# Smoothing experiments:
# kernel = np.ones((5,5),np.float32)/25
# dst =cv.filter2D(img,-1,kernel)
# cv.imwrite('2d_convolution.jpg',dst)

# blur = cv.blur(img,(5,5))
# blur = cv.GaussianBlur(img,(5,5),0)
# cv.imwrite('gauss_blurring.jpg',blur)
# blur = cv.bilateralFilter(img,9,75,75)
# median = cv.medianBlur(img,5)

# Sobel: works reasonably well.
# laplacian = cv.Laplacian(img,cv.CV_64F)
# sobelx = cv.Sobel(img,cv.CV_64F,1,0,ksize=5)
# sobely = cv.Sobel(img,cv.CV_64F,0,1,ksize=5)
# cv.imshow("sobelx",sobelx)

# cv.imshow("sobely",sobely)
# cv.imshow('sobelx+sobely',sobelx+sobely)
# edges =cv.Canny(sobely.astype(np.uint8),0,1)

cv.imshow("edges", edges)

cv.imwrite(r'F:\tlz\gccs_data\data_argumention\edges1.jpg', edges)

# Harris corner detection:
# gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# gray = np.float32(gray)
# dst = cv.cornerHarris(gray,10,3,0.04)
# dst = cv.dilate(dst,None)
# # Threshold for an optimal value, it may vary depending on the image.
# img[dst>0.01*dst.max()]=[0,0,255]
# cv.imshow('dst',img)
# gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
# corners = cv.goodFeaturesToTrack(gray,25,0.01,10)
# corners = np.int0(corners)  # np.int0 was removed in NumPy 2.0; np.intp is the replacement

# for i in corners:
#     x,y = i.ravel()
#     cv.circle(img,(x,y),3,255,-1)
# plt.imshow(img),plt.show()
cv.waitKey(0)
cv.destroyAllWindows()
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值