# 如果直接使用预训练的网络模型,代码如下:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torchvision import models, transforms
from torch.autograd import Variable
from pydicom import dcmread
from PIL import Image
# Restrict the process to GPU 0 and fall back to the CPU when CUDA is unavailable.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the pretrained DenseNet121.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept as-is for compatibility with older installs.
model = models.densenet121(pretrained=True)
# An empty Sequential is an identity, so the model returns whatever would have
# been fed to the classifier instead of class scores.
model.classifier = torch.nn.Sequential()

# Adapt the first convolution (64 output channels, kernel 7, stride 2,
# padding 3) to single-channel input. A freshly constructed Conv2d is randomly
# initialised, which would throw away the pretrained filters and make the
# extracted features differ from run to run. Summing the pretrained RGB kernels
# over the input-channel axis gives the same response as feeding the grayscale
# image replicated into three channels.
_pretrained_stem = model.features[0]
_gray_stem = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
with torch.no_grad():
    _gray_stem.weight.copy_(_pretrained_stem.weight.sum(dim=1, keepdim=True))
model.features[0] = _gray_stem

model = model.to(device)
# Inference mode: freezes batch-norm statistics and disables dropout.
model.eval()
# Image preprocessing: resize to 512x512, convert to a tensor, then normalise
# the single channel with mean 0.485 and std 0.229.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=(512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485], std=[0.229]),
    ]
)
# Read a DICOM file and convert its pixel data to an 8-bit grayscale image.
def dcm_to_image(dcm_path):
    """Load a DICOM file and return its pixel data as an 8-bit grayscale PIL image.

    Pixel values are min-max stretched to [0, 255] only when they fall outside
    that range; data already within [0, 255] is passed through unchanged.

    Args:
        dcm_path: Path of the DICOM file to read.

    Returns:
        A PIL image in mode 'L'.
    """
    dcm = dcmread(dcm_path)
    # Work in float64 so that subtracting the minimum cannot overflow a narrow
    # integer dtype (e.g. int16 data spanning most of its range).
    image = dcm.pixel_array.astype(np.float64)

    min_val = image.min()
    max_val = image.max()

    # Rescale whenever the data does not fit in uint8. Checking the minimum as
    # well prevents negative values from wrapping around in the uint8 cast.
    if min_val < 0 or max_val > 255:
        value_range = max_val - min_val
        if value_range > 0:
            image = (image - min_val) / value_range * 255
        else:
            # Constant out-of-range image: avoid 0/0 and emit a black image.
            image = np.zeros_like(image)

    image = image.astype(np.uint8)
    # Convert to single-channel grayscale.
    return Image.fromarray(image).convert('L')
# Extract a feature vector from a single DICOM file.
def extract_features(image_path):
    """Run one DICOM file through the model and return its features.

    Args:
        image_path: Path of the DICOM file.

    Returns:
        The model output for the image as a flattened 1-D NumPy array.
    """
    tensor = preprocess(dcm_to_image(image_path))
    # Add the batch dimension and move the input to the model's device.
    batch = tensor.unsqueeze(0).to(device)
    # Gradients are not needed for inference.
    with torch.no_grad():
        output = model(batch)
    return output.cpu().numpy().flatten()
# Extract features for every DICOM file in a folder and save them as CSV.
def process_dicom_files(dicom_folder, output_csv):
    """Extract features from every '.dcm' file in a folder and write them to CSV.

    Each row of the CSV holds the feature vector of one file and is indexed by
    the file name. Rows are written in sorted file-name order.

    Args:
        dicom_folder: Directory that is scanned (non-recursively) for files
            whose name ends with '.dcm'.
        output_csv: Path of the CSV file to write.
    """
    # os.listdir returns entries in arbitrary order; sort so that the row order
    # of the CSV is reproducible across runs and filesystems.
    filenames = sorted(
        name for name in os.listdir(dicom_folder) if name.endswith('.dcm')
    )
    feature_list = [
        extract_features(os.path.join(dicom_folder, name)) for name in filenames
    ]
    # Save the features as CSV, one row per file.
    df = pd.DataFrame(feature_list, index=filenames)
    df.to_csv(output_csv)
# Destination CSV for the extracted features and the folder of input DICOM files.
output_csv = '/data2/dcm_dl_features.csv'
dicom_folder = '/data2/DCM_AP_files_last_version'
# Extract features for the whole folder and save them.
process_dicom_files(dicom_folder=dicom_folder, output_csv=output_csv)