infQC_pub-main

import numpy as np
import os
from openpyxl import Workbook
import nibabel as nib
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv3D, MaxPool3D, BatchNormalization, GlobalAveragePooling3D, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Update these paths
data_directory = 'path_to_train_directory'  # path to your train folder, which contains the artifact and normal subfolders
model_save_path = 'path_to_save_model'      # directory where models will be saved

# Parameter settings
trainparams = {
    'dim': (128, 128, 70),
    'batch_size': 8,
    'n_classes': 2,
    'n_channels': 1,
    'shuffle': True
}

testparams = {
    'dim': (128, 128, 70),
    'batch_size': 8,
    'n_classes': 2,
    'n_channels': 1,
    'shuffle': True
}

# Create an Excel workbook to record loss and accuracy
wb = Workbook()
ws_loss = wb.active
ws_loss.title = "Loss"
ws_acc = wb.create_sheet(title="Acc")
ws_loss.cell(1, 1).value = 'epoch#'
ws_loss.cell(1, 2).value = 'train_loss'
ws_loss.cell(1, 3).value = 'val_loss'
ws_acc.cell(1, 1).value = 'epoch#'
ws_acc.cell(1, 2).value = 'train_acc'
ws_acc.cell(1, 3).value = 'val_acc'

# Collect file paths and generate labels
def get_file_paths_and_labels(data_dir):
    file_paths = []
    labels = []
    
    for category in ['artifact', 'normal']:
        category_dir = os.path.join(data_dir, category)
        for file_name in os.listdir(category_dir):
            if file_name.endswith('.nii') or file_name.endswith('.nii.gz'):
                file_paths.append(os.path.join(category_dir, file_name))
                if category == 'artifact':
                    labels.append(1)  # 1 = artifact present
                else:
                    labels.append(0)  # 0 = no artifact
    return file_paths, labels

# Gather all files and their corresponding labels
all_files, all_labels = get_file_paths_and_labels(data_directory)

# Manual 80/20 train/validation split. Shuffle first: get_file_paths_and_labels
# lists all 'artifact' files before all 'normal' files, so a plain slice would
# give the validation set only one class (seed fixed here for reproducibility).
order = np.random.default_rng(42).permutation(len(all_files))
all_files = [all_files[i] for i in order]
all_labels = [all_labels[i] for i in order]
split = int(len(all_files) * 0.8)
train_files, val_files = all_files[:split], all_files[split:]
train_labels, val_labels = all_labels[:split], all_labels[split:]

# Custom data generator
class SimpleDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, file_list, labels, batch_size=8, dim=(128, 128, 70), n_channels=1, n_classes=2, shuffle=True):
        self.file_list = file_list
        self.labels = labels
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.file_list) / self.batch_size))

    def __getitem__(self, index):
        # Use the shuffled index order from on_epoch_end so shuffle=True actually takes effect
        batch_idx = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_files = [self.file_list[k] for k in batch_idx]
        batch_labels = [self.labels[k] for k in batch_idx]
        X, y = self.__data_generation(batch_files, batch_labels)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.file_list))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_files, batch_labels):
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        for i, file_path in enumerate(batch_files):
            nifti_vol = nib.load(file_path)
            X[i, :, :, :, 0] = np.array(nifti_vol.dataobj)
            y[i] = batch_labels[i]
        return X, y

# Instantiate the data generators
training_generator = SimpleDataGenerator(train_files, train_labels, **trainparams)
validation_generator = SimpleDataGenerator(val_files, val_labels, **testparams)

# Model architecture
model = tf.keras.models.Sequential()
model.add(Input(shape=(128, 128, 70, 1)))

# Convolutional blocks
model.add(Conv3D(8, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(16, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(64, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

# Global average pooling and fully connected layers
model.add(GlobalAveragePooling3D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))

# Output layer for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model (note: the `decay` argument is only accepted by older tf.keras
# optimizers; on TF >= 2.11 use tf.keras.optimizers.legacy.Adam or a learning-rate schedule)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, decay=0.01),
              loss="binary_crossentropy",
              metrics=['accuracy'])

# Checkpoint callback that saves the best model
callback_checkpoint = ModelCheckpoint(os.path.join(model_save_path, 'best_model.h5'),
                                      save_best_only=True,
                                      monitor='val_accuracy',
                                      mode='max')

# Print the model summary
model.summary()

# Train the model (shuffle=True is ignored for Sequence inputs; shuffling is handled in the generator's on_epoch_end)
history = model.fit(training_generator,
                    epochs=30,
                    validation_data=validation_generator,
                    shuffle=True,
                    callbacks=[callback_checkpoint])

# Save the final model
model.save(os.path.join(model_save_path, 'final_model.h5'))

# Plot and save the training curves
fig, axs = plt.subplots(2)
fig.suptitle('Network Loss and Accuracy')

train_loss = history.history['loss']
val_loss = history.history['val_loss']

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

epochs = range(1, len(train_acc) + 1)

axs[0].plot(epochs, train_loss, 'bo', label='Training Loss')
axs[0].plot(epochs, val_loss, 'b', label='Validation Loss')
axs[0].set_ylabel('Loss [Binary Cross-Entropy]')

axs[1].plot(epochs, train_acc, 'bo', label='Training Accuracy')
axs[1].plot(epochs, val_acc, 'b', label='Validation Accuracy')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Accuracy')

axs[0].legend(loc="upper right")
axs[1].legend(loc="lower right")
fig.savefig(os.path.join(model_save_path, 'training_curves.png'))  # figure file name chosen here; adjust as needed
plt.show()

# Write per-epoch loss and accuracy to the Excel workbook
for epoch in epochs:
    ws_loss.cell(epoch + 1, 1).value = epoch
    ws_loss.cell(epoch + 1, 2).value = train_loss[epoch - 1]
    ws_loss.cell(epoch + 1, 3).value = val_loss[epoch - 1]
    ws_acc.cell(epoch + 1, 1).value = epoch
    ws_acc.cell(epoch + 1, 2).value = train_acc[epoch - 1]
    ws_acc.cell(epoch + 1, 3).value = val_acc[epoch - 1]

# Save the Excel file
wb.save(os.path.join(model_save_path, 'training_results.xlsx'))
INFO: pip is looking at multiple versions of tensorflow to determine which version is compatible with other requirements. This could take a while.
ERROR: Ignored the following versions that require a different python version: 1.25.0 Requires-Python >=3.9; 1.25.1 Requires-Python >=3.9; 1.25.2 Requires-Python >=3.9; 1.26.0 Requires-Python <3.13,>=3.9; 1.26.1 Requires-Python <3.13,>=3.9; 1.26.2 Requires-Python >=3.9; 1.26.3 Requires-Python >=3.9; 1.26.4 Requires-Python >=3.9; 2.0.0 Requires-Python >=3.9; 2.0.1 Requires-Python >=3.9; 2.0.2 Requires-Python >=3.9; 2.1.0 Requires-Python >=3.10; 2.1.0rc1 Requires-Python >=3.10; 2.1.1 Requires-Python >=3.10; 2.14.1 Requires-Python >=3.9; 2.15.0 Requires-Python >=3.9; 2.15.1 Requires-Python >=3.9; 2.15.2 Requires-Python >=3.9; 2.16.0 Requires-Python >=3.9; 2.16.1 Requires-Python >=3.9; 2.16.2 Requires-Python >=3.9; 2.17.0 Requires-Python >=3.9; 2.17.1 Requires-Python >=3.9; 2.18.0 Requires-Python >=3.9; 3.12.0 Requires-Python >=3.9; 3.12.1 Requires-Python >=3.9
ERROR: Could not find a version that satisfies the requirement tf-estimator-nightly==2.8.0.dev2021122109 (from tensorflow) (from versions: none)
ERROR: No matching distribution found for tf-estimator-nightly==2.8.0.dev2021122109
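This pip failure is not caused by the script itself: the resolver has fallen back to an old TensorFlow wheel whose metadata pins tf-estimator-nightly==2.8.0.dev2021122109, a build that is not published on PyPI, and the "Requires-Python >= 3.9" messages suggest the active interpreter is too old for recent releases. A reasonable workaround (version numbers below are illustrative, not tested pins) is to upgrade pip and explicitly install a TensorFlow release that supports your Python version, plus the other packages these scripts use:

python -m pip install --upgrade pip
python -m pip install "tensorflow==2.10.*" nibabel openpyxl scikit-image matplotlib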
import numpy as np
import tensorflow as tf
import nibabel as nib
import matplotlib.pyplot as plt


class DataGenerator(tf.keras.utils.Sequence):

    def __init__(self, list_ids, labels, batch_size=5, dim=(96, 96, 70), n_channels=1, n_classes=2, shuffle=True):
        """Initialization"""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_ids = list_ids
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch"""
        return int(np.floor(len(self.list_ids) / self.batch_size))

    def __getitem__(self, index):
        """Generate one batch of data"""
        # Generate indices of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find list of IDs
        list_ids_temp = [self.list_ids[k] for k in indexes]

        # Generate data
        X, y = self.__data_generation(list_ids_temp)

        return X, y

    def on_epoch_end(self):
        """Updates indexes after each epoch"""
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_ids_temp):
        """Generates data containing batch_size samples"""  # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty(self.batch_size, dtype=int)

        # Generate data
        for i, subid in enumerate(list_ids_temp):
            nifti_vol = nib.load(subid + '.nii.gz')
            X[i, :, :, :, 0] = np.array(nifti_vol.dataobj)
            y[i] = self.labels[subid]

        # Cast once after the loop rather than re-casting the whole array per sample
        return X.astype('float32'), y
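For reference, a minimal usage sketch of the generator above (paths, IDs, and labels are hypothetical): list_ids holds file path prefixes without the '.nii.gz' extension, and labels is a dict keyed by those same prefixes. This version does not resize, so each volume must already be 96 x 96 x 70.

train_ids = ['train/artifact/sub-001', 'train/normal/sub-002']
train_labels = {'train/artifact/sub-001': 1, 'train/normal/sub-002': 0}
train_gen = DataGenerator(train_ids, train_labels, batch_size=2, dim=(96, 96, 70))
X_batch, y_batch = train_gen[0]  # X_batch.shape == (2, 96, 96, 70, 1)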
from skimage.transform import resize
import numpy as np 
import tensorflow as tf
import nibabel as nib
import matplotlib.pyplot as plt
 
class DataGenerator(tf.keras.utils.Sequence):
 
    def __init__(self, list_ids, labels, batch_size=5, dim=(96, 96, 70), n_channels=1, n_classes=2, shuffle=True):
        """Initialization"""
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_ids = list_ids
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()
 
    def __len__(self):
        """Denotes the number of batches per epoch"""
        return int(np.floor(len(self.list_ids) / self.batch_size))
 
    def __getitem__(self, index):
        """Generate one batch of data"""
        # Generate indices of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
 
        # Find list of IDs
        list_ids_temp = [self.list_ids[k] for k in indexes]
 
        # Generate data
        X, y = self.__data_generation(list_ids_temp)
 
        return X, y
 
    def on_epoch_end(self):
        """Updates indexes after each epoch"""
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)
 
    def __data_generation(self, list_ids_temp):
        """Generates data containing batch_size samples"""  # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty(self.batch_size, dtype=int)
 
        # Generate data
        for i, subid in enumerate(list_ids_temp):
            # Load NIfTI file
            nifti_vol = nib.load(subid + '.nii.gz')
            nifti_data = np.array(nifti_vol.dataobj)
            
            # Resize the NIfTI data to match the expected dimensions (96, 96, 70)
            resized_data = resize(nifti_data, self.dim, anti_aliasing=True)
            
            # Store the resized data in X
            X[i, :, :, :, 0] = resized_data.astype('float32')
            
            # Get label for the sample
            y[i] = self.labels[subid]
 
        return X, y
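Compared with the first version, this generator resizes each volume to self.dim with skimage.transform.resize, so the input NIfTI files no longer need to match the network's input shape exactly. Resizing all three axes independently can change the volume's aspect ratio, which is usually acceptable for a QC classifier, but cropping or padding to a fixed shape is an alternative if geometry must be preserved. A quick shape check (the input shape here is hypothetical):

vol = np.random.rand(192, 192, 120)                  # stand-in for a raw NIfTI volume
out = resize(vol, (96, 96, 70), anti_aliasing=True)  # out.shape == (96, 96, 70)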
import numpy as np
import os
from openpyxl import Workbook
import nibabel as nib
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv3D, MaxPool3D, BatchNormalization, GlobalAveragePooling3D, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from skimage.transform import resize

# Update these paths
data_directory = '/media/wagnchogn/data/qu/media artifact/infQC_pub-main/infQC_pub-dataset/train'  # your train folder, containing the artifact and normal subfolders
model_save_path = '/media/wagnchogn/data/qu/media artifact/infQC_pub-main/models'  # directory where models will be saved

# Parameter settings
trainparams = {
    'dim': (96, 96, 70),
    'batch_size': 8,
    'n_classes': 2,
    'n_channels': 1,
    'shuffle': True
}

testparams = {
    'dim': (96, 96, 70),
    'batch_size': 8,
    'n_classes': 2,
    'n_channels': 1,
    'shuffle': True
}

# Create an Excel workbook to record loss and accuracy
wb = Workbook()
ws_loss = wb.active
ws_loss.title = "Loss"
ws_acc = wb.create_sheet(title="Acc")
ws_loss.cell(1, 1).value = 'epoch#'
ws_loss.cell(1, 2).value = 'train_loss'
ws_loss.cell(1, 3).value = 'val_loss'
ws_acc.cell(1, 1).value = 'epoch#'
ws_acc.cell(1, 2).value = 'train_acc'
ws_acc.cell(1, 3).value = 'val_acc'


# Collect file paths and generate labels
def get_file_paths_and_labels(data_dir):
    file_paths = []
    labels = []

    for category in ['artifact', 'normal']:
        category_dir = os.path.join(data_dir, category)
        for file_name in os.listdir(category_dir):
            if file_name.endswith('.nii') or file_name.endswith('.nii.gz'):
                file_paths.append(os.path.join(category_dir, file_name))
                if category == 'artifact':
                    labels.append(1)  # 1 = artifact present
                else:
                    labels.append(0)  # 0 = no artifact
    return file_paths, labels


# Gather all files and their corresponding labels
all_files, all_labels = get_file_paths_and_labels(data_directory)

# Manual 80/20 train/validation split. Shuffle first: get_file_paths_and_labels
# lists all 'artifact' files before all 'normal' files, so a plain slice would
# give the validation set only one class (seed fixed here for reproducibility).
order = np.random.default_rng(42).permutation(len(all_files))
all_files = [all_files[i] for i in order]
all_labels = [all_labels[i] for i in order]
split = int(len(all_files) * 0.8)
train_files, val_files = all_files[:split], all_files[split:]
train_labels, val_labels = all_labels[:split], all_labels[split:]


# Custom data generator
class SimpleDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, file_list, labels, batch_size=8, dim=(96, 96, 70), n_channels=1, n_classes=2, shuffle=True):
        self.file_list = file_list
        self.labels = labels
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.file_list) / self.batch_size))

    def __getitem__(self, index):
        # Use the shuffled index order from on_epoch_end so shuffle=True actually takes effect
        batch_idx = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_files = [self.file_list[k] for k in batch_idx]
        batch_labels = [self.labels[k] for k in batch_idx]
        X, y = self.__data_generation(batch_files, batch_labels)
        return X, y

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.file_list))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, batch_files, batch_labels):
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        for i, file_path in enumerate(batch_files):
            nifti_vol = nib.load(file_path)
            nifti_data = np.array(nifti_vol.dataobj)

            # Resize the NIfTI data to match the expected dimensions (96, 96, 70)
            resized_data = resize(nifti_data, self.dim, anti_aliasing=True)

            # Store the resized data in X
            X[i, :, :, :, 0] = resized_data

            y[i] = batch_labels[i]
        return X, y


# Instantiate the data generators
training_generator = SimpleDataGenerator(train_files, train_labels, **trainparams)
validation_generator = SimpleDataGenerator(val_files, val_labels, **testparams)

# Model architecture
model = tf.keras.models.Sequential()
model.add(Input(shape=(96, 96, 70, 1)))

# Convolutional blocks
model.add(Conv3D(8, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(16, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(32, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

model.add(Conv3D(64, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool3D(pool_size=2))
model.add(BatchNormalization())

# Global average pooling and fully connected layers
model.add(GlobalAveragePooling3D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))

# Output layer for binary classification
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3, decay=0.01),
              loss="binary_crossentropy",
              metrics=['accuracy'])

# Checkpoint callback that saves the best model
callback_checkpoint = ModelCheckpoint(os.path.join(model_save_path, 'best_model.h5'),
                                      save_best_only=True,
                                      monitor='val_accuracy',
                                      mode='max')

# Print the model summary
model.summary()

# Train the model
history = model.fit(training_generator,
                    epochs=30,
                    validation_data=validation_generator,
                    shuffle=True,
                    callbacks=[callback_checkpoint])

# Save the final model
model.save(os.path.join(model_save_path, 'final_model.h5'))

# Plot and save the training curves
fig, axs = plt.subplots(2)
fig.suptitle('Network Loss and Accuracy')

train_loss = history.history['loss']
val_loss = history.history['val_loss']

train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

epochs = range(1, len(train_acc) + 1)

axs[0].plot(epochs, train_loss, 'bo', label='Training Loss')
axs[0].plot(epochs, val_loss, 'b', label='Validation Loss')
axs[0].set_ylabel('Loss [Binary Cross-Entropy]')

axs[1].plot(epochs, train_acc, 'bo', label='Training Accuracy')
axs[1].plot(epochs, val_acc, 'b', label='Validation Accuracy')
axs[1].set_xlabel('Epochs')
axs[1].set_ylabel('Accuracy')

axs[0].legend(loc="upper right")
axs[1].legend(loc="lower right")
fig.savefig(os.path.join(model_save_path, 'training_curves.png'))  # figure file name chosen here; adjust as needed
plt.show()

# Write per-epoch loss and accuracy to the Excel workbook
for epoch in epochs:
    ws_loss.cell(epoch + 1, 1).value = epoch
    ws_loss.cell(epoch + 1, 2).value = train_loss[epoch - 1]
    ws_loss.cell(epoch + 1, 3).value = val_loss[epoch - 1]
    ws_acc.cell(epoch + 1, 1).value = epoch
    ws_acc.cell(epoch + 1, 2).value = train_acc[epoch - 1]
    ws_acc.cell(epoch + 1, 3).value = val_acc[epoch - 1]

# Save the Excel file
wb.save(os.path.join(model_save_path, 'training_results.xlsx'))
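Once training has finished, the saved checkpoint can be used to screen new scans. A minimal inference sketch, assuming the same (96, 96, 70) resize preprocessing as in training; file paths below are placeholders:

import numpy as np
import nibabel as nib
import tensorflow as tf
from skimage.transform import resize

model = tf.keras.models.load_model('path_to_save_model/best_model.h5')  # placeholder path

vol = np.array(nib.load('path_to_new_scan.nii.gz').dataobj)   # placeholder scan
vol = resize(vol, (96, 96, 70), anti_aliasing=True)           # same preprocessing as training
prob = model.predict(vol[np.newaxis, ..., np.newaxis])[0, 0]  # add batch and channel dims
print('artifact' if prob >= 0.5 else 'normal', f'(p={prob:.3f})')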
