How to Add a Self-Attention Module to Your Own Model

I use a pretrained model, efficientnet-b4, on a dataset of face images; the attention module is inserted between the feature extractor and the classifier.

PS: hyperparameter tuning really is like alchemy. It took a whole day of trial and error just to get the code to run, and in the end I still don't know exactly why it worked. I don't feel I've fully understood the self-attention module yet, and my head is spinning, so I have to write this moment down~

Background: a binary image-classification problem.

1. Import the required packages and set up the environment

!pip install efficientnet_pytorch
!pip install vision-transformer-pytorch

import warnings
warnings.filterwarnings('ignore')

import os
import glob
import json
import random
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
from PIL import Image, ImageDraw

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchvision.models as models
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle

from efficientnet_pytorch import EfficientNet
from vision_transformer_pytorch import VisionTransformer

2. Set the training hyperparameters

# Configuration
class Config:
    def __init__(self):
        # Input image size
        self.image_width = 128
        self.image_height = 128
        self.epoch = 1
        self.seed = 42
        self.batch_size = 16  # the batch size sets the first dimension of the output; e.g. with batch_size=32 the model outputs a 32x1 tensor
        self.dataset_path = '/kaggle/input/.../'
        # self.checkpoint_filepath = 'model_checkpoint.h5'
        # self.logs_path = '/kaggle/working/logs'


# Instantiate the configuration
config = Config()

print("Checking Epoch Configuration:", config.epoch)

3. Dataset preprocessing

dataset = {"image_path":[],"img_status":[],"where":[]}

for where in os.listdir(config.dataset_path):
    for status in os.listdir(config.dataset_path+"/"+where):
        for image in glob.glob(os.path.join(config.dataset_path, where, status, "*.jpg")):
            dataset["image_path"].append(image)
            dataset["img_status"].append(status)
            dataset["where"].append(where)

dataset = pd.DataFrame(dataset)

# Shuffle the dataset and reset the index
dataset = shuffle(dataset)
dataset = dataset.reset_index(drop=True)
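
The dataset DataFrame above is only used for bookkeeping (the loaders below read from ImageFolder directly), but it makes a class-balance check cheap. An optional sanity check, not in the original code:

# Optional (my addition): inspect the split sizes and class balance
print(dataset["where"].value_counts())       # images per train/valid/test split
print(dataset["img_status"].value_counts())  # images per class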
# Data augmentation for the training set
# 12/05: define the augmentation operations
train_transform = transforms.Compose([
    transforms.Resize((config.image_height, config.image_width)),  # resize to the configured input size (config.image_* was otherwise unused)
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.RandomRotation(degrees=15),  # random rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # color jitter
    # transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),  # random crop and rescale
    transforms.ToTensor(),  # convert to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize with ImageNet statistics
])
# Data transformation for validation and testing
val_test_transform = transforms.Compose([
    transforms.Resize((config.image_height, config.image_width)),  # same fixed size as training, without augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Data Loaders
train_dataset = ImageFolder(os.path.join(config.dataset_path, 'train'), transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

valid_dataset = ImageFolder(os.path.join(config.dataset_path, 'valid'), transform=val_test_transform)
valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)

test_dataset = ImageFolder(os.path.join(config.dataset_path, 'test'), transform=val_test_transform)
test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)
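
ImageFolder expects train/, valid/, and test/ each to contain one subfolder per class. A one-batch shape check is a cheap way to confirm the pipeline works (my addition; the expected sizes assume batch_size=16 and 128×128 inputs):

# Sketch of a one-batch sanity check (my addition)
images, labels = next(iter(train_loader))
print(images.shape)           # expected: torch.Size([16, 3, 128, 128])
print(labels.shape)           # expected: torch.Size([16])
print(train_dataset.classes)  # the two class-folder names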

4. Define the self-attention module

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        # 1x1 convolutions project the input into query/key (theta/phi) and value (g) spaces
        self.theta = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
        self.phi = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
        self.g = nn.Conv2d(in_channels, 64, kernel_size=1, stride=1)
        # project the attended values back to in_channels for the residual sum
        self.concat = nn.Conv2d(64, in_channels, kernel_size=1, stride=1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        theta = self.theta(x)  # (B, 112, H, W)
        phi = self.phi(x)      # (B, 112, H, W)
        g = self.g(x)          # (B, 64, H, W)
        # flatten the spatial dimensions into N = H * W
        theta = theta.view(x.size(0), -1, x.size(2) * x.size(3))  # (B, 112, N)
        phi = phi.view(x.size(0), -1, x.size(2) * x.size(3))      # (B, 112, N)
        g = g.view(x.size(0), -1, x.size(2) * x.size(3))          # (B, 64, N)

        theta = theta.permute(0, 2, 1)   # (B, N, 112)
        attn = torch.matmul(theta, phi)  # (B, N, N): similarity between every pair of positions
        attn = self.softmax(attn)        # normalize each row into attention weights

        g = g.permute(0, 2, 1)            # (B, N, 64)
        attn_g = torch.matmul(attn, g)    # (B, N, 64): attention-weighted sum of values
        attn_g = attn_g.permute(0, 2, 1)  # (B, 64, N)
        # reshape back into a feature map; infer the channel dim (the 64 value
        # channels) rather than reading g.size(1), which is N after the permute
        attn_g = attn_g.view(x.size(0), -1, x.size(2), x.size(3))  # (B, 64, H, W)
        attn_g = self.concat(attn_g)  # (B, in_channels, H, W)

        return attn_g + x  # residual connection
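
The module is shape-preserving, which is what lets it drop in between the backbone and the classifier head. A minimal shape check (my addition; the dummy size matches the efficientnet-b4 feature map for 128×128 inputs):

# Sketch of a shape check (my addition): output shape must equal input shape
attn = SelfAttention(in_channels=1792)
dummy = torch.randn(2, 1792, 4, 4)  # B4 feature map for 128x128 inputs
print(attn(dummy).shape)            # expected: torch.Size([2, 1792, 4, 4])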


5. The custom model built on the pretrained backbone

class EfficientNetWithAttention(nn.Module):
    def __init__(self, num_classes, pretrained=True, attention_channels=1792):
        super(EfficientNetWithAttention, self).__init__()

        # Load the pre-trained EfficientNet as a feature extractor
        # (1792 is the channel count of efficientnet_b4's final feature map)
        efficientnet = models.efficientnet_b4(pretrained=pretrained)
        self.features = efficientnet.features

        # Custom head: self-attention, then global pooling, then a linear classifier
        self.attention = SelfAttention(attention_channels)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(attention_channels, num_classes)

        self.val_loss = []
        self.val_accuracy = []
        self.test_loss = []
        self.test_accuracy = []
        self.train_loss = []
        self.train_accuracy = []

    def forward(self, x):
        # Forward pass through EfficientNet feature extractor
        x = self.features(x)

        # Apply self-attention module
        x = self.attention(x)

        # Global average pooling
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)

        # Fully connected layer for classification
        x = self.fc(x)
        return x
    
       

    def print_model_summary(self):
        print(self)
        print("Model Summary:")
        total_params = sum(p.numel() for p in self.parameters())
        print(f"Total Parameters: {total_params}")
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(f"Trainable Parameters: {trainable_params}")

    def plot_metrics_graph(self):
        epochs = range(1, len(self.train_loss) + 1)

        plt.figure(figsize=(12, 8))

        plt.subplot(2, 1, 1)
        plt.plot(epochs, self.train_loss, label='Train Loss', linewidth=2, color='blue')
        plt.plot(epochs, self.val_loss, label='Validation Loss', linewidth=2, color='orange')
        if self.test_loss:  # guard: train_model never populates test_loss
            plt.plot(epochs, self.test_loss, label='Test Loss', linewidth=2, color='green')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training, Validation and Test Loss')
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot(epochs, self.train_accuracy, label='Train Accuracy', linewidth=2, color='green')
        plt.plot(epochs, self.val_accuracy, label='Validation Accuracy', linewidth=2, color='red')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Training and Validation Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.show()

    def plot_confusion_matrix(self, y_true, y_pred):
        cm = confusion_matrix(y_true, y_pred)

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
        plt.xlabel("Predicted Labels")
        plt.ylabel("True Labels")
        plt.title("Confusion Matrix")
        plt.show()

    def train_model(self, train_loader, valid_loader, num_epochs, device):
        criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy loss
        optimizer = optim.Adam(self.parameters(), lr=0.001)

        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, verbose=True, min_lr=1e-6)

        for epoch in range(num_epochs):
            self.train()  # Set the model to training mode
            total_loss = 0.0
            correct_train = 0
            total_train = 0

            print(f"Epoch [{epoch+1}/{num_epochs}] - Training...")

            for batch_idx, (inputs, labels) in enumerate(train_loader):
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels.float().unsqueeze(1))
                loss.backward()
                optimizer.step()

                total_loss += loss.item() * inputs.size(0)
                predicted_labels = (outputs >= 0.0).float()  # logit >= 0 is equivalent to sigmoid(logit) >= 0.5
                correct_train += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                total_train += labels.size(0)

                print(f"Epoch [{epoch+1}/{num_epochs}] - Batch [{batch_idx+1}/{len(train_loader)}] - "
                      f"Loss: {loss.item():.4f} - Train Accuracy: {correct_train / total_train:.4f}")

            average_loss = total_loss / len(train_loader.dataset)
            train_accuracy = correct_train / total_train

            self.train_loss.append(average_loss)
            self.train_accuracy.append(train_accuracy)

            self.eval()
            total_val_loss = 0.0
            correct_val = 0
            total_val = 0

            y_true = []
            y_pred = []

            with torch.no_grad():
                for inputs, labels in valid_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = self(inputs)
                    val_loss = criterion(outputs, labels.float().unsqueeze(1))
                    total_val_loss += val_loss.item() * inputs.size(0)
                    predicted_labels = (outputs >= 0.0).float()
                    correct_val += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                    total_val += labels.size(0)

                    y_true.extend(labels.float().unsqueeze(1).cpu().numpy())
                    y_pred.extend(predicted_labels.cpu().numpy())

            average_val_loss = total_val_loss / len(valid_loader.dataset)
            val_accuracy = correct_val / total_val

            self.val_loss.append(average_val_loss)
            self.val_accuracy.append(val_accuracy)

            print(f"Epoch [{epoch+1}/{num_epochs}] - "
                  f"Train Loss: {average_loss:.4f} - Train Accuracy: {train_accuracy:.4f} - "
                  f"Val Loss: {average_val_loss:.4f} - Val Accuracy: {val_accuracy:.4f} - "
                  f"LR: {scheduler.optimizer.param_groups[0]['lr']:.6f}")

            scheduler.step(average_val_loss)
        
        self.plot_metrics_graph()
        self.plot_confusion_matrix(y_true, y_pred)

6. Start training

# Initialize the model
num_classes = 1  # a single output logit, since this is binary classification with BCEWithLogitsLoss
model = EfficientNetWithAttention(num_classes=num_classes, pretrained=True)

# Print the model structure
# print(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
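
Optionally, the summary helper defined on the class can be called here to check the architecture and parameter counts:

# Optional: print the architecture and the total/trainable parameter counts
model.print_model_summary()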

# Train the model using the integrated training loop
num_epochs = config.epoch  # increase this for the final run
model.train_model(train_loader, valid_loader, num_epochs, device)
#torch.save(model.state_dict(), 'model_efficient_b4.pth')
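
The test_loader defined in step 3 is never touched by train_model. A minimal sketch of how one might evaluate the trained model on it (my addition, reusing the same logit threshold as the training loop):

# Sketch of a test-set evaluation (my addition, not in the original post)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs >= 0.0).float()  # same threshold as train_model
        correct += (predicted == labels.float().unsqueeze(1)).sum().item()
        total += labels.size(0)
print(f"Test Accuracy: {correct / total:.4f}")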

7. Training results

When train_model finishes, it calls plot_metrics_graph and plot_confusion_matrix, so the loss/accuracy curves and the validation-set confusion matrix are displayed automatically.

