How to Add a Self-Attention Module to Your Own Model

I use a pretrained model, efficientnet-b4, on a dataset of face images; the attention module is inserted between the feature extractor and the classifier.

PS: hyperparameter tuning really is like alchemy. It took a whole day of trial and error just to get the code to run, and in the end I still don't know exactly why it worked. I don't feel I've fully understood the self-attention module yet, and my head is spinning, so I have to write this moment down~

Background: a binary image-classification problem.

1. Import the required packages and set up the environment

!pip install efficientnet_pytorch
!pip install vision-transformer-pytorch

import warnings
warnings.filterwarnings('ignore')

import os
import glob
import json
import random
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tabulate import tabulate
from PIL import Image, ImageDraw

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchvision.models as models
from torchvision import transforms
from torchvision.datasets import ImageFolder

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle

from efficientnet_pytorch import EfficientNet
from vision_transformer_pytorch import VisionTransformer

2. Set the training hyperparameters

# Configuration
class Config:
    def __init__(self):
        # Input image size
        self.image_width = 128
        self.image_height = 128
        self.epoch = 1
        self.seed = 42
        self.batch_size = 16  # the batch size sets the first dimension of the output; e.g. with batch_size=32 the model outputs a 32x1 tensor
        self.dataset_path = '/kaggle/input/.../'
        # self.checkpoint_filepath = 'model_checkpoint.h5'
        # self.logs_path = '/kaggle/working/logs'


# Instantiate the configuration
config = Config()

print("Checking Epoch Configuration:", config.epoch)

3. Dataset preprocessing

dataset = {"image_path":[],"img_status":[],"where":[]}

for where in os.listdir(config.dataset_path):
    for status in os.listdir(config.dataset_path+"/"+where):
        for image in glob.glob(os.path.join(config.dataset_path, where, status, "*.jpg")):
            dataset["image_path"].append(image)
            dataset["img_status"].append(status)
            dataset["where"].append(where)

dataset = pd.DataFrame(dataset)

# Shuffle the dataset and reset the index
dataset = shuffle(dataset)
dataset = dataset.reset_index(drop=True)
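
The dataset DataFrame above is only used for bookkeeping (the loaders below read from ImageFolder directly), but it makes a class-balance check cheap. An optional sanity check, not in the original code:

# Optional (my addition): inspect the split sizes and class balance
print(dataset["where"].value_counts())       # images per train/valid/test split
print(dataset["img_status"].value_counts())  # images per class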
# Data augmentation for the training set
# 12/05: define the augmentation operations
train_transform = transforms.Compose([
    transforms.Resize((config.image_height, config.image_width)),  # resize to the configured input size (config.image_* was otherwise unused)
    transforms.RandomHorizontalFlip(),  # random horizontal flip
    transforms.RandomRotation(degrees=15),  # random rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # color jitter
    # transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),  # random crop and rescale
    transforms.ToTensor(),  # convert to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize with ImageNet statistics
])
# Data transformation for validation and testing
val_test_transform = transforms.Compose([
    transforms.Resize((config.image_height, config.image_width)),  # same fixed size as training, without augmentation
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Data Loaders
train_dataset = ImageFolder(os.path.join(config.dataset_path, 'train'), transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)

valid_dataset = ImageFolder(os.path.join(config.dataset_path, 'valid'), transform=val_test_transform)
valid_loader = DataLoader(valid_dataset, batch_size=config.batch_size, shuffle=False)

test_dataset = ImageFolder(os.path.join(config.dataset_path, 'test'), transform=val_test_transform)
test_loader = DataLoader(test_dataset, batch_size=5, shuffle=False)
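
ImageFolder expects train/, valid/, and test/ each to contain one subfolder per class. A one-batch shape check is a cheap way to confirm the pipeline works (my addition; the expected sizes assume batch_size=16 and 128×128 inputs):

# Sketch of a one-batch sanity check (my addition)
images, labels = next(iter(train_loader))
print(images.shape)           # expected: torch.Size([16, 3, 128, 128])
print(labels.shape)           # expected: torch.Size([16])
print(train_dataset.classes)  # the two class-folder names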

4. Define the self-attention module

class SelfAttention(nn.Module):
    def __init__(self, in_channels):
        super(SelfAttention, self).__init__()
        # 1x1 convolutions project the input into query/key (theta/phi) and value (g) spaces
        self.theta = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
        self.phi = nn.Conv2d(in_channels, 112, kernel_size=1, stride=1)
        self.g = nn.Conv2d(in_channels, 64, kernel_size=1, stride=1)
        # project the attended values back to in_channels for the residual sum
        self.concat = nn.Conv2d(64, in_channels, kernel_size=1, stride=1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        theta = self.theta(x)  # (B, 112, H, W)
        phi = self.phi(x)      # (B, 112, H, W)
        g = self.g(x)          # (B, 64, H, W)
        # flatten the spatial dimensions into N = H * W
        theta = theta.view(x.size(0), -1, x.size(2) * x.size(3))  # (B, 112, N)
        phi = phi.view(x.size(0), -1, x.size(2) * x.size(3))      # (B, 112, N)
        g = g.view(x.size(0), -1, x.size(2) * x.size(3))          # (B, 64, N)

        theta = theta.permute(0, 2, 1)   # (B, N, 112)
        attn = torch.matmul(theta, phi)  # (B, N, N): similarity between every pair of positions
        attn = self.softmax(attn)        # normalize each row into attention weights

        g = g.permute(0, 2, 1)            # (B, N, 64)
        attn_g = torch.matmul(attn, g)    # (B, N, 64): attention-weighted sum of values
        attn_g = attn_g.permute(0, 2, 1)  # (B, 64, N)
        # reshape back into a feature map; infer the channel dim (the 64 value
        # channels) rather than reading g.size(1), which is N after the permute
        attn_g = attn_g.view(x.size(0), -1, x.size(2), x.size(3))  # (B, 64, H, W)
        attn_g = self.concat(attn_g)  # (B, in_channels, H, W)

        return attn_g + x  # residual connection
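
The module is shape-preserving, which is what lets it drop in between the backbone and the classifier head. A minimal shape check (my addition; the dummy size matches the efficientnet-b4 feature map for 128×128 inputs):

# Sketch of a shape check (my addition): output shape must equal input shape
attn = SelfAttention(in_channels=1792)
dummy = torch.randn(2, 1792, 4, 4)  # B4 feature map for 128x128 inputs
print(attn(dummy).shape)            # expected: torch.Size([2, 1792, 4, 4])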


5. The custom model built on the pretrained backbone

class EfficientNetWithAttention(nn.Module):
    def __init__(self, num_classes, pretrained=True, attention_channels=1792):
        super(EfficientNetWithAttention, self).__init__()

        # Load the pre-trained EfficientNet as a feature extractor
        # (1792 is the channel count of efficientnet_b4's final feature map)
        efficientnet = models.efficientnet_b4(pretrained=pretrained)
        self.features = efficientnet.features

        # Custom head: self-attention, then global pooling, then a linear classifier
        self.attention = SelfAttention(attention_channels)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(attention_channels, num_classes)

        self.val_loss = []
        self.val_accuracy = []
        self.test_loss = []
        self.test_accuracy = []
        self.train_loss = []
        self.train_accuracy = []

    def forward(self, x):
        # Forward pass through EfficientNet feature extractor
        x = self.features(x)

        # Apply self-attention module
        x = self.attention(x)

        # Global average pooling
        x = self.avg_pool(x)
        x = x.view(x.size(0), -1)

        # Fully connected layer for classification
        x = self.fc(x)
        return x
    
       

    def print_model_summary(self):
        print(self)
        print("Model Summary:")
        total_params = sum(p.numel() for p in self.parameters())
        print(f"Total Parameters: {total_params}")
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
        print(f"Trainable Parameters: {trainable_params}")

    def plot_metrics_graph(self):
        epochs = range(1, len(self.train_loss) + 1)

        plt.figure(figsize=(12, 8))

        plt.subplot(2, 1, 1)
        plt.plot(epochs, self.train_loss, label='Train Loss', linewidth=2, color='blue')
        plt.plot(epochs, self.val_loss, label='Validation Loss', linewidth=2, color='orange')
        if self.test_loss:  # guard: train_model never populates test_loss
            plt.plot(epochs, self.test_loss, label='Test Loss', linewidth=2, color='green')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Training, Validation and Test Loss')
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot(epochs, self.train_accuracy, label='Train Accuracy', linewidth=2, color='green')
        plt.plot(epochs, self.val_accuracy, label='Validation Accuracy', linewidth=2, color='red')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title('Training and Validation Accuracy')
        plt.legend()

        plt.tight_layout()
        plt.show()

    def plot_confusion_matrix(self, y_true, y_pred):
        cm = confusion_matrix(y_true, y_pred)

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", cbar=False)
        plt.xlabel("Predicted Labels")
        plt.ylabel("True Labels")
        plt.title("Confusion Matrix")
        plt.show()

    def train_model(self, train_loader, valid_loader, num_epochs, device):
        criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy loss
        optimizer = optim.Adam(self.parameters(), lr=0.001)

        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=3, verbose=True, min_lr=1e-6)

        for epoch in range(num_epochs):
            self.train()  # Set the model to training mode
            total_loss = 0.0
            correct_train = 0
            total_train = 0

            print(f"Epoch [{epoch+1}/{num_epochs}] - Training...")

            for batch_idx, (inputs, labels) in enumerate(train_loader):
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = self(inputs)
                loss = criterion(outputs, labels.float().unsqueeze(1))
                loss.backward()
                optimizer.step()

                total_loss += loss.item() * inputs.size(0)
                predicted_labels = (outputs >= 0.0).float()  # logit >= 0 is equivalent to sigmoid(logit) >= 0.5
                correct_train += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                total_train += labels.size(0)

                print(f"Epoch [{epoch+1}/{num_epochs}] - Batch [{batch_idx+1}/{len(train_loader)}] - "
                      f"Loss: {loss.item():.4f} - Train Accuracy: {correct_train / total_train:.4f}")

            average_loss = total_loss / len(train_loader.dataset)
            train_accuracy = correct_train / total_train

            self.train_loss.append(average_loss)
            self.train_accuracy.append(train_accuracy)

            self.eval()
            total_val_loss = 0.0
            correct_val = 0
            total_val = 0

            y_true = []
            y_pred = []

            with torch.no_grad():
                for inputs, labels in valid_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = self(inputs)
                    val_loss = criterion(outputs, labels.float().unsqueeze(1))
                    total_val_loss += val_loss.item() * inputs.size(0)
                    predicted_labels = (outputs >= 0.0).float()
                    correct_val += (predicted_labels == labels.float().unsqueeze(1)).sum().item()
                    total_val += labels.size(0)

                    y_true.extend(labels.float().unsqueeze(1).cpu().numpy())
                    y_pred.extend(predicted_labels.cpu().numpy())

            average_val_loss = total_val_loss / len(valid_loader.dataset)
            val_accuracy = correct_val / total_val

            self.val_loss.append(average_val_loss)
            self.val_accuracy.append(val_accuracy)

            print(f"Epoch [{epoch+1}/{num_epochs}] - "
                  f"Train Loss: {average_loss:.4f} - Train Accuracy: {train_accuracy:.4f} - "
                  f"Val Loss: {average_val_loss:.4f} - Val Accuracy: {val_accuracy:.4f} - "
                  f"LR: {scheduler.optimizer.param_groups[0]['lr']:.6f}")

            scheduler.step(average_val_loss)
        
        self.plot_metrics_graph()
        self.plot_confusion_matrix(y_true, y_pred)

6. Start training

# Initialize the model
num_classes = 1  # a single output logit, since this is binary classification with BCEWithLogitsLoss
model = EfficientNetWithAttention(num_classes=num_classes, pretrained=True)

# Print the model structure
# print(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
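
Optionally, the summary helper defined on the class can be called here to check the architecture and parameter counts:

# Optional: print the architecture and the total/trainable parameter counts
model.print_model_summary()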

# Train the model using the integrated training loop
num_epochs = config.epoch  # increase this for the final run
model.train_model(train_loader, valid_loader, num_epochs, device)
#torch.save(model.state_dict(), 'model_efficient_b4.pth')
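
The test_loader defined in step 3 is never touched by train_model. A minimal sketch of how one might evaluate the trained model on it (my addition, reusing the same logit threshold as the training loop):

# Sketch of a test-set evaluation (my addition, not in the original post)
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = (outputs >= 0.0).float()  # same threshold as train_model
        correct += (predicted == labels.float().unsqueeze(1)).sum().item()
        total += labels.size(0)
print(f"Test Accuracy: {correct / total:.4f}")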

7. Training results

When train_model finishes, it calls plot_metrics_graph and plot_confusion_matrix, so the loss/accuracy curves and the validation-set confusion matrix are displayed automatically.

