基于pytorch框架从0开始搭建CAM类激活热力图

由于ResNet等以平均池化和一个全连接层结尾的cnn并不需要修改网络结构就可以使用CAM,所以本文基于AlexNet进行搭建。使用Kaggle的猫狗数据集,代码如下:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from PIL import Image
import glob
from torch.utils.data import DataLoader
import numpy as np
from torchvision.models import alexnet
from datetime import datetime
import time
import os

class train_ImageDataset(Dataset):
    """Training dataset for the Kaggle cats-vs-dogs images.

    Expects files named like ``cat.123.jpg`` / ``dog.456.jpg`` directly under
    ``root``. Returns ``(image_tensor, one_hot_label)`` where
    cat -> [0, 1] and dog -> [1, 0].
    """

    def __init__(self, root):
        # Light augmentation + ImageNet normalization (the stats the
        # pretrained AlexNet encoder was trained with).
        self.transform = transforms.Compose(
            [
                transforms.RandomHorizontalFlip(p=0.3),
                transforms.RandomVerticalFlip(p=0.3),
                transforms.Resize((256, 256)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Take the class name from the file name itself ("cat.0.jpg" -> "cat").
        # The original sliced around path.index("."), which finds the FIRST dot
        # of the whole path and breaks whenever the directory part contains one
        # (e.g. "./train/cat.0.jpg" -> empty label).
        label_str = os.path.basename(path).split(".")[0]
        # convert("RGB") guards against grayscale/RGBA images in the dataset.
        img = self.transform(Image.open(path).convert("RGB"))
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            # Fail loudly instead of crashing later inside torch.Tensor("").
            raise ValueError("unexpected label in file name: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)

class test_ImageDataset(Dataset):
    """Test dataset for the Kaggle cats-vs-dogs images (no augmentation).

    Expects files named like ``cat.123.jpg`` / ``dog.456.jpg`` directly under
    ``root``. Returns ``(image_tensor, one_hot_label)`` where
    cat -> [0, 1] and dog -> [1, 0].
    """

    def __init__(self, root):
        # Deterministic preprocessing only: resize + ImageNet normalization.
        self.transform = transforms.Compose(
            [
                transforms.Resize((256, 256)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Take the class name from the file name ("dog.42.jpg" -> "dog").
        # The original used path.index("."), which hits the "." of "./test"
        # and yields an empty label.
        label_str = os.path.basename(path).split(".")[0]
        # convert("RGB") guards against grayscale/RGBA images in the dataset.
        img = self.transform(Image.open(path).convert("RGB"))
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            # Fail loudly instead of crashing later inside torch.Tensor("").
            raise ValueError("unexpected label in file name: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)




def train(model, device, train_loader, optimizer, epoch, loss):
    """Run one training epoch.

    Prints average loss / accuracy / wall time for the epoch, and appends a
    line to the module-level ``Log_txt`` file every 5 epochs.

    Args:
        model: the network to train (moved to ``device`` by the caller).
        device: torch.device to run on.
        train_loader: yields (images, one-hot labels).
        optimizer: optimizer stepping ``model``'s parameters.
        epoch: 1-based epoch number (used for printing/logging only).
        loss: criterion taking (logits, one-hot target).
    """
    start_epoch = time.time()
    model.train()
    error_num = 0
    sum_num = 0
    sum_loss = 0
    for inputs, label in train_loader:  # renamed from `input` (shadows builtin)
        inputs = inputs.to(device)
        target = label.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        Loss = loss(output, target)
        Loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1).cpu().numpy()            # predicted class index
        true_cls = target.argmax(dim=1).detach().cpu().numpy()  # one-hot -> class index
        # Count mismatches directly: works for any number of classes, unlike
        # the original |pred - target| trick, which is only valid for 2 classes.
        error_num += np.sum(pred != true_cls)
        sum_num += pred.shape[0]
        sum_loss += Loss.item()
    acc = 1.0 - error_num / sum_num
    loss_avg = sum_loss / len(train_loader)
    print("Train:[Epoch %d] [Loss: %f] [Acc: %f]" % (epoch, loss_avg, acc))
    times = time.time() - start_epoch
    print(times)
    if epoch % 5 == 0:
        # Log_txt is a module-level path defined in the __main__ section.
        with open(Log_txt, "a") as f:
            f.write("Train:[Iterations %d] [Loss: %f] [Acc: %f] [Epoch Time: %f]\n" % (epoch, loss_avg, acc, times))
# 测试
def test(model, device, test_loader, epoch, loss):
    model.eval()
    with torch.no_grad():
        error_num = 0
        sum_num = 0
        sum_loss = 0
        for input, label in test_loader:
            input = input.to(device)
            target = label.to(device)
            optimizer.zero_grad()
            output = model(input)
            Loss = loss(output, target)
            pred = output.max(1, keepdim=True)[1].squeeze(1).cpu().numpy()  # 找到概率最大的下标
            target = target.max(1, keepdim=True)[1].detach().squeeze(1).cpu().numpy()
            error_num += np.sum(np.abs(pred-target))
            sum_num += pred.shape[0]
            sum_loss += Loss.item()
        acc = 1.0 - error_num / sum_num
        loss_avg = sum_loss / len(test_loader)
        print("Test:[Epoch %d] [Loss: %f] [Acc: %f]"% (epoch, loss_avg, acc))
        if epoch % 5 == 0:
            with open(Log_txt, "a") as f:
                f.write("Test:[Iterations %d] [Loss: %f] [Acc: %f]\n"% (epoch, loss_avg, acc))

class cnn(nn.Module):
    """AlexNet-backed CAM-ready classifier.

    Conv feature extractor -> global average pooling -> 2-way fully-connected
    head. The GAP + single-FC ending is what makes plain CAM applicable.
    """

    def __init__(self):
        super(cnn, self).__init__()
        self.model = alexnet(pretrained=True)
        # Keep only the convolutional layers; to use vgg16 instead, just swap
        # alexnet for vgg16 here.
        self.encoder = nn.Sequential(*list(self.model.children())[0])
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 2)

    def forward(self, x):
        features = self.encoder(x)        # (batch, 256, H, W) feature maps
        pooled = self.avg(features)       # (batch, 256, 1, 1)
        flat = pooled.view(pooled.shape[0], -1)
        return self.fc(flat)

if __name__ == '__main__':
    # Training entry point: trains the AlexNet-based CAM classifier on ./train,
    # evaluates on ./test each epoch, and checkpoints the model after every epoch.
    os.makedirs("log",exist_ok=True)
    os.makedirs("saved_model", exist_ok=True)
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Timestamped log file name; ":" is replaced so the name is valid on Windows too.
    Log_txt = "./log/" + str(datetime.now()).replace(" ","_").replace(":","_") + "_result.txt"
    start_all_time = time.time()
    train_loader = DataLoader(
        train_ImageDataset("./train"),
        batch_size=256,
        shuffle=True,
        num_workers=0,
    )
    test_loader = DataLoader(
        test_ImageDataset("./test"),
        batch_size=256,
        shuffle=True,
        num_workers=0,
    )
    model = cnn().to(DEVICE)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    loss = torch.nn.CrossEntropyLoss().to(DEVICE)
    EPOCH = 50
    for epoch in range(1, EPOCH + 1):
        train(model, DEVICE, train_loader, optimizer, epoch, loss)
        test(model, DEVICE, test_loader, epoch, loss)
        torch.save(model.state_dict(), './saved_model/AlexNet_oriCAM_CAT&DOG_iteration_' + str(epoch) + '.pth') # save a checkpoint after every epoch
    end_all_time = time.time()
    time_all = end_all_time - start_all_time
    with open(Log_txt, "a") as f:
        f.write("All Time: %f" % (time_all))
   

如果需要输出类激活热力图,代码如下:

import os
from torch.utils.data import Dataset

import glob
from torch.utils.data import DataLoader
import pandas as pd

import cv2
import torch
import torch.nn as nn
from torchvision.models import alexnet
from torchvision import datasets, transforms
import numpy as np
from PIL import Image
# ImageNet channel mean/std used by transforms.Normalize below;
# denormalize() applies the inverse to recover a displayable image.
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

class test_ImageDataset(Dataset):
    """Test dataset for CAM visualization (no augmentation).

    Expects files named like ``cat.123.jpg`` / ``dog.456.jpg`` directly under
    ``root``. Returns ``(image_tensor, one_hot_label)`` where
    cat -> [0, 1] and dog -> [1, 0].
    """

    def __init__(self, root):
        # Deterministic preprocessing only: resize + ImageNet normalization.
        self.transform = transforms.Compose(
            [
                transforms.Resize((256, 256)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
            ])
        self.files = sorted(glob.glob(root + "/*.*"))

    def __getitem__(self, index):
        path = self.files[index % len(self.files)]
        # Take the class name from the file name ("cat.0.jpg" -> "cat").
        # The original used path.index("."), which hits the "." of "./test"
        # and yields an empty label.
        label_str = os.path.basename(path).split(".")[0]
        # convert("RGB") guards against grayscale/RGBA images in the dataset.
        img = self.transform(Image.open(path).convert("RGB"))
        if label_str == "cat":
            label = [0, 1]
        elif label_str == "dog":
            label = [1, 0]
        else:
            # Fail loudly instead of crashing later inside torch.Tensor("").
            raise ValueError("unexpected label in file name: %s" % path)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.files)

def denormalize(tensors, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
    """Undo transforms.Normalize in place on a (3, H, W) image tensor.

    Args:
        tensors: normalized image tensor, channels first; modified in place.
        mean, std: per-channel statistics used by the forward normalization
            (defaults match the module-level ImageNet constants).

    Returns:
        The de-normalized tensor clamped to [0, 1].
    """
    for c in range(3):
        tensors[c, :, :].mul_(std[c]).add_(mean[c])
    # (fix) Clamp to the [0, 1] image range. The de-normalized tensor lives in
    # [0, 1], not [0, 255]; the old clamp(0, 255) let values > 1 slip through
    # and overflow uint8 after the caller's * 255.0 rescaling.
    return torch.clamp(tensors, 0, 1)



class cnn(nn.Module):
    """AlexNet encoder + global average pooling + 2-way FC head.

    forward() returns the logits together with the conv feature maps so the
    caller can build the class-activation map from them.
    """

    def __init__(self):
        super(cnn, self).__init__()
        self.model = alexnet(pretrained=True)
        # Convolutional feature extractor only (swap alexnet for vgg16 to reuse).
        self.encoder = nn.Sequential(*list(self.model.children())[0])
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, 2)

    def forward(self, x):
        feature_maps = self.encoder(x)                      # (batch, 256, H, W)
        pooled = self.avg(feature_maps)                     # (batch, 256, 1, 1)
        logits = self.fc(pooled.view(pooled.shape[0], -1))
        # Feature maps are returned alongside the logits so CAM can weight them.
        return logits, feature_maps


# 测试
# Inference + CAM generation
def test(model, device, test_loader):
    """Run inference over test_loader and, for every image, save the original
    image, the class-activation heat map, and their weighted overlay under
    ./oriCAM_output/test/.

    NOTE(review): assumes batch_size == 1 — data.squeeze() and pred[0] only
    make sense for single-image batches; confirm the loader configuration.
    """
    model.eval()
    # Grab the final FC weight matrix (2 x 256): CAM weights each conv feature
    # map by the FC weight of the predicted class.
    for name, param in model.named_parameters():
        if name=="fc.weight":
            w = param.detach().cpu().numpy()
            break
    with torch.no_grad():
        index = 1
        for data, target in test_loader:
            data_de = denormalize(data.squeeze())   # de-normalized CPU copy, taken before data is moved to the device
            data, target = data.to(device), target.to(device)
            true_output, featuremap = model(data)
            featuremap = featuremap.detach().squeeze().cpu().numpy()  # (256, H, W) conv feature maps
            pred = true_output.max(1, keepdim=True)[1].squeeze(1).cpu().numpy()  # index of the highest logit = predicted class
            #print(target)
            target = target.max(1, keepdim=True)[1].detach().squeeze(1).cpu().numpy()  # one-hot label -> class index
            bo = (pred == target)
            # One output directory per image, encoding the prediction and whether it was correct.
            dir = "./oriCAM_output/test/%d_attentionMap_%d_%s" % (index, int(pred[0]), str(bo))
            os.makedirs(dir, exist_ok=True)

            # [::-1] reverses the channel axis (RGB -> BGR, the order cv2.imwrite
            # expects), transpose makes it HWC, and * 255 rescales to pixel values.
            ori__img = np.array(data_de)[::-1, :, :].transpose(1, 2, 0) * 255.0
            ori_img = cv2.resize(ori__img, (500, 500))
            original_img = ori_img.astype(np.uint8)
            cv2.imwrite("%s/%d_ORIImg.jpg" % (dir, index), original_img)

            # CAM: weighted sum of the 256 feature maps using the predicted
            # class's FC weights.
            prediction = int(pred[0])
            weight = w[prediction,:]
            activation_img = np.zeros((featuremap.shape[1],featuremap.shape[2]))
            for num2 in range(featuremap.shape[0]):
                feature_img = featuremap[num2, :, :] * weight[num2]
                activation_img += feature_img
            # Min-max normalize to [0, 255] for color mapping.
            # NOTE(review): divides by (max - min) — a perfectly flat activation
            # map would divide by zero.
            activation_img = (activation_img - np.min(activation_img)) / (
                        np.max(activation_img) - np.min(activation_img)) * 255.0
            activation_img = cv2.resize(activation_img, (500, 500))
            activation_img = activation_img.astype(np.uint8)
            image = cv2.applyColorMap(activation_img, cv2.COLORMAP_JET)
            # Blend: 0.7 * heat map + 0.3 * original image.
            add_img = cv2.addWeighted(image, 0.7, original_img, 0.3, 0)
            cv2.imwrite("%s/%d_AttImg.jpg" % (dir, index), image)
            cv2.imwrite("%s/%d_AddImg.jpg" % (dir, index), add_img)
            index += 1






if __name__ == '__main__':
    # CAM visualization entry point: loads the trained checkpoint and writes
    # heat maps for every test image.
    BATCH_SIZE = 1  # must stay 1: test() squeezes the batch dim and indexes pred[0]; needs roughly 2 GB of GPU memory
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # use the GPU when available — much faster
    # Build the test-set loader (shuffle=False keeps output folders in file order)
    test_loader = DataLoader(
        test_ImageDataset("./test"),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )
    model = cnn().to(DEVICE)
    # Load the checkpoint saved after epoch 50 by the training script.
    model.load_state_dict(torch.load("./saved_model/AlexNet_oriCAM_CAT&DOG_iteration_50.pth"))
    test(model,DEVICE,test_loader)

由上图可见效果还是不错滴~

  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
激活函数在神经网络中起到了非常重要的作用,它们引入了非线性因素,提高了神经网络对模型的表达能力,解决了线性模型所不能解决的问题。在PyTorch中,常见的激活函数有sigmoid函数、tanh函数和ReLU函数。 Sigmoid函数是一个常用的激活函数,它将输入值压缩到区间[0, 1]之间,保证了数据的稳定性和波动幅度的小。然而,Sigmoid函数在两端的饱和区梯度趋近于0,容易出现梯度消失或梯度爆炸的问题。 Tanh函数是另一个常见的激活函数,它将输入值压缩到区间[-1, 1]之间,解决了Sigmoid函数非0均值的问题。然而,Tanh函数也存在着梯度消失或梯度爆炸的问题,并且运算量较大。 ReLU函数是目前使用最广泛的激活函数,它将负值设为0,保留正值。ReLU函数的优点是计算简单,且不存在梯度消失的问题。然而,ReLU函数的缺点是输出不是0均值,导致反向传播时更新方向不一致,收敛速度较慢。 在PyTorch中,可以使用torch.nn中的相应函数来实现这些激活函数。例如,torch.nn.Sigmoid()可以用来实现Sigmoid函数,torch.nn.Tanh()可以用来实现Tanh函数,torch.nn.ReLU()可以用来实现ReLU函数。 参考:[PyTorch激活函数](https://blog.csdn.net/qq_42504905/article/details/116841325)

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值