CAM激活图可视化系列(代码直接可用)——GradCAM(Pytorch官方版plus+可自定义修改版)
原理
原作者论文CAM原理图
Grad-CAM 首先通过反向传播得到目标类别分数关于所选特征图的梯度(维度为[C, H, W])。然后对该梯度在空间维度(H, W)上计算平均值,得到与类别信息有关且与特征图通道数一致的权重;再用这些权重对特征图的各个通道加权求和并经过 ReLU,得到热力图(激活图),最后将热力图与原图叠加即可。其中叠加的热力图要与原图大小一致(所以其中可能需要加入一些如插值等的图像缩放操作)
官方模块安装
pip install grad-cam
注意
GradCAM代码(本人修改:直接可用——官方版PLUS)
import os
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import models
from torchvision import transforms
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
# Grad-CAM package used below (basic + advanced usage): https://github.com/jacobgil/pytorch-grad-cam
# Background on the method: https://blog.csdn.net/qq_37541097/article/details/123089851
# Grad-CAM wrapped as a black-box helper on top of that package
def Grad_CAM_perBox(model, target_layers, img_path, save_path, target_category):
    """Draw a Grad-CAM heat map for one image using the pytorch-grad-cam package.

    The image is read from ``img_path``, normalised with the ImageNet mean and
    standard deviation, pushed through ``GradCAM`` and the resulting heat map
    is blended over the original image, saved to ``save_path`` and displayed.

    Args:
        model: classification network to explain.
        target_layers: list of layers handed to ``GradCAM``.
        img_path: path of the input image.
        save_path: path the matplotlib figure is written to.
        target_category: ``None`` leaves the class choice to the library
            (highest-scoring class), an integer selects that class index, and
            any other value is forwarded unchanged as the ``targets`` argument
            (e.g. an already-built list of target objects).

    Raises:
        AssertionError: if ``img_path`` does not exist.
    """
    # Kept as an assert (with the original message) so existing callers see
    # the same exception type as before.
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)

    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # Load inside a context manager so the file handle is closed right away.
    with Image.open(img_path) as handle:
        img = np.array(handle.convert('RGB'), dtype=np.uint8)

    # [C, H, W] after the transform, [1, C, H, W] once the batch axis is added.
    input_tensor = torch.unsqueeze(data_transform(img), dim=0)

    # The library expects a list of target objects, not a bare class index.
    if target_category is None:
        targets = None
    elif isinstance(target_category, (int, np.integer)):
        targets = [ClassifierOutputTarget(int(target_category))]
    else:
        targets = target_category

    # `use_cuda` is no longer accepted by recent pytorch-grad-cam releases and
    # defaulted to False in older ones, so it is simply omitted. The `with`
    # block releases the hooks GradCAM installs on the model.
    with GradCAM(model=model, target_layers=target_layers) as cam:
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)

    # Only one image was passed in, so take the first (and only) heat map.
    grayscale_cam = grayscale_cam[0, :]
    visualization = show_cam_on_image(img.astype(dtype=np.float32) / 255.,
                                      grayscale_cam,
                                      use_rgb=True)
    plt.imshow(visualization)
    plt.savefig(save_path)
    plt.show()
if __name__ == '__main__':
    # Any classifier (your own or another torchvision model) can be used;
    # pair each model with a matching target layer, for example:
    #   models.vgg16(pretrained=True)           -> [model.features]
    #   models.resnet34(pretrained=True)        -> [model.layer4]
    #   models.regnet_y_800mf(pretrained=True)  -> [model.trunk_output]
    #   models.efficientnet_b0(pretrained=True) -> [model.features]
    model = models.mobilenet_v3_large(pretrained=True)

    # target_category picks the class to explain, e.g.
    #   281 -> tabby, tabby cat
    #   254 -> pug, pug-dog
    # None leaves the class choice to the library.
    Grad_CAM_perBox(
        model=model,
        target_layers=[model.features[-1]],
        img_path="./cat.png",
        save_path="./CAM.png",
        target_category=None,
    )
结果对比
GradCAM代码(简书+本人修改:直接可用——可自定义修改版)
######CAM 详细实现原理(可更改代码):https://www.jianshu.com/p/fd2f09dc3cc9
######Grad_CAM 详细实现原理(可更改代码):https://www.jianshu.com/p/fd2f09dc3cc9
import math
import numpy as np
import torch
from torch import Tensor
from torch import nn
import torch.nn.functional as F
from typing import Optional, List
import torchvision.transforms as transforms
from PIL import Image
import torchvision.models as models
from torch import Tensor
from matplotlib import cm
from torchvision.transforms.functional import to_pil_image
def Grad_CAM_perModify(img_path, save_path, net, target_layers):
    """Compute a Grad-CAM heat map with hand-written hooks and overlay it.

    A forward hook captures the output of ``target_layers`` and a backward hook
    captures the gradient of the predicted class score with respect to that
    output. The spatially averaged gradient gives one weight per channel; the
    weighted channel sum, passed through ReLU and min-max normalised, is the
    heat map that gets blended over the original image.

    Args:
        img_path: path of the input image.
        save_path: path the blended image is saved to.
        net: classification network; it is run in eval mode for the duration
            of the call and its previous train/eval mode is restored afterwards.
        target_layers: the single module to hook (despite the plural name it
            is used as one module, e.g. ``net.features``).
    """

    def _normalize(cams: Tensor, eps: float = 1e-8) -> Tensor:
        """Min-max scale the last two dimensions of ``cams`` into [0, 1]."""
        flat = cams.flatten(start_dim=-2)
        low = flat.min(-1).values[..., None, None]
        high = flat.max(-1).values[..., None, None]
        # Clamp the range so a constant map yields zeros instead of NaN.
        return (cams - low) / (high - low).clamp_min(eps)

    def overlay_mask(img: Image.Image, mask: Image.Image, colormap: str = 'jet', alpha: float = 0.6) -> Image.Image:
        """Overlay a colormapped mask on a background image

        Args:
            img: background image
            mask: mask to be overlayed in grayscale
            colormap: colormap to be applied on the mask
            alpha: transparency of the background image
        Returns:
            overlayed image
        """
        if not isinstance(img, Image.Image) or not isinstance(mask, Image.Image):
            raise TypeError('img and mask arguments need to be PIL.Image')
        if not isinstance(alpha, float) or alpha < 0 or alpha >= 1:
            raise ValueError('alpha argument is expected to be of type float between 0 and 1')
        # `cm.get_cmap` is gone from recent matplotlib; prefer the registry
        # and fall back to the old call on versions that predate it.
        try:
            from matplotlib import colormaps
            cmap = colormaps[colormap]
        except ImportError:
            cmap = cm.get_cmap(colormap)
        # Resize mask and apply colormap
        overlay = mask.resize(img.size, resample=Image.BICUBIC)
        # The colormap returns RGBA; keep the first three channels (RGB).
        overlay = (255 * cmap(np.asarray(overlay) ** 2)[:, :, :3]).astype(np.uint8)
        # Overlay the image with the mask
        overlayed_img = Image.fromarray((alpha * np.asarray(img) + (1 - alpha) * overlay).astype(np.uint8))
        return overlayed_img

    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    feature_map = []  # output of the hooked module, filled by the forward hook
    grad = []  # gradient w.r.t. that output, filled by the backward hook

    def forward_hook(module, inp, outp):
        feature_map.append(outp)

    def backward_hook(module, grad_in, grad_out):
        grad.append(grad_out)

    # Load inside a context manager so the file handle is closed right away.
    with Image.open(img_path) as handle:
        orign_img = handle.convert('RGB')
    img = torch.unsqueeze(preprocess(orign_img), 0)  # add batch axis: [1, 3, 224, 224]

    # Run on whatever device the model already lives on.
    first_param = next(net.parameters(), None)
    if first_param is not None:
        img = img.to(first_param.device)

    # Inference must use eval mode: otherwise BatchNorm uses (and updates)
    # batch statistics and Dropout randomises the prediction.
    was_training = net.training
    net.eval()
    handles = [
        target_layers.register_forward_hook(forward_hook),
        target_layers.register_full_backward_hook(backward_hook),
    ]
    try:
        out = net(img)
        # Use the predicted class; replace with a fixed index (e.g. 281) to
        # explain a specific class instead.
        cls_idx = torch.argmax(out).item()
        score = out[:, cls_idx].sum()
        net.zero_grad()
        score.backward()
    finally:
        # Always detach the hooks so repeated calls do not pile them up on the
        # model, and hand the model back in its previous mode.
        for handle in handles:
            handle.remove()
        net.train(was_training)

    # One weight per channel: spatial mean of the gradient.
    weights = grad[0][0].squeeze(0).mean(dim=(1, 2))
    activations = feature_map[0].detach().squeeze(0)
    grad_cam = (weights.view(-1, 1, 1) * activations).sum(0)
    grad_cam = _normalize(F.relu(grad_cam)).cpu()
    mask = to_pil_image(grad_cam.detach().numpy(), mode='F')

    result = overlay_mask(orign_img, mask)
    result.show()
    result.save(save_path)
if __name__ == '__main__':
    # Other models can be dropped in here, for example:
    #   net = models.mobilenet_v3_large(pretrained=True)
    #   net = models.vgg11_bn(pretrained=True).cuda()
    net = models.vgg11_bn(pretrained=True)
    # print(net)
    Grad_CAM_perModify(
        img_path="./cat2.png",
        save_path="./CAM2.png",
        net=net,
        # The layer whose activations are visualised.
        target_layers=net.features,
    )
另一组实验结果:
后记:GradCAM本身其实只是一个工具,最后呈现的效果还是由所用模型+(类别编码)本身决定
PLUS相关参考
原理参考:https://blog.csdn.net/qq_37541097/article/details/123089851
torch 模块项目(官方版+进阶使用)来源:https://github.com/jacobgil/pytorch-grad-cam
CAM 详细实现原理(可更改代码):https://www.jianshu.com/p/fd2f09dc3cc9
Grad_CAM 详细实现原理(可更改代码):https://www.jianshu.com/p/fd2f09dc3cc9