以 PyTorch 的 VGG 预训练模型为例，贴一下 Grad-CAM（Gradient-weighted Class Activation Mapping，梯度加权类激活图）的核心代码。
# Grad-CAM on a torchvision VGG11-BN classifier: run one image through the
# network, back-propagate the top-class score to the last feature maps, weight
# each map by its mean gradient, and overlay the resulting heatmap on the image.
#
# Imports are included so the snippet runs on its own.
import cv2
import numpy as np
import torch
import torchvision
from PIL import Image

img_path = r'elephant.jpg'
img = Image.open(img_path).convert('RGB')
# torchvision's pretrained classifiers were trained on ImageNet with these
# per-channel statistics; any other normalisation feeds the network
# out-of-distribution inputs and degrades both the prediction and the heatmap.
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
data = transforms(img).unsqueeze(0)  # add the batch dimension

# Load the pretrained model.
# NOTE: newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; the old keyword is kept here for compatibility with older versions.
model = torchvision.models.vgg11_bn(pretrained=True)
model.eval()

# Run the forward pass stage by stage so the feature maps stay accessible.
features = model.features(data)
# `features` is a non-leaf tensor, so its gradient is discarded unless we ask
# for it explicitly (a register_hook would also work but is more verbose).
features.retain_grad()
t = model.avgpool(features)
t = t.reshape(1, -1)
output = model.classifier(t)[0]

# Back-propagate the output value of the highest-scoring class.
pred = torch.argmax(output).item()
pred_class = output[pred]
pred_class.backward()
grads = features.grad  # same shape as `features`: (1, C, h, w)

# Grad-CAM weighting:
#   1. average each channel's gradient over its spatial positions -> (C,)
#   2. scale every feature map by its channel weight (broadcast over h, w)
#   3. average over channels, apply ReLU, then scale into [0, 1]
# The product is computed on a detached copy so the autograd-tracked tensor is
# not mutated in place and no new graph is recorded.
avg_grads = torch.mean(grads[0], dim=(1, 2))
features = features.detach()[0] * avg_grads[:, None, None]
heatmap = features.cpu().numpy()
heatmap = np.mean(heatmap, axis=0)
heatmap = np.maximum(heatmap, 0)
heatmap /= (np.max(heatmap) + 1e-8)  # epsilon guards against an all-zero map

# Resize the heatmap to the original image size, convert it to 8-bit, map it to
# colours, and blend it 50/50 with the original image.
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals failure by returning None rather than raising.
    raise FileNotFoundError(f"cv2.imread could not read image: {img_path}")
heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))  # dsize is (width, height)
heatmap = np.uint8(255 * heatmap)
heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
superimposed_img = np.uint8(heatmap * 0.5 + img * 0.5)
cv2.imshow('1', superimposed_img)
cv2.waitKey(0)