YOLOv5 Heatmap Visualization with Grad-CAM

This article walks through implementing heatmap visualization for YOLOv5 and points out the changes needed to make the code work. Once those changes are made, it is essentially plug-and-play.
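
For background: Grad-CAM weights each channel of a chosen layer's activation maps by the spatial average of the gradient of a score with respect to that channel, then keeps the positive part of the weighted sum:

$$L_{\text{Grad-CAM}} = \mathrm{ReLU}\Big(\sum_k \alpha_k A^k\Big), \qquad \alpha_k = \frac{1}{Z}\sum_{i,j} \frac{\partial y}{\partial A^k_{ij}}$$

Here $y$ is the score being back-propagated (in this script, a detection's objectness confidence or class score), $A^k$ is the $k$-th activation map of the target layer, and $Z$ is the number of spatial positions. GradCAM++ and XGradCAM follow the same scheme but compute the weights $\alpha_k$ differently.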

1. Code

Source code: yolo-gradcam

The GitHub code is reproduced below, as published (the fixes it needs are covered in Section 3).

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch, yaml, cv2, os, shutil
import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from models.yolo import Model
from utils.general import intersect_dicts
from utils.augmentations import letterbox
from utils.general import xywh2xyxy
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients

class yolov5_heatmap:
    def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
        device = torch.device(device)
        ckpt = torch.load(weight)
        model_names = ckpt['model'].names
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        model = Model(cfg, ch=3, nc=len(model_names)).to(device)
        csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor'])  # intersect
        model.load_state_dict(csd, strict=False)  # load
        model.eval()
        print(f'Transferred {len(csd)}/{len(model.state_dict())} items')
        
        target_layers = [eval(layer)]
        method = eval(method)

        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int)
        self.__dict__.update(locals())
    
    def post_process(self, result):
        logits_ = result[..., 4:]
        boxes_ = result[..., :4]
        sorted, indices = torch.sort(logits_[..., 0], descending=True)
        return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy()

    def draw_detections(self, box, color, name, img):
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2, lineType=cv2.LINE_AA)
        return img

    def __call__(self, img_path, save_path):
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)

        # img process
        img = cv2.imread(img_path)
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)

        # init ActivationsAndGradients
        grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)

        # get ActivationsAndResult
        result = grads(tensor)
        activations = grads.activations[0].cpu().detach().numpy()

        # postprocess to yolo output
        post_result, post_boxes = self.post_process(result[0])
        for i in trange(int(post_result.size(0) * self.ratio)):
            if post_result[i][0] < self.conf_threshold:
                break

            self.model.zero_grad()
            if self.backward_type == 'conf':
                post_result[i, 0].backward(retain_graph=True)
            else:
                # get max probability for this prediction
                score = post_result[i, 1:].max()
                score.backward(retain_graph=True)

            # process heatmap
            gradients = grads.gradients[0]
            b, k, u, v = gradients.size()
            weights = self.method.get_cam_weights(self.method, None, None, None, activations, gradients.detach().numpy())
            weights = weights.reshape((b, k, 1, 1))
            saliency_map = np.sum(weights * activations, axis=1)
            saliency_map = np.squeeze(np.maximum(saliency_map, 0))
            saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            if (saliency_map_max - saliency_map_min) == 0:
                continue
            saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)

            # add heatmap and box to image
            cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
            cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())], f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}', cam_image)
            cam_image = Image.fromarray(cam_image)
            cam_image.save(f'{save_path}/{i}.png')

def get_params():
    params = {
        'weight': 'runs/train/exp/weights/best.pt',
        'cfg': 'models/yolov5m.yaml',
        'device': 'cuda:0',
        'method': 'XGradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM
        'layer': 'model.model[-2]',
        'backward_type': 'class', # class or conf
        'conf_threshold': 0.6, # 0.6
        'ratio': 0.02 # 0.02-0.1
    }
    return params

if __name__ == '__main__':
    model = yolov5_heatmap(**get_params())
    model(r'dataset\images\test\aircraft_1064.jpg', 'result')

2. Implementation Steps

2.1 Copy the Code

Create a file named v5-heatmap.py in your YOLOv5 project directory and paste the code above into it.

2.2 Environment Setup

Install the pytorch_grad_cam library with pip; if anything else turns out to be missing, pip-install that as well.

pip install pytorch_grad_cam
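
Note: if pip cannot resolve that name, the library (jacobgil's pytorch-grad-cam) is published on PyPI as grad-cam, so pip install grad-cam should work too. A quick sanity check that everything the script needs is importable:

# Sanity check: these are exactly the symbols v5-heatmap.py imports.
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients
print('pytorch_grad_cam is ready')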

2.3 Adjust the Parameters

Find get_params() and edit the parameters:

def get_params():
    params = {
        'weight': 'yolov5m.pt',
        'cfg': 'models/yolov5m.yaml',
        'device': 'cuda:0',
        'method': 'GradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM
        'layer': 'model.model[8]',
        'backward_type': 'class', # class or conf
        'conf_threshold': 0.6, # 0.6
        'ratio': 0.02 # 0.02-0.1
    }
    return params

get_params parameters:

  • weight:

        Path to the model weights; the code defaults to yolov5m.pt.

  • cfg:

        Path to the model config file; the code defaults to yolov5m.yaml. Note that it must match the configuration the weights were trained with, otherwise loading will fail.

  • device:

        Whether to run on GPU or CPU.

  • method:

        Which Grad-CAM variant to use; the default is GradCAM. Several are provided (GradCAMPlusPlus, GradCAM, XGradCAM); they can give slightly different results, so feel free to experiment.

  • layer:

        The layer to visualize; just change the index. For example, to use layer 9, set model.model[9]. To see which index maps to which module, see the sketch after this list.

  • backward_type:

        Which score to back-propagate. 'conf' back-propagates the objectness confidence; any other value (i.e. 'class') back-propagates the highest class score of each prediction.

  • conf_threshold:

        Confidence threshold; the default is 0.6.

  • ratio:

        Defaults to 0.02. It keeps only the highest-confidence predictions and discards the rest: 0.02 means heatmaps are drawn for the top 2% of predictions (e.g. with the 25,200 candidate boxes YOLOv5 produces at a 640x640 input, that is at most the top 504, further cut short by conf_threshold).
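
To find out which index corresponds to which module when setting layer, you can enumerate the model's sub-modules. A minimal sketch (assumed to run from the YOLOv5 repo root; the cfg path and class count are placeholders):

from models.yolo import Model

# Build a bare model from the cfg just to inspect its layer indices.
model = Model('models/yolov5m.yaml', ch=3, nc=80)
for i, m in enumerate(model.model):
    # i is the number you plug into 'layer': 'model.model[i]'
    print(i, m.__class__.__name__)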

2.4 Change the Image Path

Change the image path in the snippet below to your own. The path must not contain Chinese (non-ASCII) characters, or the script will error out.

if __name__ == '__main__':
    model = yolov5_heatmap(**get_params())
    model(r'path/to/your/image.jpg', 'result')  # the path must not contain Chinese characters!
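
This restriction comes from cv2.imread, which cannot decode non-ASCII paths (notably on Windows). If you must keep such a path, a common workaround is to read the bytes yourself and let OpenCV decode them, replacing the cv2.imread(img_path) call inside __call__:

import numpy as np
import cv2

# np.fromfile accepts any path the OS can open, and cv2.imdecode
# turns the raw bytes into the same BGR array cv2.imread would return.
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), cv2.IMREAD_COLOR)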

2.5 Run the Code

Before running, rule out the known issues first (see Section 3). Once those fixes are in, just run the script and the heatmaps come right out.

You can either hit run in your IDE or invoke the script from the terminal:

python v5-heatmap.py

2.6 Results

Since the save path above is result, the generated images land in that directory, one PNG per detection (0.png, 1.png, ...).

3. Troubleshooting

1. Problem 1

Error: ImportError: cannot import name 'intersect_dicts' from 'utils.general'

Fix:

In newer YOLOv5 versions intersect_dicts has moved to utils.torch_utils and is no longer in utils.general, so change from utils.general import intersect_dicts to

from utils.torch_utils import intersect_dicts

2. Problem 2

Error: AttributeError: module 'numpy' has no attribute 'int'.

Fix:

The np.int alias was deprecated in NumPy 1.20 and removed in 1.24, so it no longer exists on recent NumPy versions. (I covered this in an earlier bug-fix post: Numpy报错:AttributeError: module ‘numpy‘ has no attribute ‘int‘ - CSDN博客.)

Change the following line

colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int)

to

colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int_)

3. Problem 3

Error: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: ...

Fix:

This is caused by the inplace setting. Open models/yolo.py (around line 106, depending on your version) or Ctrl+F for the line self.inplace = self.yaml.get('inplace', True), and change it to

self.inplace = self.yaml.get('inplace', False)

4. Removing the Prediction Box

Method:

Comment out the second cam_image assignment (the self.draw_detections call) in __call__, so that only the heatmap overlay is saved, as shown in the snippet below.
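
For reference, with the call commented out the tail of the loop looks like this (all names come from the script itself):

            cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
            # cam_image = self.draw_detections(...)  # commented out: no box or label is drawn
            cam_image = Image.fromarray(cam_image)
            cam_image.save(f'{save_path}/{i}.png')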

5. Modified Code

import warnings

warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch, yaml, cv2, os, shutil
import numpy as np

np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from models.yolo import Model
from utils.torch_utils import intersect_dicts
from utils.augmentations import letterbox
from utils.general import xywh2xyxy
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients


class yolov5_heatmap:
    def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
        device = torch.device(device)
        ckpt = torch.load(weight)
        model_names = ckpt['model'].names
        csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
        model = Model(cfg, ch=3, nc=len(model_names)).to(device)
        csd = intersect_dicts(csd, model.state_dict(), exclude=['anchor'])  # intersect
        model.load_state_dict(csd, strict=False)  # load
        model.eval()
        print(f'Transferred {len(csd)}/{len(model.state_dict())} items')

        target_layers = [eval(layer)]  # e.g. 'model.model[9]' -> the sub-module to visualize
        method = eval(method)  # e.g. 'GradCAM' -> the CAM class from pytorch_grad_cam

        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int_)
        self.__dict__.update(locals())

    def post_process(self, result):
        # result: raw YOLO output of shape [1, N, 4 + 1 + nc] - xywh box, objectness, class scores
        logits_ = result[..., 4:]
        boxes_ = result[..., :4]
        # sort all predictions by objectness confidence, highest first
        sorted, indices = torch.sort(logits_[..., 0], descending=True)
        return logits_[0][indices[0]], xywh2xyxy(boxes_[0][indices[0]]).cpu().detach().numpy()

    def draw_detections(self, box, color, name, img):
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2,
                    lineType=cv2.LINE_AA)
        return img

    def __call__(self, img_path, save_path):
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)

        # img process
        img = cv2.imread(img_path)
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)

        # init ActivationsAndGradients
        grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)

        # get ActivationsAndResult
        result = grads(tensor)
        activations = grads.activations[0].cpu().detach().numpy()

        # postprocess to yolo output
        post_result, post_boxes = self.post_process(result[0])
        for i in trange(int(post_result.size(0) * self.ratio)):
            if post_result[i][0] < self.conf_threshold:
                break

            self.model.zero_grad()
            if self.backward_type == 'conf':
                post_result[i, 0].backward(retain_graph=True)
            else:
                # get max probability for this prediction
                score = post_result[i, 1:].max()
                score.backward(retain_graph=True)

            # process heatmap
            gradients = grads.gradients[0]
            b, k, u, v = gradients.size()
            weights = self.method.get_cam_weights(self.method, None, None, None, activations,
                                                  gradients.detach().numpy())
            weights = weights.reshape((b, k, 1, 1))
            saliency_map = np.sum(weights * activations, axis=1)
            saliency_map = np.squeeze(np.maximum(saliency_map, 0))
            saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
            saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
            if (saliency_map_max - saliency_map_min) == 0:
                continue
            saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)

            # add heatmap and box to image
            cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
            cam_image = self.draw_detections(post_boxes[i], self.colors[int(post_result[i, 1:].argmax())],
                                             f'{self.model_names[int(post_result[i, 1:].argmax())]} {post_result[i][0]:.2f}',
                                             cam_image)            ## comment out this call to remove the prediction box ##
            cam_image = Image.fromarray(cam_image)
            cam_image.save(f'{save_path}/{i}.png')


def get_params():
    params = {
        'weight': 'yolov5m.pt',
        'cfg': 'models/yolov5m.yaml',
        'device': 'cuda:0',
        'method': 'GradCAM',  # GradCAMPlusPlus, GradCAM, XGradCAM
        'layer': 'model.model[9]',
        'backward_type': 'class',  # class or conf
        'conf_threshold': 0.6,  # 0.6
        'ratio': 0.02  # 0.02-0.1
    }
    return params


if __name__ == '__main__':
    model = yolov5_heatmap(**get_params())
    model(r'/data/images/bus.jpg', 'result')  # first argument: image to analyze; second: directory to save results


Note: feel free to reach out in the comments or by private message with any questions!
