Visualizing feature maps in mmdetection
Approach: return the feature maps of the four backbone stages from the forward pass (a simpler method is covered in my next post, which you are welcome to read).
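In miniature, the idea looks like this (a hypothetical toy sketch, not the actual mmdetection code; the real edits follow below):

# hypothetical sketch of the idea: keep the normal outputs, but also hand the
# intermediate stage feature maps back to the caller
def extract_feat(self, img):
    stages = self.backbone(img)   # tuple of the four stage feature maps
    feats = self.neck(stages)     # feature pyramid consumed by the heads
    return feats, stages          # extra return value for visualization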
1. Modifying two_stage.py
All the places I modified are marked with # for visualization. They are currently commented out; to use them, uncomment those lines and comment out the corresponding lines in the original file.
import torch
import torch.nn as nn

# from mmdet.core import bbox2result, bbox2roi, build_assigner, build_sampler
from ..builder import DETECTORS, build_backbone, build_head, build_neck
from .base import BaseDetector


@DETECTORS.register_module()
class TwoStageDetector(BaseDetector):
    """Base class for two-stage detectors.

    Two-stage detectors typically consist of a region proposal network and a
    task-specific regression head.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 rpn_head=None,
                 roi_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(TwoStageDetector, self).__init__()
        self.backbone = build_backbone(backbone)

        if neck is not None:
            self.neck = build_neck(neck)

        if rpn_head is not None:
            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None
            rpn_head_ = rpn_head.copy()
            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)
            self.rpn_head = build_head(rpn_head_)

        if roi_head is not None:
            # update train and test cfg here for now
            # TODO: refactor assigner & sampler
            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None
            roi_head.update(train_cfg=rcnn_train_cfg)
            roi_head.update(test_cfg=test_cfg.rcnn)
            self.roi_head = build_head(roi_head)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        self.init_weights(pretrained=pretrained)
    @property
    def with_rpn(self):
        """bool: whether the detector has RPN"""
        return hasattr(self, 'rpn_head') and self.rpn_head is not None

    @property
    def with_roi_head(self):
        """bool: whether the detector has a RoI head"""
        return hasattr(self, 'roi_head') and self.roi_head is not None

    def init_weights(self, pretrained=None):
        """Initialize the weights in detector.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        super(TwoStageDetector, self).init_weights(pretrained)
        self.backbone.init_weights(pretrained=pretrained)
        if self.with_neck:
            if isinstance(self.neck, nn.Sequential):
                for m in self.neck:
                    m.init_weights()
            else:
                self.neck.init_weights()
        if self.with_rpn:
            self.rpn_head.init_weights()
        if self.with_roi_head:
            self.roi_head.init_weights(pretrained)

    def extract_feat(self, img):
        """Directly extract features from the backbone+neck."""
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
            # _x = self.neck(x)  # for visualization
        return x
        # return _x, x  # for visualization
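    # Note: with the visualization lines active, extract_feat returns the tuple
    # (neck outputs, backbone stage outputs); callers then unpack it as
    # `x, _x = self.extract_feat(img)`.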
    def forward_dummy(self, img):
        """Used for computing network flops.

        See `mmdetection/tools/get_flops.py`
        """
        outs = ()
        # backbone
        x = self.extract_feat(img)
        # x, _x = self.extract_feat(img)  # for visualization
        # rpn
        if self.with_rpn:
            rpn_outs = self.rpn_head(x)
            outs = outs + (rpn_outs, )
        proposals = torch.randn(1000, 4).to(img.device)
        # roi_head
        roi_outs = self.roi_head.forward_dummy(x, proposals)
        outs = outs + (roi_outs, )
        return outs

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      proposals=None,
                      **kwargs):
        """
        Args:
            img (Tensor): of shape (N, C, H, W) encoding input images.
                Typically these should be mean centered and std scaled.
            img_metas (list[dict]): list of image info dict where each dict
                has: 'img_shape', 'scale_factor', 'flip', and may also contain
                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
                For details on the values of these keys see
                `mmdet/datasets/pipelines/formatting.py:Collect`.
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.
            gt_masks (None | Tensor): true segmentation masks for each box
                used if the architecture supports a segmentation task.
            proposals: override rpn proposals with custom proposals. Use when
                `with_rpn` is False.

        Returns:
            dict[str, Tensor]: a dictionary of loss components
        """
        x = self.extract_feat(img)
        # x, _x = self.extract_feat(img)  # for visualization

        losses = dict()

        # RPN forward and loss
        if self.with_rpn:
            proposal_cfg = self.train_cfg.get('rpn_proposal',
                                              self.test_cfg.rpn)
            rpn_losses, proposal_list = self.rpn_head.forward_train(
                x,
                img_metas,
                gt_bboxes,
                gt_labels=None,
                gt_bboxes_ignore=gt_bboxes_ignore,
                proposal_cfg=proposal_cfg)
            losses.update(rpn_losses)
        else:
            proposal_list = proposals

        roi_losses = self.roi_head.forward_train(x, img_metas, proposal_list,
                                                 gt_bboxes, gt_labels,
                                                 gt_bboxes_ignore, gt_masks,
                                                 **kwargs)
        losses.update(roi_losses)

        return losses
    async def async_simple_test(self,
                                img,
                                img_meta,
                                proposals=None,
                                rescale=False):
        """Async test without augmentation."""
        assert self.with_bbox, 'Bbox head must be implemented.'
        x = self.extract_feat(img)
        # x, _x = self.extract_feat(img)  # for visualization

        if proposals is None:
            proposal_list = await self.rpn_head.async_simple_test_rpn(
                x, img_meta)
        else:
            proposal_list = proposals

        return await self.roi_head.async_simple_test(
            x, proposal_list, img_meta, rescale=rescale)

    def simple_test(self, img, img_metas, proposals=None, rescale=False):
        """Test without augmentation."""
        assert self.with_bbox, 'Bbox head must be implemented.'
        x = self.extract_feat(img)
        # x, _x = self.extract_feat(img)  # for visualization

        if proposals is None:
            proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
        else:
            proposal_list = proposals

        return self.roi_head.simple_test(
            x, proposal_list, img_metas, rescale=rescale)
        # return _x, self.roi_head.simple_test(
        #     x, proposal_list, img_metas, rescale=rescale)  # for visualization
    def aug_test(self, imgs, img_metas, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        x = self.extract_feats(imgs)
        # x, _x = self.extract_feats(imgs)  # for visualization
        proposal_list = self.rpn_head.aug_test_rpn(x, img_metas)
        return self.roi_head.aug_test(
            x, proposal_list, img_metas, rescale=rescale)
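For orientation, here is a quick sanity check of what the visualization tuple holds (a sketch; the stage shapes assume a ResNet-50 backbone with the default out_indices):

# hypothetical sanity check, assuming a ResNet-50 backbone
neck_feats, stage_feats = model.extract_feat(img)
for f in stage_feats:
    print(f.shape)
# expected for ResNet-50: 256/512/1024/2048 channels at strides 4/8/16/32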
2. inference.py
As above, all modified places are marked with # for visualization.
...
def inference_detector(model, img):
    """Inference image(s) with the detector.

    Args:
        model (nn.Module): The loaded detector.
        img (str/ndarray or list[str/ndarray]): Either image files or loaded
            images.

    Returns:
        If img is a str, a generator will be returned, otherwise return the
        detection results directly.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # prepare data
    if isinstance(img, np.ndarray):
        # directly add img
        data = dict(img=img)
        cfg = cfg.copy()
        # set loading pipeline type
        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'
    else:
        # add information into dict
        data = dict(img_info=dict(filename=img), img_prefix=None)
    # build the data pipeline
    test_pipeline = Compose(cfg.data.test.pipeline)
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device])[0]
    else:
        for m in model.modules():
            assert not isinstance(
                m, RoIPool
            ), 'CPU inference with RoIPool is not supported currently.'
        # just get the actual data from DataContainer
        data['img_metas'] = data['img_metas'][0].data
    # forward the model
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)[0]
        # _ = model(return_loss=False, rescale=True, **data)[0]  # for visualization
        # result = model(return_loss=False, rescale=True, **data)[1][0]  # for visualization
    return result
    # return _, result  # for visualization
...
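With these lines active, inference_detector returns a pair instead of just the result; a caller would unpack it like this (a minimal usage sketch):

featuremaps, result = inference_detector(model, 'demo/demo.jpg')
# featuremaps: tuple of feature-map Tensors; result: the usual detection output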
3. Add visualization.py under the tools directory
*This part is based on https://www.cnblogs.com/aimhabo/p/13770395.html
# coding: utf-8
from argparse import ArgumentParser

import cv2
import mmcv
import numpy as np
import os
import torch

from mmdet.apis import inference_detector, init_detector, show_result_pyplot


def featuremap_2_heatmap(feature_map):
    assert isinstance(feature_map, torch.Tensor)
    feature_map = feature_map.detach()
    heatmap = feature_map[:, 0, :, :] * 0
    for c in range(feature_map.shape[1]):
        heatmap += feature_map[:, c, :, :]
    heatmap = heatmap.cpu().numpy()
    heatmap = np.mean(heatmap, axis=0)
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    return heatmap
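The loop above just sums the feature map over its channel dimension, averages over the batch, applies a ReLU, and normalizes. For reference, the same computation can be written more compactly (an equivalent sketch, not part of the original script):

def featuremap_2_heatmap_compact(feature_map):
    # sum over channels, average over the batch, ReLU, normalize to [0, 1]
    heatmap = feature_map.detach().sum(dim=1).mean(dim=0).cpu().numpy()
    heatmap = np.maximum(heatmap, 0)
    return heatmap / np.max(heatmap)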
def draw_feature_map(model, img_path, save_dir):
    """
    :param model: the model with loaded weights
    :param img_path: path to the test image
    :param save_dir: directory in which the generated images are saved
    :return:
    """
    img = mmcv.imread(img_path)
    modeltype = str(type(model)).split('.')[-1].split('\'')[0]
    model.eval()
    model.draw_heatmap = True
    # The model here must be the modified one, so that forward finally returns
    # the feature maps. What is returned here is a tuple of Tensors, each
    # Tensor corresponding to the feature map output at one level.
    featuremaps, result = inference_detector(model, img)
    i = 0
    for featuremap in featuremaps:
        heatmap = featuremap_2_heatmap(featuremap)
        # resize the heatmap to the size of the original image
        heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))
        # scale the heatmap to 0-255 and convert to uint8
        heatmap = np.uint8(255 * heatmap)
        # apply the JET colormap to the heatmap
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)
        # superimpose the heatmap on the original image; 0.5 is the heatmap
        # intensity factor
        superimposed_img = heatmap * 0.5 + img * 0.3
        # cv2.imshow('1', superimposed_img)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()
        # save the image to disk
        cv2.imwrite(os.path.join(save_dir, 'featuremap_' + str(i) + '.png'),
                    superimposed_img)
        i = i + 1
    show_result_pyplot(model, img, result, score_thr=0.05)
def main():
    parser = ArgumentParser()
    parser.add_argument('img', help='Image file')
    parser.add_argument('save_dir', help='Dir to save heatmap images')
    parser.add_argument('config', help='Config file')
    parser.add_argument('checkpoint', help='Checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    args = parser.parse_args()
    # build the model from a config file and a checkpoint file
    model = init_detector(args.config, args.checkpoint, device=args.device)
    draw_feature_map(model, args.img, args.save_dir)


if __name__ == '__main__':
    main()
4. Create a folder under your project to hold the feature maps. I created one named featuremaps; all generated heatmaps are written to this folder.
Set the command-line arguments in the following format:
img save_dir config checkpoint
demo/demo.jpg featuremaps configs/ checkpoints/
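For example (the config and checkpoint paths below are placeholders; substitute your own files):

python tools/visualization.py demo/demo.jpg featuremaps \
    configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py \
    checkpoints/your_checkpoint.pth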
5. Results
The detections from my own trained model are a bit poor, but the heatmap visualization itself works fine.