Conditional Convolutions for Instance Segmentation
If this post helps you, I'd appreciate a like~
Network Structure
mask head
In short, CondInst == FCOS (cls + reg + ctrness) + the top_feats output of the FCOS head (the controller that generates the dynamic mask head's filter parameters, channels: 256 --> 169) + a mask branch over FPN features (the paper uses only P3, but in the code self.in_features is ['p3', 'p4', 'p5']): each level goes through a refine block, the refined maps are summed, and a tower then compresses the channels: 128 --> 8.
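Where does 169 come from? The dynamic mask head is three 1x1 conv layers with 8 channels, and its input is the 8-channel mask-branch feature concatenated with 2 relative-coordinate channels; 169 is simply the total number of weights and biases the controller must generate. A minimal sketch of that count (the helper below is illustrative, not part of AdelaiDet):
'''
# Sketch (not AdelaiDet code): reproduce the controller's output size.
# Input to the dynamic head = 8 mask-branch channels + 2 rel.-coord channels.
def num_dynamic_params(in_channels=8 + 2, channels=8, num_layers=3):
    total, c_in = 0, in_channels
    for i in range(num_layers):
        c_out = channels if i < num_layers - 1 else 1  # last layer outputs 1 mask channel
        total += c_in * c_out + c_out                  # 1x1 conv weights + bias
        c_in = c_out
    return total

assert num_dynamic_params() == 169  # (10*8+8) + (8*8+8) + (8*1+1)
'''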
The structures of these three pieces (top_feats, the refine blocks, and the tower) are shown in the printouts below:
'''
top_feats
in CondInst:
(Pdb) top_feats[0].size()
torch.Size([2, 169, 100, 152])
(Pdb) top_feats[1].size()
torch.Size([2, 169, 50, 76])
(Pdb) top_feats[2].size()
torch.Size([2, 169, 25, 38])
(Pdb) top_feats[3].size()
torch.Size([2, 169, 13, 19])
(Pdb) top_feats[4].size()
torch.Size([2, 169, 7, 10])
'''
'''
MaskBranch(
  (refine): ModuleList(
    (0): Sequential(
      (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (2): Sequential(
      (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (tower): Sequential(
    (0): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (2): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (3): Sequential(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (4): Conv2d(128, 8, kernel_size=(1, 1), stride=(1, 1))
  )
)
'''
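Reading the printout together with the paper: each level in self.in_features gets its own refine block (256 --> 128), the coarser levels are upsampled to P3 resolution and summed with P3, and the tower maps the sum down to the 8-channel mask feature. A minimal sketch of that forward pass, assuming the refine/tower modules printed above (the real MaskBranch in adet/modeling/condinst/mask_branch.py uses aligned_bilinear for the upsampling and also computes an auxiliary semantic loss during training):
'''
import torch.nn.functional as F

def mask_branch_forward(features, refine, tower, in_features=("p3", "p4", "p5")):
    # Sketch, not the actual AdelaiDet implementation.
    x = None
    for i, f in enumerate(in_features):
        p = refine[i](features[f])  # 256 -> 128 at this level's stride
        if x is None:
            x = p                   # P3 fixes the output resolution (stride 8)
        else:
            # bring the coarser level up to P3 size before summing
            p = F.interpolate(p, size=x.shape[-2:], mode="bilinear", align_corners=False)
            x = x + p
    return tower(x)                 # 128 -> 8 mask features
'''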
LOSS
1. AdelaiDet/adet/modeling/condinst/condinst.py
# -*- coding: utf-8 -*-
import logging
import torch
from torch import nn
import torch.nn.functional as F
from detectron2.structures import ImageList
from detectron2.modeling.proposal_generator import build_proposal_generator
from detectron2.modeling.backbone import build_backbone
from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY
from detectron2.structures.instances import Instances
from detectron2.structures.masks import PolygonMasks, polygons_to_bitmask
from .dynamic_mask_head import build_dynamic_mask_head
from .mask_branch import build_mask_branch
from adet.utils.comm import aligned_bilinear
import pdb
__all__ = ["CondInst"]
logger = logging.getLogger(__name__)
@META_ARCH_REGISTRY.register()
class CondInst(nn.Module):
"""
Main class for CondInst architectures (see https://arxiv.org/abs/2003.05664).
"""
def __init__(self, cfg):
super().__init__()
self.device = torch.device(cfg.MODEL.DEVICE) # CUDA
self.backbone = build_backbone(cfg) # build_fcos_resnet_fpn_backbone
self.proposal_generator = build_proposal_generator(cfg, self.backbone.output_shape()) # FCOS
self.mask_head = build_dynamic_mask_head(cfg) # CondInst mask_head
self.mask_branch = build_mask_branch(cfg, self.backbone.output_shape()) # ConInst mask_branch
self.mask_out_stride = cfg.MODEL.CONDINST.MASK_OUT_STRIDE # 4 downsampling
self.max_proposals = cfg.MODEL.CONDINST.MAX_PROPOSALS # -1
# build top module
in_channels = self.proposal_generator.in_channels_to_top_module # 256
self.controller = nn.Conv2d( # [256, 169]
in_channels, self.mask_head.num_gen_params,
kernel_size=3, stride=1, padding=1
)
torch.nn.init.normal_(self.controller.weight, std=0.01)
torch.nn.init.constant_(self.controller.bias, 0)
pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
self.normalizer = lambda x: (x - pixel_mean) / pixel_std
self.to(self.device) # 加入cuda
pdb.set_trace()
    def forward(self, batched_inputs):
        images = [x["image"].to(self.device) for x in batched_inputs]  # move images to the device
        images = [self.normalizer(x) for x in images]
        images = ImageList.from_tensors(images, self.backbone.size_divisibility)  # torch.Size([2, 3, 768, 1248])
        pdb.set_trace()

        features = self.backbone(images.tensor)  # FPN backbone forward; returns 5 feature levels
        if "instances" in batched_inputs[0]:
            gt_instances = [x["instances"].to(self.device) for x in batched_inputs]  # len(gt_instances) == batch_size, one Instances per image
            self.add_bitmasks(gt_instances, images.tensor.size(-2), images.tensor.size(-1))
        else:
            gt_instances = None
        pdb.set_trace()

        mask_feats, sem_losses = self.mask_branch(features, gt_instances)  # forward mask_branch
        proposals, proposal_losses = self.proposal_generator(  # forward FCOS
            images, features, gt_instances, self.controller
        )
        if self.training:
            loss_mask = self._forward_mask_heads_train(proposals, mask_feats, gt_instances)  # per-instance mask loss
            losses = {}
            losses.update(sem_losses)
            losses.update(proposal_losses)
            losses.update({"loss_mask": loss_mask})
            pdb.set_trace()
            return losses
        else:  # test
            pred_instances_w_masks = self._forward_mask_heads_test(proposals, mask_feats)  # run the dynamic mask heads at test time
            padded_im_h, padded_im_w = images.tensor.size()[-2:]
            processed_results = []
            for im_id, (input_per_image, image_size) in enumerate(zip(batched_inputs, images.image_sizes)):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                instances_per_im = pred_instances_w_masks[pred_instances_w_masks.im_inds == im_id]
                instances_per_im = self.postprocess(  # rescale results back to the original image size
                    instances_per_im, height, width,
                    padded_im_h, padded_im_w
                )
                processed_results.append({"instances": instances_per_im})
            return processed_results
    def _forward_mask_heads_train(self, proposals, mask_feats, gt_instances):
        # prepare the inputs for the mask heads
        pred_instances = proposals["instances"]  # all positive locations in the batch (e.g. 160 here)

        if 0 <= self.max_proposals < len(pred_instances):  # clip when the max_proposals cap is exceeded
            inds = torch.randperm(len(pred_instances), device=mask_feats.device).long()
            logger.info("clipping proposals from {} to {}".format(
                len(pred_instances), self.max_proposals
            ))
            pred_instances = pred_instances[inds[:self.max_proposals]]

        pred_instances.mask_head_params = pred_instances.top_feats  # [160, 169] dynamic filter params per instance
        loss_mask = self.mask_head(
            mask_feats, self.mask_branch.out_stride,
            pred_instances, gt_instances
        )
        return loss_mask
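For reference, the loss_mask returned here is a dice loss between the sigmoided per-instance mask predictions and the GT bitmasks built in add_bitmasks. A minimal sketch of the dice computation (the real version lives in adet/modeling/condinst/dynamic_mask_head.py; reduction details omitted):
'''
import torch

def dice_loss(pred, target, eps=1e-5):
    # pred: sigmoid mask scores, target: {0, 1} bitmasks; both (N, H, W)
    pred = pred.reshape(pred.size(0), -1)
    target = target.reshape(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    union = (pred ** 2).sum(dim=1) + (target ** 2).sum(dim=1) + eps
    return 1.0 - 2.0 * intersection / union  # per-instance; the mean gives loss_mask
'''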