目录
目标检测
目前学术和工业界出现的目标检测算法分成3类:
- 传统的目标检测算法:Cascade + HOG/DPM + Haar/SVM以及上述方法的诸多改进、优化;
- 候选区域/框 + 深度学习分类:通过提取候选区域,并对相应区域进行以深度学习方法为主的分类的方案,如:
R-CNN(Selective Search + CNN + SVM)
SPP-net(ROI Pooling)
Fast R-CNN(Selective Search + CNN + ROI)
Faster R-CNN(RPN + CNN + ROI)
R-FCN
等系列方法;
- 基于深度学习的回归方法:YOLO/SSD/DenseBox 等方法;以及最近出现的结合RNN算法的RRC detection;结合DPM的Deformable CNN等
各模块的提出
- ROI Pooling:SPP-net,Fast-RCNN
- Anchor 机制:Faster-RCNN
- RPN:Faster-RCNN
- ROI Align:Mask-RCNN对Faster-RCNN的改进
- FPN:Feature Pyramid Networks for Object Detection,
在保持faster R-CNN和R-FCN的速度的同时,FPN用特征金字塔进一步提高了精度
fasterrcnn_demo.py解读
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import argparse
import cv2, os
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
import time
import pdb
def main():
    """Run the Faster R-CNN demo over every image in a directory.

    Parses command-line options, merges them into the global yacs ``cfg``,
    builds a ``COCODemo`` predictor, and displays the annotated detection
    result for each readable image found in ``--images-dir``.
    """
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
    parser.add_argument(
        "--config-file",
        default="configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weights",
        default="e2e_faster_rcnn_R_50_C4_1x.pth",
        metavar="FILE",
        help="path to the trained model",
    )
    parser.add_argument(
        "--images-dir",
        default="demo/images",
        metavar="DIR",
        help="path to demo images directory",
    )
    parser.add_argument(
        "--min-image-size",
        type=int,
        default=800,
        help="Smallest size of the image to feed to the model. "
        "Model was trained with 800, which gives best results",
    )
    # All remaining positional tokens are collected here and forwarded to
    # cfg.merge_from_list() as KEY VALUE overrides.
    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    # load config from file and command-line arguments
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.weights
    cfg.freeze()

    # The following per-class thresholds are computed by maximizing
    # per-class f-measure in their precision-recall curve.
    # Please see compute_thresholds_for_classes() in coco_eval.py for details.
    # NOTE(review): currently unused — the matching COCODemo keyword argument
    # below is commented out, so a single default threshold is applied.
    thresholds_for_classes = [
        0.23860901594161987, 0.24108672142028809, 0.2470853328704834,
        0.2316885143518448, 0.2708061933517456, 0.23173952102661133,
        0.31990334391593933, 0.21302376687526703, 0.20151866972446442,
        0.20928964018821716, 0.3793887197971344, 0.2715213894844055,
        0.2836397588253021, 0.26449233293533325, 0.1728038638830185,
        0.314998596906662, 0.28575003147125244, 0.28987520933151245,
        0.2727000117301941, 0.23306897282600403, 0.265937477350235,
        0.32663893699645996, 0.27102580666542053, 0.29177549481391907,
        0.2043062448501587, 0.24331751465797424, 0.20752687752246857,
        0.22951272130012512, 0.22753854095935822, 0.2159966081380844,
        0.1993938684463501, 0.23676514625549316, 0.20982342958450317,
        0.18315598368644714, 0.2489681988954544, 0.24793922901153564,
        0.287187397480011, 0.23045086860656738, 0.2462811917066574,
        0.21191294491291046, 0.22845126688480377, 0.24365000426769257,
        0.22687821090221405, 0.18365581333637238, 0.2035856395959854,
        0.23478077352046967, 0.18431290984153748, 0.18184082210063934,
        0.2708037495613098, 0.2268175482749939, 0.19970566034317017,
        0.21832780539989471, 0.21120598912239075, 0.270445853471756,
        0.189377561211586, 0.2101106345653534, 0.2112293541431427,
        0.23484709858894348, 0.22701986134052277, 0.20732736587524414,
        0.1953316181898117, 0.3237660229206085, 0.3078872859477997,
        0.2881140112876892, 0.38746657967567444, 0.20038367807865143,
        0.28123822808265686, 0.2588447630405426, 0.2796839773654938,
        0.266757994890213, 0.3266656696796417, 0.25759157538414,
        0.2578003704547882, 0.17009201645851135, 0.29051828384399414,
        0.24002137780189514, 0.22378061711788177, 0.26134759187698364,
        0.1730124056339264, 0.1857597529888153
    ]

    demo_im_names = os.listdir(args.images_dir)

    # prepare object that handles inference plus adds predictions on top of image
    coco_demo = COCODemo(
        cfg,
        #confidence_thresholds_for_classes=thresholds_for_classes,
        min_image_size=args.min_image_size
    )
    #pdb.set_trace()
    for im_name in demo_im_names:
        img = cv2.imread(os.path.join(args.images_dir, im_name))
        if img is None:
            # skip files OpenCV cannot decode (non-images, broken files)
            continue
        start_time = time.time()
        composite = coco_demo.run_on_opencv_image(img)
        print("{}\tinference time: {:.2f}s".format(im_name, time.time() - start_time))
        cv2.imshow(im_name, composite)
    # all result windows stay open until a key is pressed
    print("Press any keys to exit ...")
    cv2.waitKey()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
import内置库
import argparse
import cv2, os
import time
import pdb
这些都是python内置的模块,如:
import argparse
argparse 是 Python 内置的一个用于命令行选项与参数解析的模块,通过在程序中定义好我们需要的参数,argparse 将会从 sys.argv 中解析出这些参数,并自动生成帮助和使用信息。当然,Python 也有第三方的库可用于命令行解析,而且功能也更加强大,比如 docopt,Click。
参考博客:https://blog.csdn.net/weixin_31866177/article/details/82111586
简单示例
先来看一个简单示例。主要有三个步骤:
- 创建 ArgumentParser() 对象
- 调用 add_argument() 方法添加参数
- 使用 parse_args() 解析添加的参数
参数说明:https://www.iteye.com/blog/songpengfei-1440158
from maskrcnn_benchmark.config import cfg
from predictor import COCODemo
maskrcnn_benchmark.config
在路径G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\config下,去找cfg,打开目录下的__init__.py
内容如下:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .defaults import _C as cfg
可以看出cfg是_C的重命名,找.defaults
,即该目录下的defaults.py
内容如下:
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import os
from yacs.config import CfgNode as CN
yacs库
YACS是一个轻量级库,用于定义和管理系统配置,例如那些在为科学实验设计的软件中常见的配置。这些“配置”通常涵盖诸如用于训练机器学习模型的超参数或可配置模型超参数(诸如卷积神经网络的深度)之类的概念。 由于您正在进行科学研究,因此重复性至关重要,因此您需要一种可靠的方法来序列化实验配置。 YACS使用YAML作为简单的,人类可读的序列化格式。范式是:your code + a YACS config for experiment E (+ external dependencies + hardware + other nuisance terms …) = reproducible experiment E。虽然您可能无法控制所有内容,但至少可以控制代码和实验配置。 YACS随时为您提供帮助。
用法
要在项目中使用YACS,首先要创建一个项目配置文件,通常称为config.py或defaults.py。 此文件是所有可配置选项的一站式参考点。 它应该有很好的文档记录,并为所有选项提供合理的默认值。
cfg的配置
# my_project/config.py
from yacs.config import CfgNode as CN

# Root configuration node for the whole project.
_C = CN()

# System-level settings live under their own sub-node.
_C.SYSTEM = CN()
# Number of GPUS to use in the experiment
_C.SYSTEM.NUM_GPUS = 8
# Number of workers for doing things
_C.SYSTEM.NUM_WORKERS = 4

# Training hyperparameters.
_C.TRAIN = CN()
# A very important hyperparameter
_C.TRAIN.HYPERPARAMETER_1 = 0.1
# The all important scales for the stuff
_C.TRAIN.SCALES = (2, 4, 8, 16)


def get_cfg_defaults():
    """Get a yacs CfgNode object with default values for my_project."""
    # Hand back a clone so callers can mutate their copy without touching
    # the module-level defaults ("local variable" use pattern).
    return _C.clone()

# Alternatively, provide a way to import the defaults as
# a global singleton:
# cfg = _C  # users can `from config import cfg`
cfg即_C为程序的参数配置,其部分结构如下所示:
从路径G:\pytorch\test\maskrcnn-benchmark-master下的predictor.py
程序中引入COCODemo
类
如下所示:
class COCODemo(object):
    """Demo helper wrapping a detection model (excerpt).

    The portion shown here is the table of COCO category names; the list
    index corresponds to the class id predicted by the model, with index 0
    reserved for the background class.
    """

    # COCO categories for pretty print (index == model class id).
    CATEGORIES = [
        "__background",
        "person", "bicycle", "car", "motorcycle", "airplane",
        "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird",
        "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack",
        "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat",
        "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
        "wine glass", "cup", "fork", "knife", "spoon",
        "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut",
        "cake", "chair", "couch", "potted plant", "bed",
        "dining table", "toilet", "tv", "laptop", "mouse",
        "remote", "keyboard", "cell phone", "microwave", "oven",
        "toaster", "sink", "refrigerator", "book", "clock",
        "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
    ]
以下是程序的配置参数,
def main():
parser = argparse.ArgumentParser(description="PyTorch Object Detection Webcam Demo")
parser.add_argument(
"--config-file",
default="configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
metavar="FILE",
help="path to config file",
)
parser.add_argument(
"--weights",
default="e2e_faster_rcnn_R_50_C4_1x.pth",
metavar="FILE",
help="path to the trained model",
)
parser.add_argument(
"--images-dir",
default="demo/images",
metavar="DIR",
help="path to demo images directory",
)
parser.add_argument(
"--min-image-size",
type=int,
default=800,
help="Smallest size of the image to feed to the model. "
"Model was trained with 800, which gives best results",
)
parser.add_argument(
"opts",
help="Modify model config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
args = parser.parse_args()
argparse.REMAINDER:
nargs=argparse.REMAINDER,所有剩余的参数,均转化为一个列表赋值给此项,通常用此方法来将剩余的参数传入另一个parser进行解析。如果nargs没有定义,则可传入参数的数量由action决定,通常情况下为一个,并且不会生成长度为一的列表。
在初始设置之后,最好通过调用freeze()方法将配置冻结以防止进一步修改。
# load config from file and command-line arguments
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(args.opts)
cfg.MODEL.WEIGHT = args.weights
cfg.freeze()
- cfg.merge_from_file(args.config_file):config_file是指定的yaml配置文件,通过merge_from_file这个函数会将yaml文件中指定的超参数对默认值进行覆盖。
- cfg.merge_from_list(args.opts):merge_from_list作用同上面的类似,只不过是通过命令行的方式覆盖。
- cfg.freeze(): freeze函数的作用是将超参数值冻结,避免被程序不小心修改。
# The following per-class thresholds are computed by maximizing
# per-class f-measure in their precision-recall curve.
# Please see compute_thresholds_for_classes() in coco_eval.py for details.
thresholds_for_classes = [
0.23860901594161987, 0.24108672142028809, 0.2470853328704834,
0.2316885143518448, 0.2708061933517456, 0.23173952102661133,
0.31990334391593933, 0.21302376687526703, 0.20151866972446442,
0.20928964018821716, 0.3793887197971344, 0.2715213894844055,
0.2836397588253021, 0.26449233293533325, 0.1728038638830185,
0.314998596906662, 0.28575003147125244, 0.28987520933151245,
0.2727000117301941, 0.23306897282600403, 0.265937477350235,
0.32663893699645996, 0.27102580666542053, 0.29177549481391907,
0.2043062448501587, 0.24331751465797424, 0.20752687752246857,
0.22951272130012512, 0.22753854095935822, 0.2159966081380844,
0.1993938684463501, 0.23676514625549316, 0.20982342958450317,
0.18315598368644714, 0.2489681988954544, 0.24793922901153564,
0.287187397480011, 0.23045086860656738, 0.2462811917066574,
0.21191294491291046, 0.22845126688480377, 0.24365000426769257,
0.22687821090221405, 0.18365581333637238, 0.2035856395959854,
0.23478077352046967, 0.18431290984153748, 0.18184082210063934,
0.2708037495613098, 0.2268175482749939, 0.19970566034317017,
0.21832780539989471, 0.21120598912239075, 0.270445853471756,
0.189377561211586, 0.2101106345653534, 0.2112293541431427,
0.23484709858894348, 0.22701986134052277, 0.20732736587524414,
0.1953316181898117, 0.3237660229206085, 0.3078872859477997,
0.2881140112876892, 0.38746657967567444, 0.20038367807865143,
0.28123822808265686, 0.2588447630405426, 0.2796839773654938,
0.266757994890213, 0.3266656696796417, 0.25759157538414,
0.2578003704547882, 0.17009201645851135, 0.29051828384399414,
0.24002137780189514, 0.22378061711788177, 0.26134759187698364,
0.1730124056339264, 0.1857597529888153
]
coco_eval.py
路径:G:\pytorch\test\maskrcnn-benchmark-master\build\lib.win-amd64-3.6\maskrcnn_benchmark\data\datasets\evaluation\coco
demo_im_names = os.listdir(args.images_dir)
# prepare object that handles inference plus adds predictions on top of image
coco_demo = COCODemo(
cfg,
#confidence_thresholds_for_classes=thresholds_for_classes,
min_image_size=args.min_image_size
)
引入COCODemo类
,路径:G:\pytorch\test\maskrcnn-benchmark-master下的predictor.py程序
COCODemo类的定义
def __init__(
    self,
    cfg,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
    masks_per_dim=2,
    min_image_size=224,
):
    """Build the detection model, load its weights and set up helpers.

    Args:
        cfg: frozen yacs config node describing the model.
        confidence_threshold: minimum detection score kept for display.
        show_mask_heatmaps: if True, visualise raw mask scores instead of
            binarised masks.
        masks_per_dim: grid size used when tiling mask heatmaps.
        min_image_size: smallest side the input image is resized to.
    """
    self.cfg = cfg.clone()
    self.model = build_detection_model(cfg)  # see build_detection_model below
    # nn.Module.eval() switches the network into inference mode (disables
    # dropout / batch-norm updates). NOTE: this is a torch method, not the
    # Python builtin eval() that evaluates string expressions.
    self.model.eval()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.model.to(self.device)
    self.min_image_size = min_image_size

    save_dir = cfg.OUTPUT_DIR
    # DetectronCheckpointer restores the trained weights (see below)
    checkpointer = DetectronCheckpointer(cfg, self.model, save_dir=save_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    self.transforms = self.build_transform()

    # heatmap display needs the raw scores, so disable binarisation (-1);
    # otherwise binarise masks at 0.5
    mask_threshold = -1 if show_mask_heatmaps else 0.5
    self.masker = Masker(threshold=mask_threshold, padding=1)

    # used to make colors for each class
    self.palette = torch.tensor([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    self.cpu_device = torch.device("cpu")
    self.confidence_threshold = confidence_threshold
    self.show_mask_heatmaps = show_mask_heatmaps
    self.masks_per_dim = masks_per_dim
建立检测模型build_detection_model()
build_detection_model()
在路径G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\modeling\detector下的detector.py
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .generalized_rcnn import GeneralizedRCNN

# Maps the META_ARCHITECTURE name from the config to its implementing class.
_DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}


def build_detection_model(cfg):
    """Instantiate the detector class named by cfg.MODEL.META_ARCHITECTURE."""
    model_cls = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
    return model_cls(cfg)
GeneralizedRCNN
在路径G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\modeling\detector下的generalized_rcnn.py
中,
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
"""
Implements the Generalized R-CNN framework
"""
import torch
from torch import nn
from maskrcnn_benchmark.structures.image_list import to_image_list
from ..backbone import build_backbone
from ..rpn.rpn import build_rpn
from ..roi_heads.roi_heads import build_roi_heads
class GeneralizedRCNN(nn.Module):
    """
    Main class for Generalized R-CNN. Currently supports boxes and masks.
    It consists of three main parts:
    - backbone
    - rpn
    - heads: takes the features + the proposals from the RPN and computes
      detections / masks from it.
    """

    def __init__(self, cfg):
        super(GeneralizedRCNN, self).__init__()
        # Feature extractor; see build_backbone below.
        self.backbone = build_backbone(cfg)
        # RPN and ROI heads both consume the backbone's output channels.
        feat_channels = self.backbone.out_channels
        self.rpn = build_rpn(cfg, feat_channels)
        self.roi_heads = build_roi_heads(cfg, feat_channels)
super(GeneralizedRCNN, self).__init__() 调用父类 nn.Module 的构造方法,完成继承体系中父类部分的初始化
build_backbone
在路径G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\modeling\backbone下的backbone.py
建立主干网络backbone
backbone.py程序详解
参考博客:https://blog.csdn.net/leijieZhang/article/details/90748788
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from collections import OrderedDict
from torch import nn
from maskrcnn_benchmark.modeling import registry
from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform
from . import fpn as fpn_module
from . import resnet
@registry.BACKBONES.register("R-50-C4")
@registry.BACKBONES.register("R-50-C5")
@registry.BACKBONES.register("R-101-C4")
@registry.BACKBONES.register("R-101-C5")
def build_resnet_backbone(cfg):
    """Build a plain ResNet trunk (no FPN), registered for the C4/C5 variants."""
    trunk = resnet.ResNet(cfg)  # see resnet.py below
    backbone = nn.Sequential(OrderedDict([("body", trunk)]))
    # Channel count of the last stage; defaults.py sets it to 256 * 4.
    backbone.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    return backbone
from . import resnet
指向该路径G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\modeling\backbone下的resnet.py
body = resnet.ResNet(cfg)
调用resnet中的ResNet函数
ResNet网络和FPN网络
详细理解:个人理解笔记
建立RPN: build_rpn()
class RPNModule(torch.nn.Module):
    """
    Module for RPN computation. Takes feature maps from the backbone and outputs
    RPN proposals and losses. Works for both FPN and non-FPN.
    """

    def __init__(self, cfg, in_channels):
        super(RPNModule, self).__init__()
        self.cfg = cfg.clone()

        # Anchor generator plus the conv head predicting objectness / deltas.
        anchor_generator = make_anchor_generator(cfg)
        head_cls = registry.RPN_HEADS[cfg.MODEL.RPN.RPN_HEAD]
        head = head_cls(
            cfg, in_channels, anchor_generator.num_anchors_per_location()[0]
        )

        # Unit-weight coder converting between anchors and regression deltas.
        coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        self.anchor_generator = anchor_generator
        self.head = head
        # Post-processors differ between train and test (NMS budgets etc.).
        self.box_selector_train = make_rpn_postprocessor(cfg, coder, is_train=True)
        self.box_selector_test = make_rpn_postprocessor(cfg, coder, is_train=False)
        self.loss_evaluator = make_rpn_loss_evaluator(cfg, coder)
build_retinanet
生成anchor
from .anchor_generator import make_anchor_generator
anchor_generator = make_anchor_generator(cfg)
anchor_generator.py
路径:G:\pytorch\test\maskrcnn-benchmark-master\maskrcnn_benchmark\modeling\rpn,从中引入make_anchor_generator()
函数
def make_anchor_generator(config):
    """Build an AnchorGenerator from the RPN settings in the config.

    Defaults (see defaults.py): sizes (32, 64, 128, 256, 512) in source-image
    pixels, aspect ratios (0.5, 1.0, 2.0), and one stride per feature map
    (FPN yaml uses ANCHOR_STRIDE: (4, 8, 16, 32, 64)).
    """
    # Anchor sizes are expressed relative to the original image.
    sizes = config.MODEL.RPN.ANCHOR_SIZES
    # Width/height ratios, typically 1:1, 1:2 and 2:1.
    ratios = config.MODEL.RPN.ASPECT_RATIOS
    # Sliding stride on each feature map; one entry per map.
    strides = config.MODEL.RPN.ANCHOR_STRIDE
    # Tolerance for anchors sticking out of the image; anchors exceeding it
    # are treated as invalid (default 0).
    straddle = config.MODEL.RPN.STRADDLE_THRESH

    if config.MODEL.RPN.USE_FPN:
        # With FPN there must be exactly one stride per anchor size.
        assert len(strides) == len(
            sizes
        ), "FPN should have len(ANCHOR_STRIDE) == len(ANCHOR_SIZES)"
    else:
        assert len(strides) == 1, "Non-FPN should have a single ANCHOR_STRIDE"

    return AnchorGenerator(sizes, ratios, strides, straddle)
边框操作:BoxCoder
对Bounding box回归的理解,参考博客:https://blog.csdn.net/u011534057/article/details/51235964?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-29&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-29
微调proposal box即找最接近Ground truth的平移伸缩变换映射。
RPN 后处理:make_rpn_postprocessor
inference.py
程序理解参考博客:https://blog.csdn.net/leijieZhang/article/details/91448842
对于每一个 feature map 的输出,根据 score 排序,保留至多前PRE_NMS_TOP_N
个 box。需要注意的是这里是对整个 batch 内部进行排序,而不是单张图片内部进行排序。之后通过clip_to_image去除超出图片边界的框,通过remove_small_boxes
去掉过小的框(这里阈值为 0,即不会删框)。然后进行 NMS,保留至多POST_NMS_TOP_N
个框。
因为有五个特征输出,所以经过上一步会得到至多5*POST_NMS_TOP_N个 proposals。通过 score 统一排序,保留前FPN_POST_NMS_TOP_N个。这一步训练和测试稍有区别,训练时是整个 batch 排序,而测试时是每张图片分别排序。不过从源代码的注释上来看,这两个阶段应该会统一成后一种。
最后,如果是训练,则把 ground truth 也加入到 proposal 中,丰富正样本。
RPN Loss:make_rpn_loss_evaluator
loss.py
程序理解参考:https://blog.csdn.net/leijieZhang/article/details/91588292
Smooth L1 Loss(参考Fast R-CNN):
理解参考:https://www.zhihu.com/question/58200555
建立ROI_head
roi_heads 部分,文件夹下分为 box_head, keypoint_head, mask_head 三个文件夹对应相应的head,同级目录下的 roi_heads.py 用于合并单独的 heads 为一个统一的head。
代码参考:https://blog.csdn.net/leijiezhang/article/details/92063984
推断过程
从demo的推断,
参考博客:https://blog.csdn.net/m0_37644085/article/details/88688943
输出
OrderedDict([('backbone', Sequential(
(body): ResNet(
(stem): StemWithFixedBatchNorm(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): FrozenBatchNorm2d()
)
(layer1): Sequential(
(0): BottleneckWithFixedBatchNorm(
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): FrozenBatchNorm2d()
)
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(1): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(2): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
)
(layer2): Sequential(
(0): BottleneckWithFixedBatchNorm(
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): FrozenBatchNorm2d()
)
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(1): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(2): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(3): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
)
(layer3): Sequential(
(0): BottleneckWithFixedBatchNorm(
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): FrozenBatchNorm2d()
)
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(1): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(2): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(3): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(4): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(5): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
)
(layer4): Sequential(
(0): BottleneckWithFixedBatchNorm(
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): FrozenBatchNorm2d()
)
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(1): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
(2): BottleneckWithFixedBatchNorm(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): FrozenBatchNorm2d()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): FrozenBatchNorm2d()
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): FrozenBatchNorm2d()
)
)
)
(fpn): FPN(
(fpn_inner1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_layer1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_inner2): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_layer2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_inner3): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_layer3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(fpn_inner4): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
(fpn_layer4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(top_blocks): LastLevelMaxPool()
)
)), ('rpn', RPNModule(
(anchor_generator): AnchorGenerator(
(cell_anchors): BufferList()
)
(head): RPNHead(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
(bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
)
(box_selector_train): RPNPostProcessor()
(box_selector_test): RPNPostProcessor()
)), ('roi_heads', CombinedROIHeads(
(box): ROIBoxHead(
(feature_extractor): FPN2MLPFeatureExtractor(
(pooler): Pooler(
(poolers): ModuleList(
(0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=2)
(1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=2)
(2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=2)
(3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=2)
)
)
(fc6): Linear(in_features=12544, out_features=1024, bias=True)
(fc7): Linear(in_features=1024, out_features=1024, bias=True)
)
(predictor): FPNPredictor(
(cls_score): Linear(in_features=1024, out_features=81, bias=True)
(bbox_pred): Linear(in_features=1024, out_features=324, bias=True)
)
(post_processor): PostProcessor()
)
))])