这篇博文的内容是对Detect-and-Track的源代码进行解析,由于是第一篇,所以展示的是代码的脉络,以及如何寻找到3D Mask R-CNN的构建过程。博文的目录是按照文件来的:
参考内容链接如下:
Detect-and-Track论文:【网页链接】
Detect-and-Track源代码:【网页链接】
目录
一、tools/train_net.py
二、lib/modeling/model_builder.py
1. 函数索引
2. 创建模型:keypoint_rcnn()
3. 根据参数创建模型的函数:build_generic_fast_rcnn_model()
一、tools/train_net.py
我们将代码翻到最后,可以看到主语句:
if __name__ == '__main__':
在中间部位,我们可以看到这样一个训练语句:
checkpoints = net_trainer()
定位到net_trainer(),此函数包含了全部的训练过程。代码详解会在之后的系列中推出,我们先来看第一句,即如何构建一个模型:
def net_trainer():
model, start_iter, checkpoints = create_model() # ◆创建模型
……
定位到create_model()函数:
def create_model():
    """Build the training model, resuming from a saved checkpoint if possible.

    When both cfg.CLUSTER.ON_CLUSTER and cfg.CLUSTER.AUTO_RESUME are set, the
    training output directory is inspected before anything is built:

    * If 'model_final.pkl' already exists there, no model is created and
      ``(None, None, {'final': <path to model_final.pkl>})`` is returned.
    * Otherwise every file named like 'model_iter<N>.pkl' is examined. When a
      checkpoint qualifies, cfg.TRAIN.WEIGHTS is pointed at that file and the
      start iteration is set to one past the checkpoint's iteration.

    The network named by cfg.MODEL.TYPE is then created in training mode,
    optionally memory-optimized (cfg.MEMONGER), and its param_init_net is run
    once.

    Returns:
        A ``(model, start_iter, checkpoints)`` tuple; ``checkpoints`` is an
        empty dict unless the final model was found on disk.
    """
    start_iter = 0

    if cfg.CLUSTER.ON_CLUSTER and cfg.CLUSTER.AUTO_RESUME:
        import re

        output_dir = get_output_dir(training=True)

        # A finished model is already on disk: nothing left to train.
        final_path = os.path.join(output_dir, 'model_final.pkl')
        if os.path.exists(final_path):
            logger.info('model_final.pkl exists; no need to train!')
            return None, None, {'final': final_path}

        # Training was interrupted: look for intermediate model_iter<N>.pkl
        # snapshots.
        for candidate in os.listdir(output_dir):
            found = re.search(r'(?<=model_iter)\d+(?=\.pkl)', candidate)
            if found is None:
                continue
            checkpoint_iter = int(found.group(0))
            if checkpoint_iter <= start_iter:
                continue
            # Start one iteration immediately after the checkpoint iter
            start_iter = checkpoint_iter + 1
            resume_weights_file = candidate  # remember the file name

        if start_iter > 0:
            # Point the weights path at the checkpoint and report where
            # training resumes from.
            cfg.TRAIN.WEIGHTS = os.path.join(output_dir, resume_weights_file)
            logger.info(
                '===> Resuming from checkpoint {} with start iter {}'.format(
                    cfg.TRAIN.WEIGHTS, start_iter))

    # According to the article, cfg.MODEL.TYPE is 'keypoint_rcnn' here.
    logger.info('Building nework: {}'.format(cfg.MODEL.TYPE))
    model = model_builder.create(cfg.MODEL.TYPE, train=True)

    # Optional memory optimization of the model (memonger).
    if cfg.MEMONGER:
        optimize_memory(model)

    # Run the parameter-initialization net a single time.
    workspace.RunNetOnce(model.param_init_net)
    return model, start_iter, {}
可以看到代码首先检查输出目录中是否已有训练完成(或训练到一半)的pkl文件,之后再由
model = model_builder.create(cfg.MODEL.TYPE, train=True) # TYPE:keypoint_rcnn
创建一个模型,而此模型的定义是在lib/modeling/model_builder.py文件中。
二、lib/modeling/ model_builder.py
1.函数索引
在这个文件的create()函数卡了很久,最后终于弄懂了caffe2调用函数的机制,我们来看一下create()和get_func():
def create(model_name, train=False, init_params=None):
    """Create a model by dispatching to the builder function named model_name.

    Args:
        model_name: Name of the builder; it is resolved to a function object
            with get_func() and also passed on to init_model().
        train (bool): Set true if training.
        init_params (bool or None): Set to true to force initializing the
            network with random weights (even at test time). Normally the
            network is only initialized at train time.

    Returns:
        Whatever the resolved builder function returns.
    """
    # Step 1: look up the builder. For model_name == 'keypoint_rcnn' this is
    # the keypoint_rcnn function itself.
    builder = get_func(model_name)
    # Step 2: create the model object that the builder will populate.
    base_model = init_model(model_name, train, init_params)
    # Step 3: equivalent to keypoint_rcnn(base_model) in the example above.
    return builder(base_model)
def get_func(func_name):
    """Helper to return a function object by name.

    func_name must name a function in this module, or be a dotted path
    relative to the base 'modeling' module. The first path component is
    looked up in this module's globals; each remaining component is resolved
    as an attribute of the previous object.

    Any lookup failure is logged and the original exception is re-raised.
    """
    try:
        names = func_name.split('.')
        # globals() returns the symbol table of the module in which this
        # function is defined, not that of the caller.
        target = globals()[names[0]]
        for attr_name in names[1:]:
            target = getattr(target, attr_name)
    except Exception:
        logger.error('Failed to find function: {}'.format(func_name))
        raise
    return target
通过注释大家应该了解到,get_func()这个函数仅仅只是起到一个索引的功能,由给定的字符串来找到当前文件中与其对应的函数。比如程序给定的字符串是"keypoint_rcnn",那么get_func()检查当前文件下是否有此函数,如果有的话,就返回该函数对象。此时create()中return的内容就变成了:
keypoint_rcnn (init_model(model_name, train, init_params))
2.创建模型:keypoint_rcnn()
定位到keypoint_rcnn()函数,此函数直接创建了3D Mask R-CNN。我们根据配置文件将这些内容显式地注释出来:
def keypoint_rcnn(model):
    """Build a keypoint R-CNN on `model` from the builders named in cfg.

    The three builder functions are resolved by name with get_func() and
    handed to build_generic_fast_rcnn_model(); its return value is returned
    unchanged.
    """
    # According to the article's configuration (the config file itself is not
    # shown here) these names resolve to:
    #   cfg.MODEL.CONV_BODY            -> ResNet3D.add_ResNet18_conv4_body
    #   cfg.MODEL.ROI_HEAD             -> ResNet3D.add_ResNet18_roi_conv5_head
    #   cfg.KRCNN.ROI_KEYPOINTS_HEAD   -> keypoint_rcnn_heads.add_roi_pose_head_v1convX_3d
    conv_body_func = get_func(cfg.MODEL.CONV_BODY)  # backbone
    roi_head_func = get_func(cfg.MODEL.ROI_HEAD)  # RoI head
    keypoint_head_func = get_func(cfg.KRCNN.ROI_KEYPOINTS_HEAD)  # keypoint head

    # The keypoint head must be passed by keyword: the fourth positional
    # parameter of build_generic_fast_rcnn_model is the mask head.
    return build_generic_fast_rcnn_model(
        model,
        conv_body_func,
        roi_head_func,
        add_roi_keypoint_head_func=keypoint_head_func,
    )
思路非常清晰,框架由3个大部分构成:
★Backbone主干网络:用了3D版的ResNet18,4阶段版本。
★RoI网络:用于生成RoI、进行RoIAlign
★Keypoint_rcnn_heads:网络头,进行关键点评估
3. 根据参数创建模型的函数:build_generic_fast_rcnn_model()
keypoint_rcnn()传入参数后,build_generic_fast_rcnn_model()中的_single_gpu_build_func(model)函数完成创建模型的任务。这个函数中包含着三大子框架的构建过程,需要结合着看。在后期会更新三个子框架代码的详解,同时更新此代码注释。
# 创建通用的Fast R-CNN系列模型
def build_generic_fast_rcnn_model(
    model,  # model object that all ops are added to
    add_conv_body_func,  # builder for the conv body (backbone)
    add_roi_frcn_head_func,  # builder for the RoI Fast R-CNN head (presumably feeds cls & box reg -- confirm)
    add_roi_mask_head_func=None,  # builder for the RoI mask head; called only when cfg.MODEL.MASK_ON
    add_roi_keypoint_head_func=None,  # builder for the RoI keypoint head; called only when cfg.MODEL.KEYPOINTS_ON
    freeze_conv_body=False
):
    """Build a generic Fast R-CNN style model from pluggable builder functions.

    The per-GPU construction is defined in the nested _single_gpu_build_func
    and handed to build_data_parallel_model together with `model`.

    Args:
        model: Model object to populate.
        add_conv_body_func: Called as f(model); must return
            (blob_conv, dim_conv, spatial_scale_conv).
        add_roi_frcn_head_func: Called as
            f(model, blob_conv, dim_conv, spatial_scale_conv); must return
            (blob_frcn, dim_frcn, spatial_scale_frcn).
        add_roi_mask_head_func: Same call signature; must be provided when
            cfg.MODEL.MASK_ON is set, because it is called unconditionally in
            that branch.
        add_roi_keypoint_head_func: Same call signature; must be provided when
            cfg.MODEL.KEYPOINTS_ON is set.
        freeze_conv_body: If true, StopGradient is applied to every conv body
            output blob.

    Returns:
        The same `model` object that was passed in.
    """
    def _single_gpu_build_func(model):
        """Builds the model on a single GPU. Can be called in a loop over GPUs
        with name and device scoping to create a data parallel model."""
        # For training we define one net that contains all ops
        # For inference, we split the graph into two nets: a standard fast r-cnn
        # net and a mask prediction net; the mask net is only applied to a
        # subset of high-scoring detections
        is_inference = not model.train  # inference mode is simply "not training"
        # Some generic tensors
        model.ConstantFill([], 'zero', shape=[1], value=0)
        model.ConstantFill([], 'minus1', shape=[1], value=-1)
        # Add the conv body (backbone). Per the article's configuration this is
        # ResNet3D.add_ResNet18_conv4_body.
        blob_conv, dim_conv, spatial_scale_conv = add_conv_body_func(model)
        if freeze_conv_body:  # optionally block gradients from flowing into the backbone
            for b in blob_ref_to_list(blob_conv):
                model.StopGradient(b, b)
        # Link the backbone output to the heads according to the configuration.
        # Convert from 3D blob to 2D, in case of videos to attach a 2D head (not necessarily will happen though)
        if cfg.MODEL.VIDEO_ON:  # video mode enabled
            blob_conv = time_pool_blobs(blob_conv, model, cfg.VIDEO.BODY_HEAD_LINK)  # body-to-head link
        if is_inference:
            # Create a net that can be used to compute the conv body only on an image (no RPN or heads / branches)
            # NOTE: this only stores an extra clone on the model; the RPN and
            # head construction below still runs at inference time.
            model.conv_body_net = model.net.Clone('conv_body_net')
        # Select the FPN lib, based on whether the head is 3D or 2D
        if cfg.MODEL.VIDEO_ON and cfg.VIDEO.BODY_HEAD_LINK == '':
            # Video mode with an empty body/head link setting: use the 3D head.
            FPN_lib = FPN3D
            head_3d = True
            out_time_dim = cfg.VIDEO.NUM_FRAMES_MID
        else:
            FPN_lib = FPN
            head_3d = False
            out_time_dim = 1
        # Add the RPN branch
        if cfg.MODEL.FASTER_RCNN:
            if cfg.FPN.FPN_ON:
                FPN_lib.add_fpn_rpn_outputs(
                    model, blob_conv, dim_conv, spatial_scale_conv,
                    time_dim=out_time_dim)
                model.CollectAndDistributeFpnRpnProposals()
            else:
                add_rpn_outputs(model, blob_conv, dim_conv, spatial_scale_conv,
                                nd=head_3d, time_dim=out_time_dim)
        if cfg.FPN.FPN_ON:
            # Code only supports case when RPN and ROI min levels are the same
            assert cfg.FPN.RPN_MIN_LEVEL == cfg.FPN.ROI_MIN_LEVEL
            # FPN RPN max level might be > FPN ROI max level in which case we
            # need to discard some leading conv blobs (blobs are ordered from
            # max level to min level)
            num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
            blob_conv = blob_conv[-num_roi_levels:]
            spatial_scale_conv = spatial_scale_conv[-num_roi_levels:]
        # Add the Fast R-CNN branch
        blob_frcn, dim_frcn, spatial_scale_frcn = add_roi_frcn_head_func(
            model, blob_conv, dim_conv, spatial_scale_conv)
        add_fast_rcnn_outputs(model, blob_frcn, dim_frcn, is_head_3d=head_3d)
        # Add the mask branch
        if cfg.MODEL.MASK_ON:
            if is_inference:
                # Snapshot the net definition before the mask ops are added so
                # it can be restored afterwards.
                bbox_net = copy.deepcopy(model.net.Proto())
            # Add the mask branch
            blob_mrcn, dim_mrcn, _ = add_roi_mask_head_func(
                model, blob_conv, dim_conv, spatial_scale_conv)
            blob_mask = add_mask_rcnn_outputs(model, blob_mrcn, dim_mrcn)
            if is_inference:
                # Extract the mask prediction net, store it as its own network,
                # then restore the primary net to the bbox-only network
                model.mask_net, blob_mask = get_suffix_net(
                    'mask_net', bbox_net.op, model.net, [blob_mask])
                model.net._net = bbox_net
        # Add the keypoint branch
        if cfg.MODEL.KEYPOINTS_ON:
            if is_inference:
                # Same snapshot/restore scheme as the mask branch above.
                bbox_net = copy.deepcopy(model.net.Proto())
            blob_krcnn, dim_krcnn, _ = add_roi_keypoint_head_func(
                model, blob_conv, dim_conv, spatial_scale_conv)
            blob_keypoint = add_heatmap_outputs(
                model, blob_krcnn, dim_krcnn,
                time_dim=out_time_dim, is_head_3d=head_3d)
            if is_inference:
                # keypoint_blob_out is not used again in this function.
                model.keypoint_net, keypoint_blob_out = get_suffix_net(
                    'keypoint_net', bbox_net.op, model.net, [blob_keypoint])
                model.net._net = bbox_net
        if model.train:
            # Collect the loss gradients of every enabled branch in one dict.
            loss_gradients = add_fast_rcnn_losses(model, time_dim=out_time_dim)
            if cfg.MODEL.MASK_ON:
                loss_gradients.update(add_mask_rcnn_losses(model, blob_mask,
                                                           time_dim=out_time_dim))
            if cfg.MODEL.KEYPOINTS_ON:
                loss_gradients.update(add_heatmap_losses(model, time_dim=out_time_dim))
            if cfg.MODEL.FASTER_RCNN:
                if cfg.FPN.FPN_ON:
                    # The loss function is shared between 2D and 3D FPN
                    loss_gradients.update(FPN.add_fpn_rpn_losses(
                        model, time_dim=out_time_dim))
                    if cfg.VIDEO.PREDICT_RPN_BOX_VIS:
                        loss_gradients.update(FPN.add_fpn_rpn_vis_losses(
                            model, time_dim=out_time_dim))
                else:
                    loss_gradients.update(add_rpn_losses(
                        model, time_dim=out_time_dim))
                    if cfg.VIDEO.PREDICT_RPN_BOX_VIS:
                        loss_gradients.update(add_rpn_vis_losses(
                            model, time_dim=out_time_dim))
        # Loss gradients exist only in training mode; inference returns None.
        return loss_gradients if model.train else None
    # Hand the single-GPU builder to the data-parallel wrapper.
    build_data_parallel_model(model, _single_gpu_build_func)
    return model