RPN
关于faster rcnn中RPN的介绍大家可以自己看paper或者找点论坛看看, medium, CSDN, 知乎, 包括简书都有大量的资料做介绍, 本文只站在源码的角度给你介绍每一步的实现, 所以就不阐述原理了,见谅~~
代码入口lib/model/train_val.py# Construct the computation graph
lr, train_op = self.construct_graph(sess)
lr是学习率, train_op是训练网络的一系列操作。
让我们走进construct_graph函数lib/model/train_val.py
def construct_graph(self, sess):
with sess.graph.as_default(): # Set the random seed for tensorflow
tf.set_random_seed(cfg.RNG_SEED) # Build the main computation graph
layers = self.net.create_architecture('TRAIN', self.imdb.num_classes, tag='default',
anchor_scales=cfg.ANCHOR_SCALES,
anchor_ratios=cfg.ANCHOR_RATIOS) # Define the loss
loss = layers['total_loss'] # Set learning rate and momentum
lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False) self.optimizer = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM) # Compute the gradients with regard to the loss
gvs = self.optimizer.compute_gradients(loss) # Double the gradient of the bias if set
if cfg.TRAIN.DOUBLE_BIAS:
final_gvs = []
with tf.variable_scope('Gradient_Mult') as scope: for grad, var in gvs:
scale = 1.
if cfg.TRAIN.DOUBLE_BIAS and '/biases:' in var.name:
scale *= 2.
if not np.allclose(scale, 1.0):
grad = tf.multiply(grad, scale)
final_gvs.append((grad, var))
train_op = self.optimizer.apply_gradients(final_gvs) else:
train_op = self.optimizer.apply_gradients(gvs) # We will handle the snapshots ourselves
self.saver = tf.train.Saver(max_to_keep=100000) # Write the train and validation information to tensorboard
self.writer = tf.summary.FileWriter(self.tbdir, sess.graph) self.valwriter = tf.summary.FileWriter(self.tbvaldir) return lr, train_op
代码其实将流程阐述得非常清楚, 我再废话给大家总结一下~~
给tensorflow设置随机种子seed(为啥要这样,可以百度一下)
建立一个计算图computational graph(重点,下面介绍)
定义了一个执行Momentum算法的优化器, 其更新规则为:
accumulation = momentum * accumulation + gradient
variable -= learning_rate * accumulation
计算损失对参数的梯度self.optimizer.compute_gradients(loss)
将梯度应用于变量self.optimizer.apply_gradients(gvs), 返回值就是train_op
定义Saver(用于快照-缓存), writer, valwriter(把信息及时传入tensorboard)
然后走进create_architecture函数lib/nets/network.py
def create_architecture(self, mode, num_classes, tag=None,
anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)): self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3]) self._im_info = tf.placeholder(tf.float32, shape=[3]) self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5]) self._tag = tag self._num_classes = num_classes self._mode = mode self._anchor_scales = anchor_scales self._num_scales = len(anchor_scales) self._anchor_ratios = anchor_ratios self._num_ratios = len(anchor_ratios) self._num_anchors = self._num_scales * self._num_ratios
training = mode == 'TRAIN'
testing = mode == 'TEST'
assert tag != None # handle most of the regularizers here
weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY) if cfg.TRAIN.BIAS_DECAY:
biases_regularizer = weights_regularizer else:
biases_regularizer = tf.no_regularizer # list as many types of layers as possible, even if they are not used now
with arg_scope([slim.conv2d, slim.conv2d_in_plane, \
slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
weights_regularizer=weights_regularizer,
biases_regularizer=biases_regularizer,
biases_initializer=tf.constant_initializer(0.0)):
rois, cls_prob, bbox_pred = self._build_network(training)
layers_to_output = {'rois': rois} for var in tf.trainable_variables(): self._train_summaries.append(var) if testing:
stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes)) self._predictions["bbox_pred"] *= stds self._predictions["bbox_pred"] += means else: self._add_losses()
layers_to_output.update(self._losses)
val_summaries = []
with tf.device("/cpu:0"):
val_summaries.append(self._add_gt_image_summary()) for key, var in self._event_summaries.items():
val_summaries.append(tf.summary.scalar(key, var)) for key, var in self._score_summaries.items(): self._add_score_summary(key, var) for var in self._act_summaries: self._add_act_summary(var) for var in self._train_summaries: self._add_train_summary(var) self._summary_op = tf.summary.merge_all() self._summary_op_val = tf.summary.merge(val_summaries)
layers_to_output.update(self._predictions) return layers_to_output
很多人(包括我自己)对tensorflow还不是很熟悉, 所以这里还是给大家概括一下程序流程:
给network的成员变量赋值
定义权重weights的正则regularizer
建立网络self._build_network(training) (重点)
定义损失函数, 包括RPN class loss, RPN bbox loss,整个RCNN网络的class loss和最终确定的物体边框bbox loss, 细节可以看这个函数_add_losses
更新一下tensorboard用得到的参数
然后我们了解一下_build_network函数lib/nets/network.py
def _build_network(self, is_training=True): # select initializers
if cfg.TRAIN.TRUNCATED:
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001) else:
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
net_conv = self._image_to_head(is_training)
with tf.variable_scope(self._scope, self._scope): # build the anchors for the image
self._anchor_component() # region proposal network
rois = self._region_proposal(net_conv, is_training, initializer) # region of interest pooling
if cfg.POOLING_MODE == 'crop':
pool5 = self._crop_pool_layer(net_conv, rois, "pool5") else:
raise NotImplementedError
fc7 = self._head_to_tail(pool5, is_training)
with tf.variable_scope(self._scope, self._scope): # region classification
cls_prob, bbox_pred = self._region_classification(fc7, is_training,
initializer, initializer_bbox) self._score_summaries.update(self._predictions) return rois, cls_prob, bbox_pred初始化权重weight, 用截断的normal initializer或者随机的normal initializer
构建主干网络前端_image_to_head
构建anchors
构建RPN
ROI pooling 调用函数_crop_pool_layer
构建主干网络的尾部 fc7 = self._head_to_tail(pool5, is_training)
object分类以及边框预测的回归
各位是不是一脸萌币。。。不要紧, 下面我会给大家详细介绍上述的每一个步骤。
构建主干网络前端
_image_to_head方法是一个类Network的一个abstract class, 以它的实现类Resnet 101为例lib/nets/resnet_v1.py def _image_to_head(self, is_training, reuse=None):
assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3) # Now the base is always fixed during training
with slim.arg_scope(resnet_arg_scope(is_training=False)):
net_conv = self._build_base() if cfg.RESNET.FIXED_BLOCKS > 0:
with slim.arg_scope(resnet_arg_scope(is_training=False)):
net_conv, _ = resnet_v1.resnet_v1(net_conv, self._blocks[0:cfg.RESNET.FIXED_BLOCKS],
global_pool=False,
include_root_block=False,
reuse=reuse,
scope=self._scope) if cfg.RESNET.FIXED_BLOCKS
with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
net_conv, _ = resnet_v1.resnet_v1(net_conv, self._blocks[cfg.RESNET.FIXED_BLOCKS:-1],
global_pool=False,
include_root_block=False,
reuse=reuse,
scope=self._scope) self._act_summaries.append(net_conv) self._layers['head'] = net_conv return net_conv def _build_base(self):
with tf.variable_scope(self._scope, self._scope):
net = resnet_utils.conv2d_same(self._image, 64, 7, stride=2, scope='conv1')
net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')
我会在下一篇文章中专门介绍resnet, 这里还是只做一个流程的简介。调用_build_base函数手动建立初始的几层: input -> 64 * 7 * 7 filters, stride = 2 -> padding -> max pooling
构建网络主干, 因为之前定义过self._blocksself._blocks = [resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
resnet_v1