AVOD-代码理解系列(二)

AVOD-代码理解

AVOD代码理解第二篇,开始介绍网络结构的两大部分:rpn和avod.core文件夹下的rpn_model.py和avod_model.py分别是这两部分的主体,其间会穿插一些相应的处理部分.这篇先介绍网络的输入.

1 在avod_model.py的build开头,需要来自rpn_model的输出作为avod部分的输入.
    def build(self):
        """Opening of the AVOD build step (excerpt).

        Builds the RPN model first and reuses its prediction dict, then
        pulls the top anchors from that dict and the ground-plane and
        class-label placeholders from the RPN model.
        """
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        # (this calls rpn_model.build)
        # First stage: the RPN model.
        '''rpn_model,第一阶段'''
        prediction_dict = rpn_model.build()
        # Original note: these are the anchors kept after NMS.
        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]
        # Original note: label 1 corresponds to "car" -- TODO confirm.
        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]
2 rpn_model.py部分
 def build(self):
        """Opening of the RPN build step (excerpt).

        Sets up the input placeholders and the feature extractors, then
        takes the two bottleneck outputs as the proposal inputs.
        """

        # Setup input placeholders
        # Creates placeholders that reserve a slot for each input.
        self._set_up_input_pls()

        # Setup feature extractors
        # Produces the feature maps of the inputs.
        self._set_up_feature_extractors()
        # Proposal inputs:
        # the BEV and image feature maps after the 1x1 convolution.
        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck
        # Fusion parameter; presumably the divisor used when averaging the
        # two views -- TODO confirm against the rest of the method.
        fusion_mean_div_factor = 2.0

self._set_up_feature_extractors对输入的img图像和bev图像分别进行卷积,完成特征提取.

    def _set_up_feature_extractors(self):
        """Build both feature extractors and their 1x1 bottleneck layers.

        The BEV extractor is built first, then the image extractor, each
        on its preprocessed input. The returned pairs are stored as
        ``self.bev_feature_maps`` / ``self.bev_end_points`` and
        ``self.img_feature_maps`` / ``self.img_end_points``. Each feature
        map is then passed through a batch-normalised 1x1 convolution with
        one output channel, stored as ``self.bev_bottleneck`` and
        ``self.img_bottleneck``.
        """

        def reduce_channels(outer_scope, feature_maps):
            # 1x1 convolution down to a single channel, with batch norm.
            with tf.variable_scope(outer_scope):
                return slim.conv2d(
                    feature_maps,
                    1, [1, 1],
                    scope='bottleneck',
                    normalizer_fn=slim.batch_norm,
                    normalizer_params={'is_training': self._is_training})

        # Feature maps of the bird's-eye-view input.
        bev_outputs = self._bev_feature_extractor.build(
            self._bev_preprocessed,
            self._bev_pixel_size,
            self._is_training)
        self.bev_feature_maps, self.bev_end_points = bev_outputs

        # Feature maps of the camera image, built the same way.
        img_outputs = self._img_feature_extractor.build(
            self._img_preprocessed,
            self._img_pixel_size,
            self._is_training)
        self.img_feature_maps, self.img_end_points = img_outputs

        # Bottlenecks are created after both extractors, BEV first.
        self.bev_bottleneck = reduce_channels(
            'bev_bottleneck', self.bev_feature_maps)
        self.img_bottleneck = reduce_channels(
            'img_bottleneck', self.img_feature_maps)

self._bev_feature_extractor是鸟瞰图(bev)的特征提取层,对输入的鸟瞰图进行卷积特征提取.self._img_feature_extractor的操作和它是一样的.

    def build(self,
              inputs,
              input_pixel_size,
              is_training,
              scope='bev_vgg'):
        """Modified VGG for BEV feature extraction.

        Note: All the fully_connected layers have been transformed to conv2d
              layers and are implemented in the main model.

        Four repeated 3x3 conv stages with batch norm are applied; the
        first three are each followed by a 2x2 max pool. The output of the
        last stage is resized bilinearly to
        ``input_pixel_size / 8 * upsampling_multiplier``.

        Args:
            inputs: a tensor of size [batch_size, height, width, channels].
            input_pixel_size: size of the input (H x W)
            is_training: True for training, False for validation/testing.
            scope: Optional scope for the variables.

        Returns:
            A ``(feature_maps_out, end_points)`` pair: the resized feature
            map and the dict converted from the end-points collection.
        """
        # The original note points at the rpn_config section of
        # avod_car_example.config as the source of these values.
        vgg_config = self.config

        # (config attribute, conv scope, pool scope). Each config entry is
        # indexed as [repetitions, output channels]; the original notes
        # give [2, 32], [2, 64], [3, 128], [3, 256] -- TODO confirm against
        # the config. The last stage has no pooling.
        stage_layout = (
            ('vgg_conv1', 'conv1', 'pool1'),
            ('vgg_conv2', 'conv2', 'pool2'),
            ('vgg_conv3', 'conv3', 'pool3'),
            ('vgg_conv4', 'conv4', None),
        )

        with slim.arg_scope(self.vgg_arg_scope(
                weight_decay=vgg_config.l2_weight_decay)):
            with tf.variable_scope(scope, 'bev_vgg', [inputs]) as sc:
                end_points_collection = sc.name + '_end_points'
                # Collect outputs for conv2d and max_pool2d.
                with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                    outputs_collections=end_points_collection):
                    net = inputs
                    for cfg_attr, conv_scope, pool_scope in stage_layout:
                        stage_cfg = getattr(vgg_config, cfg_attr)
                        net = slim.repeat(
                            net,
                            stage_cfg[0],
                            slim.conv2d,
                            stage_cfg[1],
                            [3, 3],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training},
                            scope=conv_scope)
                        if pool_scope is not None:
                            net = slim.max_pool2d(
                                net, [2, 2], scope=pool_scope)

                with tf.variable_scope('upsampling'):
                    # This extractor downsamples the input by a factor
                    # of 8 (3 maxpool layers)
                    downsampling_factor = 8
                    # Original note: with a multiplier of 4 the target is
                    # half the input size, not the full input size --
                    # TODO confirm the multiplier against the config.
                    upsampled_shape = (
                        (input_pixel_size / downsampling_factor)
                        * vgg_config.upsampling_multiplier)
                    feature_maps_out = tf.image.resize_bilinear(
                        net, upsampled_shape)

                # Convert end_points_collection into a end_point dict.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection)

                return feature_maps_out, end_points

self._set_up_input_pls负责设置网络的输入.这部分有些细节还不是很清楚,不过不太影响整体理解.

    def _set_up_input_pls(self):
        """Create all input placeholders through ``self._add_placeholder``.

        Placeholder names are the ``self.PL_*`` constants. Besides creating
        the placeholders, this stores the batched and preprocessed BEV and
        image inputs and the two normalised anchor-projection placeholders
        on ``self``, and registers image summaries for both inputs.
        """
        add_pl = self._add_placeholder

        # BEV placeholder shape: the pixel size with the depth appended.
        # (The original note says the depth is 6 -- TODO confirm.)
        bev_dims = np.append(self._bev_pixel_size, self._bev_depth)

        # Bird's-eye-view input.
        with tf.variable_scope('bev_input'):
            # Placeholder for BEV image input, to be filled in with feed_dict
            bev_pl = add_pl(tf.float32, bev_dims, self.PL_BEV_INPUT)

            # Prepend a batch axis, then run the extractor's preprocessing.
            self._bev_input_batches = tf.expand_dims(bev_pl, axis=0)
            self._bev_preprocessed = \
                self._bev_feature_extractor.preprocess_input(
                    self._bev_input_batches, self._bev_pixel_size)

            # Summary images: split along axis 2 into self._bev_depth parts.
            bev_slices = tf.split(bev_pl, self._bev_depth, axis=2)
            tf.summary.image("bev_maps", bev_slices,
                             max_outputs=self._bev_depth)

        # Camera image input.
        with tf.variable_scope('img_input'):
            # Variable-size input images: [None, None, depth].
            img_pl = add_pl(tf.float32,
                            [None, None, self._img_depth],
                            self.PL_IMG_INPUT)

            self._img_input_batches = tf.expand_dims(img_pl, axis=0)
            self._img_preprocessed = \
                self._img_feature_extractor.preprocess_input(
                    self._img_input_batches, self._img_pixel_size)

            # Summary image
            tf.summary.image("rgb_image", self._img_preprocessed,
                             max_outputs=2)

        # Label placeholders: anchors, 3D boxes, classes.
        with tf.variable_scope('pl_labels'):
            label_specs = (
                ([None, 6], self.PL_LABEL_ANCHORS),
                # presumably x, y, z, plus three sizes and ry -- TODO confirm
                ([None, 7], self.PL_LABEL_BOXES_3D),
                ([None], self.PL_LABEL_CLASSES),
            )
            for pl_shape, pl_name in label_specs:
                add_pl(tf.float32, pl_shape, pl_name)

        # Placeholders for anchors
        with tf.variable_scope('pl_anchors'):
            anchor_specs = (
                ([None, 6], self.PL_ANCHORS),
                ([None], self.PL_ANCHOR_IOUS),
                # presumably the six regression offsets -- TODO confirm
                ([None, 6], self.PL_ANCHOR_OFFSETS),
                ([None], self.PL_ANCHOR_CLASSES),
            )
            for pl_shape, pl_name in anchor_specs:
                add_pl(tf.float32, pl_shape, pl_name)

            # Anchor projections into the BEV map; the original note says
            # the four values are top-left and bottom-right corners.
            with tf.variable_scope('bev_anchor_projections'):
                add_pl(tf.float32, [None, 4], self.PL_BEV_ANCHORS)
                self._bev_anchors_norm_pl = add_pl(
                    tf.float32, [None, 4], self.PL_BEV_ANCHORS_NORM)

            # Anchor projections into the RGB image.
            with tf.variable_scope('img_anchor_projections'):
                add_pl(tf.float32, [None, 4], self.PL_IMG_ANCHORS)
                self._img_anchors_norm_pl = add_pl(
                    tf.float32, [None, 4], self.PL_IMG_ANCHORS_NORM)

            # NOTE(review): 'saample_info' looks like a typo of
            # 'sample_info'; kept as is because it is part of the graph
            # names -- confirm before renaming.
            with tf.variable_scope('saample_info'):
                # pl_calib_p2: the calib matrix shape is (3 x 4)
                add_pl(tf.float32, [3, 4], self.PL_CALIB_P2)
                # pl_img_idx
                add_pl(tf.int32,
                       shape=[1],
                       name=self.PL_IMG_IDX)
                # pl_ground_plane
                add_pl(tf.float32, [4], self.PL_GROUND_PLANE)
  • 2
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值