SSD代码解读之二——default box生成及标签整理

Default box生成(anchors)

Default box是指在特定feature map的每个位置上生成的特定数量、特定大小的box。所有层生成的default box存入一个list中,每个feature map对应一个元素。

def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Build the default boxes (anchors) for every feature layer.

    For each entry of ``layers_shape`` the per-layer generator
    ``ssd_anchor_one_layer`` is called with the entries of ``anchor_sizes``,
    ``anchor_ratios`` and ``anchor_steps`` found at the same index.

    Args:
        img_shape: Input image shape, forwarded unchanged to each layer call.
        layers_shape: Sequence of feature-map shapes, one per layer.
        anchor_sizes: Per-layer anchor sizes, indexed like ``layers_shape``.
        anchor_ratios: Per-layer aspect ratios, indexed like ``layers_shape``.
        anchor_steps: Per-layer step values, indexed like ``layers_shape``.
        offset: Offset forwarded to each layer call.
        dtype: Data type forwarded to each layer call.

    Returns:
        A list with one element per feature layer, each being whatever
        ``ssd_anchor_one_layer`` returned for that layer.
    """
    return [
        ssd_anchor_one_layer(img_shape, feat_shape,
                             anchor_sizes[idx],
                             anchor_ratios[idx],
                             anchor_steps[idx],
                             offset=offset, dtype=dtype)
        for idx, feat_shape in enumerate(layers_shape)
    ]

以第一个feature map 为例,代码解析如下。

  • 返回值为元组(y, x, h, w)
  • y 和 x 为所有的中心点的相对坐标,shape为[38, 38, 1],范围[0, 1]
  • h 和 w 为不同尺寸的高和宽,shape为[4,],范围[0, 1]
def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute the default boxes (anchors) of a single feature map.

    Args:
        img_shape: Image shape; index 0 is used as height and index 1 as
            width, e.g. ``(300, 300)``.
        feat_shape: Feature-map shape (rows, cols), e.g. ``(38, 38)``.
        sizes: Anchor sizes in pixels, e.g. ``(21, 45)``. ``sizes[0]`` is the
            base size; ``sizes[1]``, when present, contributes one extra
            square box of side ``sqrt(sizes[0] * sizes[1])``.
        ratios: Aspect ratios applied to the base size, e.g. ``[2, 0.5]``.
        step: Pixel stride between neighbouring feature-map cells in the
            image, e.g. ``8`` for a 38x38 map on a 300x300 image.
        offset: Fraction of a cell added to each grid index so that the
            anchor centre lies inside the cell (0.5 = cell centre).
        dtype: NumPy dtype of the grid and of the returned ``h`` / ``w``.

    Returns:
        Tuple ``(y, x, h, w)``:
        ``y`` and ``x`` are the anchor centre coordinates divided by the image
        height / width, each of shape ``feat_shape + (1,)``;
        ``h`` and ``w`` are the anchor heights / widths divided by the image
        height / width, each of shape ``(len(sizes) + len(ratios),)``.
    """
    img_h, img_w = img_shape[0], img_shape[1]

    # Integer grid of cell indices: rows[r, c] == r and cols[r, c] == c.
    rows, cols = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # Shift into the cell, convert to pixels, then normalise by image size.
    # A trailing axis is appended so the centres broadcast against h / w.
    y = np.expand_dims((rows.astype(dtype) + offset) * step / img_h, axis=-1)
    x = np.expand_dims((cols.astype(dtype) + offset) * step / img_w, axis=-1)

    # One slot per size plus one per ratio; slots that are never written
    # below keep their initial zero.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)

    # Slot 0: square box of the base size.
    base = sizes[0]
    h[0] = base / img_h
    w[0] = base / img_w
    first_ratio_slot = 1

    # Slot 1 (optional): square box at the geometric mean of both sizes.
    if len(sizes) > 1:
        mean_size = math.sqrt(base * sizes[1])
        h[1] = mean_size / img_h
        w[1] = mean_size / img_w
        first_ratio_slot = 2

    # Remaining slots: base size reshaped by each aspect ratio.
    for slot, ratio in enumerate(ratios, start=first_ratio_slot):
        root = math.sqrt(ratio)
        h[slot] = base / img_h / root
        w[slot] = base / img_w * root

    return y, x, h, w

标签整理(bboxes_encode)

原始的label和bbox不能直接用来计算loss,需要先进行预处理,得到target_labels、target_localizations和target_scores三个list,每个feature map对应一个元素。

def tf_ssd_bboxes_encode(labels,
                         bboxes,
                         anchors,
                         num_classes,
                         no_annotation_label,
                         ignore_threshold=0.5,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         dtype=tf.float32,
                         scope='ssd_bboxes_encode'):
    """Encode ground-truth labels and boxes against every layer's anchors.

    Each element of ``anchors`` is passed, together with all other arguments,
    to ``tf_ssd_bboxes_encode_layer``; the three results of each call are
    collected into three parallel lists.

    Args:
        labels: Ground-truth class labels.
        bboxes: Ground-truth bounding boxes.
        anchors: Sequence of per-feature-map anchor groups.
        num_classes: Forwarded to the per-layer encoder.
        no_annotation_label: Forwarded to the per-layer encoder.
        ignore_threshold: Forwarded to the per-layer encoder.
        prior_scaling: Forwarded to the per-layer encoder.
        dtype: Forwarded to the per-layer encoder.
        scope: Name of the enclosing ``tf.name_scope``.

    Returns:
        Tuple ``(target_labels, target_localizations, target_scores)``; each
        is a list with one element per entry of ``anchors``.
    """
    target_labels, target_localizations, target_scores = [], [], []
    with tf.name_scope(scope):
        # One anchor group per feature map.
        for layer_idx, layer_anchors in enumerate(anchors):
            block_scope = 'bboxes_encode_block_%i' % layer_idx
            with tf.name_scope(block_scope):
                encoded = tf_ssd_bboxes_encode_layer(
                    labels, bboxes, layer_anchors,
                    num_classes, no_annotation_label,
                    ignore_threshold,
                    prior_scaling, dtype)
            layer_labels, layer_loc, layer_scores = encoded
            target_labels.append(layer_labels)
            target_localizations.append(layer_loc)
            target_scores.append(layer_scores)
    return target_labels, target_localizations, target_scores

具体每个元素的实现如下,解析以第一个feature map为例。

  • 返回值(feat_labels, feat_localizations, feat_scores)
    • feat_labels:每个default box对应的label,shape:(38, 38, 4)
    • feat_localizations:每个default box对应的偏移量,shape:(38, 38, 4, 4),前3维对应每个default box,最后一维是[feat_cx, feat_cy, feat_w, feat_h]
    • feat_scores:每个default box对应的IOU,shape:(38, 38, 4)
def tf_ssd_bboxes_encode_layer(labels,  # ground-truth class labels
                               bboxes,  # ground-truth box coordinates
                               anchors_layer,  # default boxes of one feature map
                               num_classes,  # number of classes
                               no_annotation_label,  # per the article: equal to the number of classes
                               ignore_threshold=0.5,  # threshold (only used by the commented-out code)
                               prior_scaling=[0.1, 0.1, 0.2, 0.2],  # divisors for the cy, cx, h, w offsets
                               dtype=tf.float32):
    """Encode ground-truth labels and boxes against one layer's anchors.

    For every default box of the layer, the ground-truth box with the highest
    Jaccard overlap (IoU) is selected by looping over all ground-truth entries
    with ``tf.while_loop``. The selected box is then expressed as an offset
    relative to the default box.

    Args:
        labels: Ground-truth class labels; the loop runs over its first
            dimension.
        bboxes: Ground-truth boxes; ``bboxes[i]`` is indexed as
            ``[ymin, xmin, ymax, xmax]``.
        anchors_layer: Tuple ``(yref, xref, href, wref)`` describing the
            default boxes of one feature map (centres and sizes). For the
            first feature map the article gives shapes ``(38, 38, 1)`` for
            the centres and ``(4,)`` for the sizes.
        num_classes: Only ground-truth entries with ``label < num_classes``
            can be assigned to a default box.
        no_annotation_label: Not used by the active code; only referenced in
            the commented-out block inside ``body``.
        ignore_threshold: Not used by the active code; only referenced in
            the commented-out block inside ``body``.
        prior_scaling: Four divisors applied to the encoded offsets, in the
            order ``cy, cx, h, w``.
        dtype: TensorFlow dtype of the score and coordinate tensors.

    Returns:
        Tuple ``(feat_labels, feat_localizations, feat_scores)``:
        ``feat_labels`` is the int64 label assigned to each default box (0
        where nothing matched), ``feat_localizations`` stacks the encoded
        offsets ``[cx, cy, w, h]`` on a new last axis, and ``feat_scores`` is
        the best Jaccard score found for each default box.
    """
    # Anchor coordinates.
    # yref, xref: centres of the default boxes, shape (38, 38, 1) for the
    #             first feature map, values in [0, 1].
    # href, wref: heights and widths of the default boxes, shape (4,) for the
    #             first feature map, values in [0, 1].
    yref, xref, href, wref = anchors_layer
    # Top-left and bottom-right corners of the default boxes.
    ymin = yref - href / 2.
    xmin = xref - wref / 2.
    ymax = yref + href / 2.
    xmax = xref + wref / 2.
    # Area of each default box.
    vol_anchors = (xmax - xmin) * (ymax - ymin)

    # shape = (38, 38, 4) for the first feature map: every feature-map cell
    # carries href.size default boxes.
    shape = (yref.shape[0], yref.shape[1], href.size)
    # Initialisation: all tensors below have one element per default box.
    feat_labels = tf.zeros(shape, dtype=tf.int64)
    feat_scores = tf.zeros(shape, dtype=dtype)
    # Ground-truth box matched to each default box (the one with the highest
    # IoU). Starts as the box [0, 0, 1, 1] for every default box.
    feat_ymin = tf.zeros(shape, dtype=dtype)
    feat_xmin = tf.zeros(shape, dtype=dtype)
    feat_ymax = tf.ones(shape, dtype=dtype)
    feat_xmax = tf.ones(shape, dtype=dtype)

    # IoU computation.
    def jaccard_with_anchors(bbox):
        """Compute the Jaccard score (IoU) between one box and all anchors.

        ``bbox`` is indexed as ``[ymin, xmin, ymax, xmax]``.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        # Clamp at zero so non-overlapping boxes give an empty intersection.
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        # union = anchor area + box area - intersection
        union_vol = vol_anchors - inter_vol \
            + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        jaccard = tf.div(inter_vol, union_vol)
        return jaccard

    # Fraction of each default box's area covered by the given box.
    # NOTE: only referenced by the commented-out code in `body` below.
    def intersection_with_anchors(bbox):
        """Compute the intersection score between one box and all anchors.

        The score is the intersection area divided by the anchor area.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        scores = tf.div(inter_vol, vol_anchors)
        return scores

    # Loop condition: iterate over all ground-truth entries.
    def condition(i, feat_labels, feat_scores,
                  feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Condition: continue while ``i`` is below the first dim of labels.
        """
        r = tf.less(i, tf.shape(labels))
        return r[0]

    # Find, for each default box, the ground-truth box with the highest IoU
    # and update feat_labels, feat_scores and the matched coordinates.
    def body(i, feat_labels, feat_scores,
             feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Body: fold ground-truth entry ``i`` into the running best match."""
        label = labels[i]
        bbox = bboxes[i]
        # IoU between this ground-truth box and every default box.
        jaccard = jaccard_with_anchors(bbox)
        # A default box is updated (mask is True) when the IoU with this
        # ground-truth box exceeds the best score seen so far.
        mask = tf.greater(jaccard, feat_scores)
        # mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))
        # Article author's question: "should this be feat_scores > 0.5?"
        # NOTE(review): presumably `> -0.5` is meant to skip anchors whose
        # score was set to -1 by the commented-out block below - confirm.
        mask = tf.logical_and(mask, feat_scores > -0.5)
        mask = tf.logical_and(mask, label < num_classes)
        imask = tf.cast(mask, tf.int64)
        fmask = tf.cast(mask, dtype)
        # Update feat_labels and feat_scores where mask is True.
        # Article author's question: could this be
        # tf.where(mask, label, feat_labels)?
        feat_labels = imask * label + (1 - imask) * feat_labels
        feat_scores = tf.where(mask, jaccard, feat_scores)
        # Update the matched coordinates to this ground-truth box.
        feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
        feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
        feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
        feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax

        # Check no annotation label: ignore these anchors...
        # interscts = intersection_with_anchors(bbox)
        # mask = tf.logical_and(interscts > ignore_threshold,
        #                       label == no_annotation_label)
        # # Replace scores by -1.
        # feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)

        return [i+1, feat_labels, feat_scores,
                feat_ymin, feat_xmin, feat_ymax, feat_xmax]
    # Main loop definition.
    i = 0
    [i, feat_labels, feat_scores,
     feat_ymin, feat_xmin,
     feat_ymax, feat_xmax] = tf.while_loop(condition, body,
                                           [i, feat_labels, feat_scores,
                                            feat_ymin, feat_xmin,
                                            feat_ymax, feat_xmax])
    # Convert the matched boxes to centre / height / width form.
    feat_cy = (feat_ymax + feat_ymin) / 2.
    feat_cx = (feat_xmax + feat_xmin) / 2.
    feat_h = feat_ymax - feat_ymin
    feat_w = feat_xmax - feat_xmin
    # Encode as offsets relative to the default boxes; per the article, the
    # loss is computed on these offsets.
    feat_cy = (feat_cy - yref) / href / prior_scaling[0]
    feat_cx = (feat_cx - xref) / wref / prior_scaling[1]
    feat_h = tf.log(feat_h / href) / prior_scaling[2]
    feat_w = tf.log(feat_w / wref) / prior_scaling[3]
    # Use SSD ordering: x / y / w / h instead of ours.
    feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
    return feat_labels, feat_localizations, feat_scores

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值