# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import sys
sys.path.append('../')
import numpy as np
import numpy.random as npr
import tensorflow as tf

from Lib.bbox_overlaps import bbox_overlaps
from Lib.bbox_transform import bbox_transform
from Lib.faster_rcnn_config import cfg
from Lib.generate_anchors import generate_anchors


def anchor_target_layer(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """Compute RPN training targets for every anchor as TensorFlow tensors.

    Wraps the NumPy implementation ``_anchor_target_layer_py`` with
    ``tf.py_func`` so it can be used inside a TensorFlow graph.

    Args:
        rpn_cls_score: RPN classification scores; only its shape is used
            (batch of 1, then feature-map height and width).
        gt_boxes: ground-truth boxes, five columns per box (the first four
            are used as box coordinates).
        im_dims: image dimensions; element 0 is read as ``[height, width]``.
        _feat_stride: step, in image pixels, between neighbouring
            feature-map cells.
        anchor_scales: anchor scales forwarded to ``generate_anchors``.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``; ``rpn_labels`` is cast to int32, the
        other three are float32.
    """
    # Run the NumPy routine as a graph op; all four outputs come back as float32.
    rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
        tf.py_func(
            _anchor_target_layer_py,
            [rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales],
            [tf.float32, tf.float32, tf.float32, tf.float32])

    # Name the outputs (labels are cast to int32 first).
    rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), name='rpn_labels')
    rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, name='rpn_bbox_targets')
    rpn_bbox_inside_weights = tf.convert_to_tensor(
        rpn_bbox_inside_weights, name='rpn_bbox_inside_weights')
    rpn_bbox_outside_weights = tf.convert_to_tensor(
        rpn_bbox_outside_weights, name='rpn_bbox_outside_weights')

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights


def _anchor_target_layer_py(rpn_cls_score, gt_boxes, im_dims, _feat_stride, anchor_scales):
    """Assign anchors to ground-truth targets (NumPy implementation).

    Produces anchor classification labels and bounding-box regression
    targets.

    Algorithm:
        1. place the base anchors at every feature-map cell
        2. keep only the anchors that lie inside the image
        3. measure the overlap of each kept anchor with each GT box
        4. label anchors 1 (foreground), 0 (background) or -1 (ignored)
        5. randomly subsample foreground/background to the RPN batch size
        6. compute regression targets towards the best-matching GT box
        7. compute the inside/outside loss weights
        8. scatter everything back to the full anchor set and reshape

    Args:
        rpn_cls_score: array whose shape starts with ``(1, H, W)``; only
            the shape is used.
        gt_boxes: array with five columns per box; the first four are the
            box coordinates.
        im_dims: array whose first element is ``[height, width]`` of the
            image.
        _feat_stride: step, in image pixels, between feature-map cells.
        anchor_scales: scales passed to ``generate_anchors``.

    Returns:
        Tuple of float32 arrays, with ``A`` the number of base anchors:
        ``rpn_labels`` of shape ``(1, 1, A * H, W)`` and
        ``rpn_bbox_targets``, ``rpn_bbox_inside_weights``,
        ``rpn_bbox_outside_weights`` of shape ``(1, A * 4, H, W)``.
    """
    im_dims = im_dims[0]  # [height, width] of the image
    # Base anchors, one row of four coordinates each
    # (presumably 9 anchors for three scales -- depends on generate_anchors).
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # allow boxes to sit over the edge by a small amount (0 = not at all)
    _allowed_border = 0

    # Only minibatch of 1 supported
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # Feature-map size; the total number of anchors is H * W * A.
    height, width = rpn_cls_score.shape[1:3]

    # 1. Generate all anchors by shifting the base anchors to every cell.
    shift_x = np.arange(0, width) * _feat_stride   # shape (W,)
    shift_y = np.arange(0, height) * _feat_stride  # shape (H,)
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # both (H, W)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()  # (H*W, 4)
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]  # K = H * W
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # 2. Keep only the anchors that lie entirely inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_dims[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_dims[0] + _allowed_border)    # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # 3. Overlaps between the inside anchors (rows) and the GT boxes (columns).
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24;
    # the resulting dtype is the same.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # Per anchor: index of, and overlap with, its best-matching GT box.
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # Per GT box: the best overlap achieved by any anchor ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # ... and every anchor that reaches that best overlap (ties included).
    # NOTE(review): if a GT box overlaps no anchor its best overlap is 0, so
    # every anchor with zero overlap to it matches here and becomes
    # foreground below -- confirm whether such boxes can reach this layer.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # 4. Assign foreground/background labels from the configured thresholds.
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # 5. Subsample positive labels if we have too many.
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # Subsample negative labels if we have too many; the background budget
    # is whatever the foreground left over in the batch.
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # 6. bbox_targets: the deltas (relative to anchors) that Faster R-CNN
    # should try to predict at each anchor, computed against the GT box
    # with the highest overlap.
    # TODO: This "weights" business might be deprecated. Requires investigation
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # 7. Inside weights: non-zero only for foreground anchors.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    # Outside weights: per-example weighting of foreground and background.
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # A weight in (0, 1) is split between foreground and background.
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # 8. Map up to the original set of anchors: anchors outside the image
    # get label -1 and zero targets/weights.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels -> (1, 1, A * height, width)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets -> (1, A * 4, height, width)
    rpn_bbox_targets = bbox_targets.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights -> (1, A * 4, height, width)
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights -> (1, A * 4, height, width)
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights


def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items.

    Args:
        data: array holding one row (or scalar) per selected item.
        count: size of the original set along the first axis.
        inds: indices in the original set that ``data`` corresponds to.
        fill: value given to every position not listed in ``inds``.

    Returns:
        float32 array of shape ``(count,) + data.shape[1:]``.
    """
    # data.shape[1:] is empty for 1-D input, so one code path serves both
    # the label vector and the 2-D target/weight arrays.
    ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data
    return ret


def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: anchors, four columns per row.
        gt_rois: matched ground-truth boxes, one per anchor, five columns
            per row; only the first four are used.

    Returns:
        float32 array with the output of ``bbox_transform``.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)