老卫带你学---faster-rcnn源码剖析(anchor_target_layer.py源码详解)

RPN(Region Proposal Network)是 Faster R-CNN 与 Fast R-CNN 的主要区别:它以特征图为输入,输出 region proposals 以及每个 proposal 对应的分数。

# --------------------------------------------------------  
# Faster R-CNN  
# Copyright (c) 2015 Microsoft  
# Licensed under The MIT License [see LICENSE for details]  
# Written by Ross Girshick and Sean Bell  
# --------------------------------------------------------  
  
import os  
import caffe  
import yaml  
from fast_rcnn.config import cfg  
import numpy as np  
import numpy.random as npr  
from generate_anchors import generate_anchors  
from utils.cython_bbox import bbox_overlaps  
from fast_rcnn.bbox_transform import bbox_transform  
  
DEBUG = False  
  
class AnchorTargetLayer(caffe.Layer):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Bottom blobs as read by this layer:
        bottom[0]: a map whose last two dimensions are (H, W)
        bottom[1]: ground-truth boxes, rows of (x1, y1, x2, y2, label)
        bottom[2]: im_info; row 0 is read as (height, width, scale)

    Top blobs written by this layer (A = number of base anchors):
        top[0]: labels, shape (1, 1, A * H, W); 1 = positive,
                0 = negative, -1 = don't care
        top[1]: bbox regression targets, shape (1, A * 4, H, W)
        top[2]: bbox inside weights, shape (1, A * 4, H, W)
        top[3]: bbox outside weights, shape (1, A * 4, H, W)
    """

    def setup(self, bottom, top):
        """Build the base anchors and give the top blobs an initial shape.

        Layer parameters (parsed from ``self.param_str_`` as YAML):
            scales: anchor scales, default (8, 16, 32)
            feat_stride: required; pixel stride between neighbouring cells
                of the feature map (presumably 16 for the stock networks --
                confirm against the prototxt)
            allowed_border: how far an anchor may cross the image edge,
                default 0
        """
        # safe_load instead of load: yaml.load() without an explicit Loader
        # can construct arbitrary objects and is rejected by recent PyYAML.
        layer_params = yaml.safe_load(self.param_str_)
        anchor_scales = layer_params.get('scales', (8, 16, 32))
        # Base anchors, one row (x1, y1, x2, y2) per ratio/scale combination.
        self._anchors = generate_anchors(scales=np.array(anchor_scales))
        # Number of base anchors per feature-map cell.
        self._num_anchors = self._anchors.shape[0]
        self._feat_stride = layer_params['feat_stride']

        if DEBUG:
            print('anchors:')
            print(self._anchors)
            print('anchor shapes:')
            print(np.hstack((
                self._anchors[:, 2::4] - self._anchors[:, 0::4],
                self._anchors[:, 3::4] - self._anchors[:, 1::4],
            )))
            # Running statistics, only maintained (and only read) in DEBUG.
            self._counts = cfg.EPS
            self._sums = np.zeros((1, 4))
            self._squared_sums = np.zeros((1, 4))
            self._fg_sum = 0
            self._bg_sum = 0
            self._count = 0

        # allow boxes to sit over the edge by a small amount
        self._allowed_border = layer_params.get('allowed_border', 0)

        height, width = bottom[0].data.shape[-2:]
        if DEBUG:
            print('AnchorTargetLayer: height {} width {}'.format(height, width))

        A = self._num_anchors
        # labels
        top[0].reshape(1, 1, A * height, width)
        # bbox_targets
        top[1].reshape(1, A * 4, height, width)
        # bbox_inside_weights
        top[2].reshape(1, A * 4, height, width)
        # bbox_outside_weights
        top[3].reshape(1, A * 4, height, width)

    def forward(self, bottom, top):
        """Label every anchor and compute its regression target and weights.

        Steps:
          1. tile the base anchors over every (H, W) cell of bottom[0]
          2. drop anchors that cross the image boundary
          3. label the remaining anchors from their overlap with the GT boxes
          4. randomly subsample positive / negative labels
          5. compute bbox regression targets and loss weights
          6. scatter the results back onto the full anchor set and write
             them to the top blobs
        """
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label); row count varies per image
        gt_boxes = bottom[1].data
        # im_info, read below as (image height, image width, scale)
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print('')
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
            print('height, width: ({}, {})'.format(height, width))
            print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
            print('rpn: gt_boxes {}'.format(gt_boxes))

        # 1. Per-cell pixel offsets of the feature-map grid
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]  # K = height * width
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        # Total anchor count, including those that cross the image border.
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]

        if DEBUG:
            print('total_anchors {}'.format(total_anchors))
            print('inds_inside {}'.format(len(inds_inside)))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors.shape {}'.format(anchors.shape))

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # ascontiguousarray only fixes memory layout/dtype for the Cython
        # routine; it does not sort anything. np.float64 replaces the removed
        # np.float alias (same dtype).
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        # For each anchor: the GT box it overlaps most, and that overlap.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # For each GT box: the best overlap achieved by any anchor ...
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # ... and every anchor that reaches that best overlap (ties included).
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            # Surplus positives become -1 (ignored), not negatives.
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            # Surplus negatives are likewise ignored (-1).
            labels[disable_inds] = -1

        # Regression target of every inside anchor w.r.t. the GT box it
        # overlaps most (computed for all anchors; the weights below select
        # which ones contribute).
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        # Inside weights: configured value for positives, 0 elsewhere.
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)

        # Outside weights normalise the contribution of each sampled anchor.
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print('means:')
            print(means)
            print('stdevs:')
            print(stds)

        # map up to original set of anchors: anchors that were dropped for
        # crossing the border get label -1 and zero targets/weights
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
            print('rpn: num_positive {}'.format(np.sum(labels == 1)))
            print('rpn: num_negative {}'.format(np.sum(labels == 0)))
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
            print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
  
  
def _unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into an array covering the full set.

    Returns a float32 array whose first dimension is ``count`` and whose
    remaining dimensions match ``data``. Rows listed in ``inds`` receive the
    corresponding rows of ``data``; every other row holds ``fill``.
    """
    # For 1-D data the trailing shape is empty, so this yields (count,).
    full_shape = (count, ) + tuple(data.shape[1:])
    unmapped = np.empty(full_shape, dtype=np.float32)
    unmapped.fill(fill)
    unmapped[inds] = data
    return unmapped
  
  
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    ``ex_rois`` must have 4 columns and ``gt_rois`` 5 columns, with the same
    number of rows; only the first four columns of ``gt_rois`` are passed to
    ``bbox_transform``. The result is returned as float32.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5

    # Drop the fifth GT column before computing the transform.
    gt_coords = gt_rois[:, :4]
    targets = bbox_transform(ex_rois, gt_coords)
    return targets.astype(np.float32, copy=False)

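这段代码主要完成以下工作:生成anchors,按feature map上每个位置的偏移量平移anchors,去掉越界的anchors;再根据anchors与gt的overlaps为每个anchor赋予标签(1为正样本,0为负样本,-1为忽略),对过多的正、负样本做随机降采样,并计算bbox回归目标及其inside/outside权重。注意:该层本身并不进行NMS或排序。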

其中generate_anchors.py的源码如下。这段代码生成不同宽高比(1:2,1:1,2:1)、不同尺度(8 16 32)的anchors:

# Purpose: generate anchors at multiple scales and aspect ratios.
#          Scales: 128, 256, 512; aspect ratios: 1:2, 1:1, 2:1
  
import numpy as np  #提供矩阵运算功能的库  
  
# Top-level anchor generator. `ratios` lists the aspect ratios to enumerate
# and `scales` the multipliers applied to the base window; the default
# scales are 2**3, 2**4, 2**5, i.e. [8, 16, 32].
def generate_anchors(base_size=16, ratios=(0.5, 1, 2),
                     scales=2**np.arange(3, 6)):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, 15, 15) window.

    Returns an array with one (x1, y1, x2, y2) row per ratio/scale pair,
    grouped by ratio (all scales of the first ratio come first).
    """
    # Base window [0, 0, base_size - 1, base_size - 1].
    base_anchor = np.array([1, 1, base_size, base_size]) - 1
    # One anchor per aspect ratio.
    ratio_anchors = _ratio_enum(base_anchor, ratios)
    # Scale each ratio anchor and stack the results vertically. Iterating
    # over the rows directly avoids xrange, which does not exist on Python 3.
    anchors = np.vstack([_scale_enum(ratio_anchor, scales)
                         for ratio_anchor in ratio_anchors])
    return anchors
  
# Convert one anchor window from corner form to (width, height, center) form.
def _whctrs(anchor):
    """
    Return width, height, x center, and y center for an anchor (window).
    """
    # The anchor stores the top-left and bottom-right corner coordinates.
    left, top = anchor[0], anchor[1]
    right, bottom = anchor[2], anchor[3]
    # Corners are inclusive, hence the +1.
    w = right - left + 1
    h = bottom - top + 1
    x_ctr = left + 0.5 * (w - 1)
    y_ctr = top + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr
  
# Build anchor windows from paired width/height vectors around one center.
def _mkanchors(ws, hs, x_ctr, y_ctr):
    """
    Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).

    ws and hs correspond element by element; the result has one
    (x1, y1, x2, y2) row per pair. For example ws = [23 16 11],
    hs = [12 16 22] around (7.5, 7.5) gives
    [[-3.5 2 18.5 13], [0 0 15 15], [2.5 -3 12.5 18]].
    """
    # np.newaxis turns each 1-D vector into a column so that hstack places
    # the four coordinates side by side.
    half_w = 0.5 * (ws[:, np.newaxis] - 1)
    half_h = 0.5 * (hs[:, np.newaxis] - 1)
    anchors = np.hstack((x_ctr - half_w,
                         y_ctr - half_h,
                         x_ctr + half_w,
                         y_ctr + half_h))
    return anchors
  
# Enumerate the aspect-ratio variants of one anchor,
# e.g. anchor [0 0 15 15] with ratios [0.5, 1, 2].
def _ratio_enum(anchor, ratios):
    """
    Enumerate a set of anchors for each aspect ratio wrt an anchor.

    Worked example for anchor [0 0 15 15] and ratios [0.5, 1, 2]:
    the window is 16 x 16 (area 256) centered at (7.5, 7.5);
    area / ratios = [512, 256, 128], so the rounded widths are [23 16 11]
    and the rounded heights (width * ratio) are [12 16 22].
    """
    base_w, base_h, ctr_x, ctr_y = _whctrs(anchor)
    area = base_w * base_h
    # Width so that width * (width * ratio) is approximately the base area.
    widths = np.round(np.sqrt(area / ratios))
    heights = np.round(widths * ratios)
    return _mkanchors(widths, heights, ctr_x, ctr_y)
  
# Enumerate the scaled variants of one anchor,
# e.g. anchor [0 0 15 15] with scales [8 16 32].
def _scale_enum(anchor, scales):
    """
    Enumerate a set of anchors for each scale wrt an anchor.

    Worked example for anchor [0 0 15 15] and scales [8 16 32]:
    the window is 16 x 16 centered at (7.5, 7.5), the scaled widths and
    heights are both [128 256 512], and the result is
    [[-56 -56 71 71], [-120 -120 135 135], [-248 -248 263 263]].
    """
    base_w, base_h, ctr_x, ctr_y = _whctrs(anchor)
    scaled_ws = base_w * scales
    scaled_hs = base_h * scales
    return _mkanchors(scaled_ws, scaled_hs, ctr_x, ctr_y)
  
if __name__ == '__main__':
    # Script entry point: time one call to generate_anchors(), print the
    # elapsed seconds and the resulting anchors, then drop into an IPython
    # shell for interactive inspection.
    import time
    t = time.time()
    a = generate_anchors()
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(time.time() - t)
    print(a)
    from IPython import embed; embed()
以下是一个基于深度习的目检测代码示例,使用的是 TensorFlow 和 Keras 框架。这个代码示例使用的是 Faster R-CNN 模型,可以在 COCO 数据集上进行训练和测试,同时还包括了数据增强和模型评估等功能。 ```python import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras import models from tensorflow.keras import optimizers from tensorflow.keras import backend as K from tensorflow.keras.layers import Input from tensorflow.keras.applications import ResNet50 from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import MaxPooling2D from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Dropout from tensorflow.keras.layers import GlobalAveragePooling2D from tensorflow.keras.layers import GlobalMaxPooling2D from tensorflow.keras.layers import TimeDistributed from tensorflow.keras.layers import AveragePooling2D from tensorflow.keras.layers import BatchNormalization from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Add from tensorflow.keras.layers import ZeroPadding2D from tensorflow.keras.layers import Cropping2D from tensorflow.keras.layers import Lambda from tensorflow.keras.layers import Reshape from tensorflow.keras.layers import Concatenate from tensorflow.keras.layers import Softmax from tensorflow.keras.models import Model from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping from tensorflow.keras.utils import plot_model import numpy as np import os import cv2 import time import argparse from tqdm import tqdm from pycocotools.coco import COCO from pycocotools import mask as maskUtils os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' np.random.seed(42) tf.random.set_seed(42) class Config: NAME = "faster_rcnn" BACKBONE = "resnet50" IMAGE_MIN_DIM = 800 IMAGE_MAX_DIM = 1333 RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) RPN_ANCHOR_RATIOS = [0.5, 1, 2] RPN_ANCHOR_STRIDE = 16 RPN_NMS_THRESHOLD = 0.7 RPN_TRAIN_ANCHORS_PER_IMAGE = 256 RPN_POSITIVE_RATIO = 
0.5 DETECTION_MIN_CONFIDENCE = 0.7 DETECTION_NMS_THRESHOLD = 0.3 DETECTION_MAX_INSTANCES = 100 LEARNING_RATE = 0.001 WEIGHT_DECAY = 0.0001 EPOCHS = 50 BATCH_SIZE = 1 STEPS_PER_EPOCH = 1000 VALIDATION_STEPS = 50 IMAGES_PER_GPU = 1 MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) NUM_CLASSES = 81 # COCO has 80 classes + background class DataGenerator(keras.utils.Sequence): def __init__(self, dataset, config, shuffle=True, augment=True): self.dataset = dataset self.config = config self.shuffle = shuffle self.augment = augment self.image_ids = np.copy(self.dataset.image_ids) self.on_epoch_end() def __len__(self): return int(np.ceil(len(self.dataset.image_ids) / self.config.BATCH_SIZE)) def __getitem__(self, idx): batch_image_ids = self.image_ids[idx * self.config.BATCH_SIZE:(idx + 1) * self.config.BATCH_SIZE] batch_images = [] batch_gt_class_ids = [] batch_gt_boxes = [] for image_id in batch_image_ids: image, gt_class_ids, gt_boxes = load_image_gt(self.dataset, self.config, image_id, augment=self.augment) batch_images.append(image) batch_gt_class_ids.append(gt_class_ids) batch_gt_boxes.append(gt_boxes) batch_images = np.array(batch_images) batch_gt_class_ids = np.array(batch_gt_class_ids) batch_gt_boxes = np.array(batch_gt_boxes) rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes = build_rpn_targets(batch_images.shape, self.config, batch_gt_class_ids, batch_gt_boxes) inputs = [batch_images, batch_gt_class_ids, batch_gt_boxes, rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes] outputs = [] return inputs, outputs def on_epoch_end(self): if self.shuffle: np.random.shuffle(self.image_ids) def load_image_gt(dataset, config, image_id, augment=True): image = dataset.load_image(image_id) mask, class_ids = dataset.load_mask(image_id) bbox = maskUtils.toBbox(mask) bbox = np.expand_dims(bbox, axis=-1) class_ids = np.expand_dims(class_ids, axis=-1) gt_boxes = np.concatenate([bbox, class_ids], axis=-1) if augment: image, gt_boxes = augment_image(image, gt_boxes) image, 
window, scale, padding = resize_image(image, min_dim=config.IMAGE_MIN_DIM, max_dim=config.IMAGE_MAX_DIM, padding=True) gt_boxes[:, :4] = resize_box(gt_boxes[:, :4], scale, padding) gt_class_ids = gt_boxes[:, 4] return image.astype(np.float32) - config.MEAN_PIXEL, gt_class_ids.astype(np.int32), gt_boxes[:, :4].astype(np.float32) def augment_image(image, gt_boxes): if np.random.rand() < 0.5: image = np.fliplr(image) gt_boxes[:, 0] = image.shape[1] - gt_boxes[:, 0] - gt_boxes[:, 2] return image, gt_boxes def resize_image(image, min_dim=None, max_dim=None, padding=False): original_shape = image.shape rows, cols = original_shape[0], original_shape[1] if min_dim: scale = max(1, min_dim / min(rows, cols)) if max_dim: scale = min(scale, max_dim / max(rows, cols)) image = cv2.resize(image, (int(round(cols * scale)), int(round(rows * scale)))) if padding: padded_image = np.zeros((max_dim, max_dim, 3), dtype=np.float32) padded_image[:image.shape[0], :image.shape[1], :] = image window = (0, 0, image.shape[1], image.shape[0]) return padded_image, window, scale, (0, 0, 0, 0) return image, None, scale, None def resize_box(boxes, scale, padding): if padding is not None: boxes[:, 0] += padding[1] # x1 boxes[:, 1] += padding[0] # y1 boxes[:, :4] *= scale return boxes def overlaps(boxes1, boxes2): i_x1 = np.maximum(boxes1[:, 0], boxes2[:, 0]) i_y1 = np.maximum(boxes1[:, 1], boxes2[:, 1]) i_x2 = np.minimum(boxes1[:, 2], boxes2[:, 2]) i_y2 = np.minimum(boxes1[:, 3], boxes2[:, 3]) i_area = np.maximum(i_x2 - i_x1 + 1, 0) * np.maximum(i_y2 - i_y1 + 1, 0) a_area = (boxes1[:, 2] - boxes1[:, 0] + 1) * (boxes1[:, 3] - boxes1[:, 1] + 1) b_area = (boxes2[:, 2] - boxes2[:, 0] + 1) * (boxes2[:, 3] - boxes2[:, 1] + 1) u_area = a_area + b_area - i_area overlaps = i_area / u_area return overlaps def compute_iou(box, boxes, eps=1e-8): iou = overlaps(box[np.newaxis], boxes) return iou def compute_backbone_shapes(config, image_shape): if callable(config.BACKBONE): return config.BACKBONE(image_shape) 
assert isinstance(config.BACKBONE, str) if config.BACKBONE in ["resnet50", "resnet101"]: if image_shape[0] >= 800: return np.array([[200, 256], [100, 128], [50, 64], [25, 32], [13, 16]]) else: return np.array([[100, 128], [50, 64], [25, 32], [13, 16], [7, 8]]) else: raise ValueError("Invalid backbone name") def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) scales, ratios = scales.flatten(), ratios.flatten() heights = scales / np.sqrt(ratios) widths = scales * np.sqrt(ratios) shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) box_widths, box_centers_x = np.meshgrid(widths, shifts_x) box_heights, box_centers_y = np.meshgrid(heights, shifts_y) box_centers = np.stack([box_centers_y, box_centers_x], axis=2) box_sizes = np.stack([box_heights, box_widths], axis=2) box_centers = np.reshape(box_centers, [-1, 2]) box_sizes = np.reshape(box_sizes, [-1, 2]) boxes = np.concatenate([box_centers - 0.5 * box_sizes, box_centers + 0.5 * box_sizes], axis=1) boxes = np.round(boxes) return boxes def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride): anchors = [] for i in range(len(scales)): anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], feature_strides[i], anchor_stride)) return np.concatenate(anchors, axis=0) def norm_boxes(boxes, shape): boxes = boxes.astype(np.float32) h, w = shape[:2] scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) boxes = np.divide(boxes - shift, scale) boxes = np.maximum(np.minimum(boxes, 1), 0) return boxes def denorm_boxes(boxes, shape): h, w = shape[:2] scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) boxes = boxes * scale + shift return boxes.astype(np.int32) def overlaps_graph(boxes1, boxes2): b1 = 
tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) b2 = tf.reshape(tf.transpose(b2), [-1, 4]) overlaps = compute_iou(b1, b2) overlaps = tf.reshape(overlaps, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) return overlaps def detection_target_graph(proposals, gt_class_ids, gt_boxes, config): proposals = tf.cast(proposals, tf.float32) gt_boxes = tf.cast(gt_boxes, tf.float32) gt_class_ids = tf.cast(gt_class_ids, tf.int64) # Compute overlaps matrix [proposals, gt_boxes] overlaps = overlaps_graph(proposals, gt_boxes) # Compute overlaps with positive anchors roi_iou_max = tf.reduce_max(overlaps, axis=1) positive_roi_bool = (roi_iou_max >= config.RPN_POSITIVE_RATIO) positive_indices = tf.where(positive_roi_bool)[:, 0] # Subsample ROIs. Aim for 33% positive # Positive ROIs positive_count = int(config.RPN_TRAIN_ANCHORS_PER_IMAGE * config.RPN_POSITIVE_RATIO) positive_indices = tf.random.shuffle(positive_indices)[:positive_count] positive_count = tf.shape(positive_indices)[0] # Negative ROIs. Add enough to maintain positive:negative ratio. r = 1.0 / config.RPN_POSITIVE_RATIO negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count negative_indices = tf.where(roi_iou_max < config.RPN_POSITIVE_RATIO)[:, 0] negative_count = tf.math.minimum(tf.shape(negative_indices)[0], negative_count) negative_indices = tf.random.shuffle(negative_indices)[:negative_count] # Gather selected ROIs positive_rois = tf.gather(proposals, positive_indices) negative_rois = tf.gather(proposals, negative_indices) # Assign positive ROIs to GT boxes. 
positive_overlaps = tf.gather(overlaps, positive_indices) roi_gt_box_assignment = tf.cond( tf.greater(tf.shape(positive_overlaps)[1], 0), true_fn=lambda: tf.argmax(positive_overlaps, axis=1), false_fn=lambda: tf.cast(tf.constant([]), tf.int64) ) roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) # Compute bbox refinement for positive ROIs deltas = keras_rcnn.backend.boxutils.bbox_transform(positive_rois, roi_gt_boxes) deltas /= tf.constant(config.BBOX_STD_DEV, dtype=tf.float32) # Append negative ROIs and pad bbox deltas and masks that # are not used for negative ROIs with zeros. rois = tf.concat([positive_rois, negative_rois], axis=0) N = tf.shape(negative_rois)[0] P = tf.math.maximum(config.RPN_TRAIN_ANCHORS_PER_IMAGE - tf.shape(rois)[0], 0) rois = tf.pad(rois, [(0, P), (0, 0)]) roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) # Return rois and deltas return rois, roi_gt_class_ids, deltas def build_rpn_targets(image_shape, config, gt_class_ids, gt_boxes): feature_shapes = compute_backbone_shapes(config, image_shape) anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, feature_shapes, config.BACKBONE_SHAPES, config.RPN_ANCHOR_STRIDE) rpn_match, rpn_bbox = keras_rcnn.backend.anchor.get_best_anchor(anchors, gt_boxes, config) rpn_match = tf.expand_dims(rpn_match, axis=-1) rpn_bbox = tf.reshape(rpn_bbox, [-1, 4]) rois, roi_gt_class_ids, deltas = tf.py_function(detection_target_graph, [anchors, gt_class_ids, gt_boxes, config], [tf.float32, tf.int64, tf.float32]) rois.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4]) roi_gt_class_ids.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE]) deltas.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4 * config.NUM_CLASSES]) rpn_match.set_shape([None, 1]) rpn_bbox.set_shape([None, 4]) rois = tf.stop_gradient(rois) 
roi_gt_class_ids = tf.stop_gradient(roi_gt_class_ids) deltas = tf.stop_gradient(deltas) rpn_match = tf.stop_gradient(rpn_match) rpn_bbox = tf.stop_gradient(rpn_bbox) return rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas def build_rpn_model(config): input_image = Input(shape=[None, None, 3], name="input_image") shared_layers = ResNet50(include_top=False, weights='imagenet', input_tensor=input_image) layer_names = ["conv4_block6_out", "conv5_block3_out", "conv6_relu"] layers = [shared_layers.get_layer(name).output for name in layer_names] output_layers = layers rpn_layers = [] for n, layer in enumerate(output_layers): rpn = Conv2D(512, (3, 3), padding="same", activation="relu", name="rpn_conv%d" % (n + 1))(layer) rpn_class = Conv2D(2 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="sigmoid", name="rpn_class%d" % (n + 1))(rpn) rpn_bbox = Conv2D(4 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="linear", name="rpn_bbox%d" % (n + 1))(rpn) rpn_layers.append(rpn_class) rpn_layers.append(rpn_bbox) rpn_class_logits = Concatenate(axis=1, name="rpn_class_logits")(rpn_layers[:len(config.RPN_ANCHOR_SCALES)]) rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_layers[len(config.RPN_ANCHOR_SCALES):]) rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_layers[len(config.R
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值