faster rcnn代码解析（4）

最新推荐文章于 2020-05-03 22:35:32 发布
l_ml_m_lm_m
最新推荐文章于 2020-05-03 22:35:32 发布
阅读量576
点赞数
分类专栏： faster_rcnn代码详解
本文链接：https://blog.csdn.net/l_ml_m_lm_m/article/details/81276001
版权
faster_rcnn代码详解专栏收录该内容
16 篇文章 1 订阅
订阅专栏
lib/datasets/imdb.py
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------

import os
import os.path as osp
import PIL
from utils.cython_bbox import bbox_overlaps#?
import numpy as np
import scipy.sparse
from fast_rcnn.config import cfg

class imdb(object):
    """Generic image database: base class for dataset wrappers.

    The class stores the dataset name, the class list, the image index and
    a lazily built "roidb" (a list with one dict of region-of-interest
    data per image). Dataset-specific subclasses are expected to override
    ``image_path_at``, ``default_roidb`` and ``evaluate_detections``, which
    raise ``NotImplementedError`` here.
    """

    def __init__(self, name):
        """Create an empty database called ``name``."""
        # A single leading underscore marks these attributes as non-public;
        # they are exposed read-only through the properties below.
        self._name = name
        self._num_classes = 0
        self._classes = []          # class names
        self._image_index = []      # one identifier per image
        self._obj_proposer = 'selective_search'
        self._roidb = None          # built on first access of ``roidb``
        self._roidb_handler = self.default_roidb
        # Use this dict for storing dataset specific config options
        self.config = {}

    # ``@property`` turns a method into a read-only attribute access.
    @property
    def name(self):
        """Name given to the database at construction time."""
        return self._name

    @property
    def num_classes(self):
        """Number of entries in ``classes``."""
        return len(self._classes)

    @property
    def classes(self):
        """List of class names."""
        return self._classes

    @property
    def image_index(self):
        """List of image identifiers."""
        return self._image_index

    @property
    def roidb_handler(self):
        """Callable that builds the roidb when it is first requested."""
        return self._roidb_handler

    # The setter makes ``obj.roidb_handler = f`` a controlled assignment.
    @roidb_handler.setter
    def roidb_handler(self, val):
        self._roidb_handler = val

    def set_proposal_method(self, method):
        """Select ``self.<method>_roidb`` as the roidb handler.

        Args:
            method: prefix of a method name on this object; for example
                ``'gt'`` selects ``self.gt_roidb``.

        Raises:
            AttributeError: if ``self`` has no ``<method>_roidb`` attribute.
        """
        # A plain attribute lookup replaces the former eval() of a string
        # built from ``method``; nothing but an attribute name is needed.
        self.roidb_handler = getattr(self, method + '_roidb')

    @property
    def roidb(self):
        """Return the roidb, building it with ``roidb_handler`` on first use.

        A roidb is a list of dictionaries, each with the following keys:
          boxes
          gt_overlaps
          gt_classes
          flipped
        """
        if self._roidb is None:
            self._roidb = self.roidb_handler()
        return self._roidb

    @property
    def cache_path(self):
        """Absolute path of ``<cfg.DATA_DIR>/cache``, created if missing."""
        cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
        if not os.path.exists(cache_path):
            os.makedirs(cache_path)
        return cache_path

    @property
    def num_images(self):
        """Number of images, i.e. the length of ``image_index``."""
        return len(self.image_index)

    def image_path_at(self, i):
        """Return the path of image ``i``; must be provided by subclasses."""
        raise NotImplementedError

    def default_roidb(self):
        """Build the default roidb; must be provided by subclasses."""
        raise NotImplementedError

    def evaluate_detections(self, all_boxes, output_dir=None):
        """
        all_boxes is a list of length number-of-classes.
        Each list element is a list of length number-of-images.
        Each of those list elements is either an empty list []
        or a numpy array of detection.

        all_boxes[class][image] = [] or np.array of shape #dets x 5
        """
        raise NotImplementedError

    def _get_widths(self):
        """Return the pixel width of every image, in index order."""
        # ``import PIL`` alone does not guarantee that the ``Image``
        # submodule is loaded, so import it explicitly before use.
        import PIL.Image
        # ``size`` is a (width, height) tuple; element 0 is the width.
        return [PIL.Image.open(self.image_path_at(i)).size[0]
                for i in range(self.num_images)]

    def append_flipped_images(self):
        """Append a horizontally flipped entry for every image.

        The roidb and the image index both double in length. Flipped
        entries share ``gt_overlaps`` and ``gt_classes`` with their source
        entry and carry ``'flipped': True``.

        Raises:
            AssertionError: if a mirrored box ends up with x2 < x1.
        """
        num_images = self.num_images
        widths = self._get_widths()
        roidb = self.roidb
        for i in range(num_images):
            source = roidb[i]
            boxes = source['boxes'].copy()
            # Columns 0 and 2 hold x1 (xmin) and x2 (xmax).
            oldx1 = boxes[:, 0].copy()
            oldx2 = boxes[:, 2].copy()
            # Mirror about the vertical centre line: the old right edge
            # becomes the new left edge and vice versa.
            boxes[:, 0] = widths[i] - oldx2 - 1
            boxes[:, 2] = widths[i] - oldx1 - 1
            assert (boxes[:, 2] >= boxes[:, 0]).all()
            roidb.append({'boxes': boxes,
                          'gt_overlaps': source['gt_overlaps'],
                          'gt_classes': source['gt_classes'],
                          'flipped': True})
        # Each image now appears twice: original first, then flipped.
        self._image_index = self._image_index * 2

    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional per-image sequence of box arrays. When
                None, the roidb boxes whose ``gt_classes`` is 0 are used.
            thresholds: optional IoU thresholds. Defaults to 0.5 through
                0.95 in steps of 0.05.
            area: name of the ground-truth area range to evaluate; one of
                'all', 'small', 'medium', 'large', '96-128', '128-256',
                '256-512', '512-inf'.
            limit: optional maximum number of candidate boxes per image
                (the first ``limit`` rows are kept).

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps

        Raises:
            AssertionError: if ``area`` is not a known range name.

        Note:
            Every roidb entry visited must contain a 'seg_areas' key;
            entries created by ``append_flipped_images`` do not add one.
        """
        # Inclusive [min, max] box-area bounds for each named range.
        area_ranges = {
            'all': [0**2, 1e5**2],
            'small': [0**2, 32**2],
            'medium': [32**2, 96**2],
            'large': [96**2, 1e5**2],
            '96-128': [96**2, 128**2],
            '128-256': [128**2, 256**2],
            '256-512': [256**2, 512**2],
            '512-inf': [512**2, 1e5**2],
        }
        assert area in area_ranges, 'unknown area range: {}'.format(area)
        area_range = area_ranges[area]

        # Record max overlap value for each gt box
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            entry = self.roidb[i]
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((entry['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            # Keep only ground-truth boxes inside the requested area range.
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
                boxes = entry['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 is the dtype the removed ``np.float`` alias meant.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            # Greedy one-to-one matching: on each pass take the best
            # remaining (proposal, gt) pair and retire both.
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
                'gt_overlaps': gt_overlaps}

    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build a roidb from per-image box arrays.

        Args:
            box_list: sequence with one box array per image, in image-index
                order; its length must equal ``num_images``.
            gt_roidb: ground-truth roidb aligned with ``box_list``, or None.
                When an image has ground-truth boxes, each box in
                ``box_list`` records its best positive IoU in the column of
                the matching ground-truth class.

        Returns:
            A list of dicts with keys 'boxes', 'gt_classes' (all zeros),
            'gt_overlaps' (sparse, num_boxes x num_classes), 'flipped'
            (False) and 'seg_areas' (all zeros).

        Raises:
            AssertionError: if ``len(box_list) != self.num_images``.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in range(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]  # boxes in the current image
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # Rows index the given boxes, columns the ground-truth boxes.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)  # best gt per box
                maxes = gt_overlaps.max(axis=1)        # its IoU value
                # Only boxes that overlap some ground truth are recorded.
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            # Stored sparse: at most one non-zero per row.
            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                # all-zero 1-D array: these boxes are not ground truth
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb

    @staticmethod
    def merge_roidbs(a, b):
        """Concatenate roidb ``b`` onto roidb ``a`` image by image.

        ``a`` is modified in place and returned. For each image, 'boxes'
        and 'gt_overlaps' are stacked row-wise, while 'gt_classes' and
        'seg_areas' are concatenated.

        Raises:
            AssertionError: if the two roidbs differ in length.
        """
        assert len(a) == len(b)
        for entry_a, entry_b in zip(a, b):
            entry_a['boxes'] = np.vstack((entry_a['boxes'],
                                          entry_b['boxes']))
            entry_a['gt_classes'] = np.hstack((entry_a['gt_classes'],
                                               entry_b['gt_classes']))
            entry_a['gt_overlaps'] = scipy.sparse.vstack(
                [entry_a['gt_overlaps'], entry_b['gt_overlaps']])
            entry_a['seg_areas'] = np.hstack((entry_a['seg_areas'],
                                              entry_b['seg_areas']))
        return a

    def competition_mode(self, on):
        """Turn competition mode on or off."""
        pass
l_ml_m_lm_m
关注
0
点赞
踩
4

收藏

觉得还不错? 一键收藏
0
评论
faster rcnn代码解析（4）

lib/datasets/imdb.py# --------------------------------------------------------# Fast R-CNN# Copyright (c) 2015 Microsoft# Licensed under The MIT License [see LICENSE for details]# Written by R...
复制链接

扫一扫