学习 Fast R-CNN 里面代码的目的,是为了熟悉 Caffe 的 Python 接口,以及 Python Layer 的书写方式。
下面就以 Fast R-CNN 里面的 RoIDataLayer 层入手,逐步学习:
class RoIDataLayer(caffe.Layer):
    """Fast R-CNN data layer used for training.

    The layer has no bottom inputs. On every forward pass it obtains a
    minibatch (a dict mapping blob names to arrays) and copies each entry
    into the top blob registered under that name in ``_name_to_top_map``.

    Usage order matters: ``setup`` assigns ``self._num_classes``, which
    ``set_roidb`` reads when prefetching is enabled, so ``setup`` must have
    run before ``set_roidb`` is called.
    """

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb and rewind the cursor."""
        self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch."""
        # Reshuffle only when the next slice would run past the end of the
        # permutation. Using ``>`` (not ``>=``) keeps the final full
        # minibatch of each permutation instead of silently dropping it.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH > len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def _get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch.

        If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
        separate process and made available through self._blob_queue.
        Otherwise the minibatch is built in-process with ``get_minibatch``.
        """
        if cfg.TRAIN.USE_PREFETCH:
            # Blocks until the prefetch process has produced a minibatch.
            return self._blob_queue.get()

        db_inds = self._get_next_minibatch_inds()
        minibatch_db = [self._roidb[i] for i in db_inds]
        return get_minibatch(minibatch_db, self._num_classes)

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training.

        Stores ``roidb``, draws an initial permutation and, when
        cfg.TRAIN.USE_PREFETCH is set, starts a ``BlobFetcher`` that feeds
        ``self._blob_queue``.
        """
        self._roidb = roidb
        self._shuffle_roidb_inds()
        if cfg.TRAIN.USE_PREFETCH:
            # Bounded queue (capacity 10) shared with the fetcher.
            self._blob_queue = Queue(10)
            self._prefetch_process = BlobFetcher(self._blob_queue,
                                                 self._roidb,
                                                 self._num_classes)
            self._prefetch_process.start()

            # Terminate the child process when the parent exits
            def cleanup():
                # Single-argument parenthesised print behaves the same on
                # Python 2 and Python 3.
                print('Terminating BlobFetcher')
                self._prefetch_process.terminate()
                self._prefetch_process.join()

            import atexit
            atexit.register(cleanup)

    # This method plays the role of SetUp in Caffe's C++ layer interface.
    def setup(self, bottom, top):
        """Setup the RoIDataLayer.

        Parses ``num_classes`` from the layer's parameter string, records
        which top index each named blob is written to, and gives every top
        blob a placeholder shape (the real shapes are set in ``forward``).
        """
        # parse the layer parameter string, which must be valid YAML
        # The parameter is a string written in Python-dict form; parsing it
        # yields the ``layer_params`` dictionary.
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects. If the parameter string can come from an
        # untrusted prototxt, switch to yaml.safe_load.
        layer_params = yaml.load(self.param_str_)

        self._num_classes = layer_params['num_classes']

        self._name_to_top_map = {
            'data': 0,
            'rois': 1,
            'labels': 2}

        # data blob: holds a batch of N images, each with 3 channels
        # The height and width (100 x 100) are dummy values
        top[0].reshape(1, 3, 100, 100)

        # rois blob: holds R regions of interest, each is a 5-tuple
        # (n, x1, y1, x2, y2) specifying an image batch index n and a
        # rectangle (x1, y1, x2, y2) region of interest. These are the
        # region proposals Fast R-CNN uses (e.g. from selective search).
        top[1].reshape(1, 5)

        # labels blob: R categorical labels in [0, ..., K] for K foreground
        # classes plus background
        top[2].reshape(1)

        # During training the layer also has to output the bounding-box
        # regression targets and the per-target loss weights.
        if cfg.TRAIN.BBOX_REG:
            self._name_to_top_map['bbox_targets'] = 3
            self._name_to_top_map['bbox_loss_weights'] = 4

            # bbox_targets blob: R bounding-box regression targets with 4
            # targets per class
            top[3].reshape(1, self._num_classes * 4)

            # bbox_loss_weights blob: At most 4 targets per roi are active;
            # this binary vector specifies the subset of active targets
            top[4].reshape(1, self._num_classes * 4)

    def forward(self, bottom, top):
        """Get blobs and copy them into this layer's top blob vector."""
        blobs = self._get_next_minibatch()

        # Walk the dict of blobs; ``items()`` works on Python 2 and 3.
        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
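小总结:
现在使用 Caffe 做目标检测时,通常存放 bounding box 的方式是:先放该框所属图像在 batch 内的索引(idx),后面再加 bounding box 的坐标,再加 label。
例如上面的 rois blob,每一行就是 (n, x1, y1, x2, y2) 这样的 5 元组,其中 n 是 batch 内的图像索引。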