第一阶段的 RPN 训练是在预训练(pre-trained)模型的基础上进行微调(fine-tuning):
# Stage 1: train the RPN in a child process, initialised from the
# pre-trained model passed on the command line (args.pretrained_model).
print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
print('Stage 1 RPN, init from ImageNet model')
print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')

cfg.TRAIN.SNAPSHOT_INFIX = 'stage1'
# Keyword arguments forwarded verbatim to train_rpn() in the child process.
mp_kwargs = dict(
    queue=mp_queue,
    imdb_name=args.imdb_name,
    init_model=args.pretrained_model,
    solver=solvers[0],
    max_iters=max_iters[0],
    cfg=cfg)
p = mp.Process(target=train_rpn, kwargs=mp_kwargs)
p.start()
# Block until the child puts its result on the queue, and only then join:
# the queue is drained before the process is reaped.
rpn_stage1_out = mp_queue.get()
p.join()
def train_rpn(queue=None, imdb_name=None, init_model=None, solver=None,
              max_iters=None, cfg=None):
    """Train a Region Proposal Network in a separate training process.

    Args:
        queue: queue on which the result dict is put when training ends.
        imdb_name: dataset name handed to get_roidb().
        init_model: weights used to initialise the network.
        solver: solver prototxt handed to train_net().
        max_iters: number of training iterations handed to train_net().
        cfg: configuration object; its TRAIN section is modified in place.

    Side effects:
        Overrides several cfg.TRAIN fields, deletes every snapshot returned
        by train_net() except the last one, and puts
        ``{'model_path': <final snapshot path>}`` on ``queue``.
    """
    # Not using any proposals, just ground-truth boxes
    cfg.TRAIN.HAS_RPN = True
    cfg.TRAIN.BBOX_REG = False  # applies only to Fast R-CNN bbox regression
    cfg.TRAIN.PROPOSAL_METHOD = 'gt'
    cfg.TRAIN.IMS_PER_BATCH = 1
    print('Init model: {}'.format(init_model))
    print('Using config:')
    pprint.pprint(cfg)

    # NOTE(review): caffe is imported inside the function rather than at
    # module level - presumably so it is only loaded in the child process;
    # confirm before moving it.
    import caffe
    _init_caffe(cfg)

    roidb, imdb = get_roidb(imdb_name)
    print('roidb len: {}'.format(len(roidb)))
    output_dir = get_output_dir(imdb)
    print('Output will be saved to `{:s}`'.format(output_dir))

    # Author's note: train_net comes from fast_rcnn.train.
    model_paths = train_net(solver, roidb, output_dir,
                            pretrained_model=init_model,
                            max_iters=max_iters)
    # Cleanup all but the final model
    for i in model_paths[:-1]:
        os.remove(i)
    rpn_model_path = model_paths[-1]
    # Send final model path through the multiprocessing queue
    queue.put({'model_path': rpn_model_path})
def get_roidb(imdb_name, rpn_file=None):
    """Load a dataset by name and build its training roidb.

    Args:
        imdb_name: dataset name passed to get_imdb().
        rpn_file: optional value stored under imdb.config['rpn_file'];
            left untouched when None.

    Returns:
        A ``(roidb, imdb)`` tuple.
    """
    # Author's note: get_imdb assembles the pascal_voc dataset object.
    imdb = get_imdb(imdb_name)
    print('Loaded dataset `{:s}` for training'.format(imdb.name))

    proposal_method = cfg.TRAIN.PROPOSAL_METHOD
    imdb.set_proposal_method(proposal_method)
    print('Set proposal method: {:s}'.format(proposal_method))

    if rpn_file is not None:
        imdb.config['rpn_file'] = rpn_file

    return get_training_roidb(imdb), imdb
imdb 是 pascal_voc 类(继承自 imdb)的对象,记录了 _data_path、_class_to_ind、_image_index 等信息。
imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
----------------------------------------------------------------------------------------------
def set_proposal_method(self, method):
    """Select which ``<method>_roidb`` attribute will produce the roidb.

    The attribute named ``method + '_roidb'`` is looked up on ``self`` and
    stored in ``self.roidb_handler``; it is not called here.

    Args:
        method: name prefix, e.g. 'gt' selects ``self.gt_roidb``.

    Raises:
        AttributeError: if ``self`` has no attribute ``<method>_roidb``.
    """
    # getattr performs the same lookup as the former
    # eval('self.' + method + '_roidb') without evaluating arbitrary text.
    self.roidb_handler = getattr(self, method + '_roidb')
这里只是把获取 roidb 的方法设置为 gt_roidb(保存在 self.roidb_handler 中),此时并未调用。
def get_training_roidb(imdb):
    """Returns a roidb (Region of Interest database) for use in training.

    When cfg.TRAIN.USE_FLIPPED is set, flipped images are appended to the
    imdb first; the roidb is then enriched by rdl_roidb.prepare_roidb().

    Args:
        imdb: dataset object; it is modified in place.

    Returns:
        ``imdb.roidb`` after preparation.
    """
    if cfg.TRAIN.USE_FLIPPED:
        print('Appending horizontally-flipped training examples...')
        # Author's note: adds horizontally flipped images to enlarge the
        # training set.
        imdb.append_flipped_images()
        print('done')

    print('Preparing training data...')
    # Author's note: attaches the per-image ROI information.
    rdl_roidb.prepare_roidb(imdb)
    print('done')

    return imdb.roidb
def prepare_roidb(imdb):
    """Add derived, training-time fields to every entry of ``imdb.roidb``.

    Each entry gains:
      * 'image'        - path from ``imdb.image_path_at(i)``
      * 'width'        - first component of the PIL image size
      * 'height'       - second component of the PIL image size
      * 'max_overlaps' - row-wise maximum of the entry's 'gt_overlaps'
      * 'max_classes'  - column index where that maximum occurs

    Entries are modified in place; nothing is returned.
    """
    # Image sizes are gathered first, before imdb.roidb is accessed.
    image_sizes = [PIL.Image.open(imdb.image_path_at(idx)).size
                   for idx in xrange(imdb.num_images)]
    entries = imdb.roidb
    for idx in xrange(len(imdb.image_index)):
        entry = entries[idx]
        size = image_sizes[idx]
        entry['image'] = imdb.image_path_at(idx)
        entry['width'] = size[0]
        entry['height'] = size[1]
        # argmax needs a dense array, hence toarray()
        dense = entry['gt_overlaps'].toarray()
        # best overlap over the class columns, and the class achieving it
        best_overlap = dense.max(axis=1)
        best_class = dense.argmax(axis=1)
        entry['max_classes'] = best_class
        entry['max_overlaps'] = best_overlap
        # sanity check: zero overlap must map to class 0 (background)
        background = np.where(best_overlap == 0)[0]
        assert all(best_class[background] == 0)
        # sanity check: positive overlap must map to a foreground class
        foreground = np.where(best_overlap > 0)[0]
        assert all(best_class[foreground] != 0)
def prepare_roidb(imdb):
    """Enrich the imdb's roidb with quantities that are useful for training.

    For every image, the path and the image width/height are recorded,
    together with the maximum overlap per ROI (taken over the class
    columns of 'gt_overlaps') and the class index achieving that maximum.
    The roidb entries are updated in place and nothing is returned.
    """
    image_sizes = [PIL.Image.open(imdb.image_path_at(n)).size
                   for n in xrange(imdb.num_images)]
    # Author's note: imdb.roidb goes through a built-in decorator
    # (presumably @property) and ends up calling gt_roidb() in
    # pascal_voc.py.
    records = imdb.roidb
    for n in xrange(len(imdb.image_index)):
        record = records[n]
        dims = image_sizes[n]
        # Author's note: record the information about the original image.
        record['image'] = imdb.image_path_at(n)
        record['width'] = dims[0]
        record['height'] = dims[1]
        # a dense array is required for argmax
        overlaps = record['gt_overlaps'].toarray()
        # maximum over the class columns and the class that attains it
        top_overlap = overlaps.max(axis=1)
        top_class = overlaps.argmax(axis=1)
        record['max_classes'] = top_class
        record['max_overlaps'] = top_overlap
        # sanity checks
        # zero max overlap => class must be 0 (background)
        bg_rows = np.where(top_overlap == 0)[0]
        assert all(top_class[bg_rows] == 0)
        # positive max overlap => class must be non-zero (foreground)
        fg_rows = np.where(top_overlap > 0)[0]
        assert all(top_class[fg_rows] != 0)
def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    This function loads/saves from/to a cache file to speed up future calls.
    The cache file is ``<self.cache_path>/<self.name>_gt_roidb.pkl``. When
    it exists it is returned as-is, so a stale cache must be deleted by
    hand after the dataset changes.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): unpickling executes whatever the file contains;
        # only use cache directories you trust.
        with open(cache_file, 'rb') as fid:
            roidb = cPickle.load(fid)
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return roidb

    # No cache yet: build one entry per image index from its annotation.
    gt_roidb = [self._load_pascal_annotation(index)
                for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        cPickle.dump(gt_roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return gt_roidb
该函数用于获取每张图片中标注物体的信息。如果之前已经生成过 roi 缓存,会直接读取 data/cache/*.pkl;否则会通过 _load_pascal_annotation 读取每一张图片在 data/VOCDevkit/Pascal_voc/Annotation 目录下对应的 *.xml 文件,并为每张图片生成如下信息:
{'boxes' : boxes, # 存储每个gt_box的左上角和右下角坐标
'gt_classes': gt_classes, # 每个gt_box所标注的类别编号
'gt_overlaps' : overlaps, # 每个gt_box在不同类别上的得分(overlap),gt_box_num * class_num 的矩阵
'flipped' : False, # 未被翻转
'seg_areas' : seg_areas} # gt_box的面积
(如果数据集有变动,需要先执行 rm -r data/cache/*,否则会读取旧的 *.pkl)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
至此完成了对数据信息的读取,主要是原图信息及其对应的标注信息。
=====================================================================
下面是执行caffe训练的过程:
参数说明:solver_prototxt 为 stage1_fast_rcnn_solver30k40k.pt;pretrained_model 为 ZF/VGG16 等预训练模型;roidb 为上文得到的训练数据。
# lib/fast_rcnn/train.py
def __init__(self, solver_prototxt, roidb, output_dir,
             pretrained_model=None):
    """Initialize the SolverWrapper.

    Args:
        solver_prototxt: path of the solver definition; it is handed to
            caffe.SGDSolver and also parsed into ``self.solver_param``.
        roidb: training roidb; handed to the network's first layer via
            ``set_roidb``.
        output_dir: stored as ``self.output_dir``.
        pretrained_model: optional weights file copied into the net.
    """
    self.output_dir = output_dir

    if (cfg.TRAIN.HAS_RPN and cfg.TRAIN.BBOX_REG and
            cfg.TRAIN.BBOX_NORMALIZE_TARGETS):
        # RPN can only use precomputed normalization because there are no
        # fixed statistics to compute a priori
        assert cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED

    if cfg.TRAIN.BBOX_REG:
        print('Computing bounding-box regression targets...')
        self.bbox_means, self.bbox_stds = \
            rdl_roidb.add_bbox_regression_targets(roidb)
        print('done')

    self.solver = caffe.SGDSolver(solver_prototxt)
    if pretrained_model is not None:
        # The whole message is formatted before printing, so this behaves
        # the same as a Python 2 print statement and a print() call.
        print('Loading pretrained model '
              'weights from {:s}'.format(pretrained_model))
        self.solver.net.copy_from(pretrained_model)

    # Author's note: setup() in layer.py is invoked around this point
    # (presumably while the solver builds the net - confirm).
    self.solver_param = caffe_pb2.SolverParameter()
    with open(solver_prototxt, 'rt') as f:
        pb2.text_format.Merge(f.read(), self.solver_param)

    # Hand the training data to the first layer of the network.
    self.solver.net.layers[0].set_roidb(roidb)
def train_model(self, max_iters):
    """Network training loop.

    Steps the solver one iteration at a time until ``self.solver.iter``
    reaches ``max_iters``, taking a snapshot every
    cfg.TRAIN.SNAPSHOT_ITERS iterations and once more at the end if the
    last iteration was not already snapshotted.

    Args:
        max_iters: iteration count at which training stops.

    Returns:
        List of the values returned by ``self.snapshot()``, in order.
    """
    last_snapshot_iter = -1
    timer = Timer()
    model_paths = []
    while self.solver.iter < max_iters:
        # Make one SGD update
        timer.tic()
        # Author's note: this calls forward() in layer.py.
        self.solver.step(1)
        timer.toc()
        display = self.solver_param.display
        # A display interval of 0 would make the modulus below raise
        # ZeroDivisionError, so skip the speed report in that case.
        if display and self.solver.iter % (10 * display) == 0:
            print('speed: {:.3f}s / iter'.format(timer.average_time))

        if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = self.solver.iter
            model_paths.append(self.snapshot())

    # Always end with a snapshot of the final iteration.
    if last_snapshot_iter != self.solver.iter:
        model_paths.append(self.snapshot())
    return model_paths


def train_net(solver_prototxt, roidb, output_dir,
              pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network.

    Args:
        solver_prototxt: solver definition passed to SolverWrapper.
        roidb: training roidb; it is passed through filter_roidb() first.
        output_dir: output directory passed to SolverWrapper.
        pretrained_model: optional initial weights.
        max_iters: number of iterations to train for.

    Returns:
        The list of model paths returned by SolverWrapper.train_model().
    """
    roidb = filter_roidb(roidb)
    sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                       pretrained_model=pretrained_model)

    print('Solving...')
    model_paths = sw.train_model(max_iters)
    print('done solving')
    return model_paths
# lib/roi_data_layer/layer.py
class RoIDataLayer(caffe.Layer):
    """Fast R-CNN data layer used for training.

    The layer has no bottoms. On every forward pass it fetches the next
    minibatch of blobs and copies each one into the top blob registered
    under the same name in ``self._name_to_top_map``.
    """

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb and reset the cursor."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            # Shuffle within each orientation group, then pair up
            # neighbours and shuffle the pairs.
            inds = np.hstack((
                np.random.permutation(horz_inds),
                np.random.permutation(vert_inds)))
            # NOTE(review): reshape(-1, 2) needs an even number of roidb
            # entries and raises otherwise - presumably guaranteed by the
            # caller; confirm.
            inds = np.reshape(inds, (-1, 2))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1,))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch."""
        # Author's note: reshuffle once an epoch has been consumed.
        if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb):
            self._shuffle_roidb_inds()

        db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH]
        self._cur += cfg.TRAIN.IMS_PER_BATCH
        return db_inds

    def _get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch.

        If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a
        separate process and made available through self._blob_queue.
        """
        if cfg.TRAIN.USE_PREFETCH:
            return self._blob_queue.get()
        else:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            return get_minibatch(minibatch_db, self._num_classes)

    def set_roidb(self, roidb):
        """Set the roidb to be used by this layer during training."""
        self._roidb = roidb
        self._shuffle_roidb_inds()
        if cfg.TRAIN.USE_PREFETCH:
            self._blob_queue = Queue(10)
            self._prefetch_process = BlobFetcher(self._blob_queue,
                                                 self._roidb,
                                                 self._num_classes)
            self._prefetch_process.start()

            # Terminate the child process when the parent exits
            def cleanup():
                print('Terminating BlobFetcher')
                self._prefetch_process.terminate()
                self._prefetch_process.join()
            import atexit
            atexit.register(cleanup)

    def setup(self, bottom, top):
        """Setup the RoIDataLayer.

        Reads ``num_classes`` from the layer's parameter string, gives
        every top blob an initial shape and records the blob-name to
        top-index mapping in ``self._name_to_top_map``.
        """
        # parse the layer parameter string, which must be valid YAML
        # NOTE(review): yaml.load can construct arbitrary objects; consider
        # yaml.safe_load if the prototxt may come from an untrusted source.
        layer_params = yaml.load(self.param_str_)

        self._num_classes = layer_params['num_classes']

        self._name_to_top_map = {}

        # data blob: holds a batch of N images, each with 3 channels
        idx = 0
        top[idx].reshape(cfg.TRAIN.IMS_PER_BATCH, 3,
                         max(cfg.TRAIN.SCALES), cfg.TRAIN.MAX_SIZE)
        self._name_to_top_map['data'] = idx
        idx += 1

        if cfg.TRAIN.HAS_RPN:
            top[idx].reshape(1, 3)
            self._name_to_top_map['im_info'] = idx
            idx += 1

            top[idx].reshape(1, 4)
            self._name_to_top_map['gt_boxes'] = idx
            idx += 1
        else:  # not using RPN
            # rois blob: holds R regions of interest, each is a 5-tuple
            # (n, x1, y1, x2, y2) specifying an image batch index n and a
            # rectangle (x1, y1, x2, y2)
            top[idx].reshape(1, 5)
            self._name_to_top_map['rois'] = idx
            idx += 1

            # labels blob: R categorical labels in [0, ..., K] for K
            # foreground classes plus background
            top[idx].reshape(1)
            self._name_to_top_map['labels'] = idx
            idx += 1

            if cfg.TRAIN.BBOX_REG:
                # bbox_targets blob: R bounding-box regression targets
                # with 4 targets per class
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_targets'] = idx
                idx += 1

                # bbox_inside_weights blob: At most 4 targets per roi are
                # active; this binary vector specifies the subset of
                # active targets
                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_inside_weights'] = idx
                idx += 1

                top[idx].reshape(1, self._num_classes * 4)
                self._name_to_top_map['bbox_outside_weights'] = idx
                idx += 1

        print('RoiDataLayer: name_to_top: {}'.format(self._name_to_top_map))
        # Every top declared in the prototxt must have been registered.
        assert len(top) == len(self._name_to_top_map)

    def forward(self, bottom, top):
        """Get blobs and copy them into this layer's top blob vector."""
        blobs = self._get_next_minibatch()

        for blob_name, blob in blobs.items():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass
stage1_fast_rcnn_train.pt 中数据层的定义:
# Python data layer: declares the top blobs and the Python class that
# implements the layer.
layer {
name: 'data' # layer name
type: 'Python' # layer type
top: 'data' # output blob (same for the following "top" entries)
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer' # module implementing this layer: py-faster-rcnn-master\lib\roi_data_layer\layer.py
layer: 'RoIDataLayer'
param_str: "'num_classes': 21"
}
}