faster rcnn接口_Faster R-CNN tensorflow代码详解

研究背景

根据Faster-RCNN算法的运行和调试情况,对代码进行深入分析。

参考资料

各部分代码分析

1 编译Cython模块

cd tf-faster-rcnn/lib # 首先进入目录tf-faster-rcnn/lib

make clean

make #编译

编译成功之后,目录tf-faster-rcnn/lib/nms 和tf-faster-rcnn/lib/roi_pooling_layer/ 和tf-faster-rcnn/lib/utils下面会出现一些.so文件。

注意:.so文件不具有可移植性,因为编译生成的文件只适用于本台计算机,换一台计算机之后,使用原来的.so文件程序会出错。并且,必须先执行make clean删除旧的.so文件,否则程序会继续调用旧的.so文件,而不会生成新的.so文件。因此重新运行程序的时候,要先删除这几个.so文件,并重新进行编译。

2 pascal_voc数据集的数据读写接口

2.1 工程文件tf-faster-rcnn中读取数据的接口都在目录tf-faster-rcnn/lib/datasets下。共有2种数据来训练网络,分别是pascal_voc和coco,数据读写接口分别是tf-faster-rcnn/lib/datasets中的pascal_voc.py和coco.py。

工程主要用到的是目录Annotations中的XML文件、目录JPEGImages中的图片、目录ImageSets/Main中的txt文件(下文代码中_load_image_set_index读取的正是ImageSets/Main下的txt文件)。

目录下其他文件:

factory.py:是个工厂类,用于生成imdb对象并返回,供网络训练和测试使用;

imdb.py:是数据库读写类的基类,封装了许多db的操作,具体的文件读写需要由继承它的子类来实现。

VOCdevkit/

VOCdevkit/VOC2007/

VOCdevkit/VOC2007/Annotations #所有图片的XML文件,一张图片对应一个XML文件,XML文件中给出的图片gt的形式是左上角和右下角的坐标

VOCdevkit/VOC2007/ImageSets/

VOCdevkit/VOC2007/ImageSets/Layout #里面有三个txt文件,分别是train.txt,trainval.txt,val.txt,存储的分别是训练图片的名字列表,训练验证集的图片名字列表,验证集图片的名字列表(名字均没有.jpg后缀)

VOCdevkit/VOC2007/ImageSets/Main

VOCdevkit/VOC2007/ImageSets/Segmentation

VOCdevkit/VOC2007/JPEGImages #所有的图片*.jpg

VOCdevkit/VOC2007/SegmentationClass #segmentations by class

VOCdevkit/VOC2007/SegmentationObject #segmentations by object

2.2 pascal_voc的数据读写接口

主函数 if __name__ == '__main__' 位于文件pascal_voc.py的最下面:

if __name__ == '__main__':
    # Manual smoke check: build the VOC2007 'trainval' dataset object,
    # access its roidb, then open an interactive IPython shell so that
    # `d` and `res` can be inspected by hand.
    from datasets.pascal_voc import pascal_voc

    d = pascal_voc('trainval', '2007')
    res = d.roidb

    # Imported only here, after the roidb has been built, exactly as in the
    # original statement order.
    from IPython import embed

    embed()

主函数中的类 pascal_voc代码,在文件pascal_voc.py的最上面:

class pascal_voc(imdb):

def __init__(self, image_set, year, use_diff=False):
    """
    Set up a PASCAL VOC dataset wrapper.

    Args:
        image_set: stem of the image-set file read from
            ``ImageSets/Main/<image_set>.txt`` (e.g. ``'trainval'``).
        year: dataset year as a string (it is concatenated into names and
            paths, e.g. ``'2007'``).
        use_diff: when true, objects flagged as difficult are kept and
            ``'_diff'`` is appended to the dataset name.

    Raises:
        AssertionError: if the devkit directory or the per-year data
            directory does not exist.
    """
    dataset_name = 'voc_' + year + '_' + image_set
    if use_diff:
        dataset_name = dataset_name + '_diff'
    imdb.__init__(self, dataset_name)

    self._year = year
    self._image_set = image_set
    # The devkit path depends on self._year, and the data path on the devkit
    # path, so these assignments must stay in this order.
    self._devkit_path = self._get_default_path()
    self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)
    self._classes = (
        '__background__',  # always index 0
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat', 'chair',
        'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant',
        'sheep', 'sofa', 'train', 'tvmonitor',
    )
    # Map each class name to its integer label.
    self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
    self._image_ext = '.jpg'
    self._image_index = self._load_image_set_index()

    # Default to roidb handler
    self._roidb_handler = self.gt_roidb
    self._salt = str(uuid.uuid4())
    self._comp_id = 'comp4'

    # PASCAL specific config options
    self.config = {
        'cleanup': True,
        'use_salt': True,
        'use_diff': use_diff,
        'matlab_eval': False,
        'rpn_file': None,
    }

    required_paths = (
        (self._devkit_path, 'VOCdevkit path does not exist: {}'),
        (self._data_path, 'Path does not exist: {}'),
    )
    for required, message in required_paths:
        assert os.path.exists(required), message.format(required)

def image_path_at(self, i):
    """
    Return the path of the image stored at position ``i`` of the image
    index list, as built by ``image_path_from_index``.
    """
    identifier = self._image_index[i]
    return self.image_path_from_index(identifier)

def image_path_from_index(self, index):
    """
    Build the path ``<data_path>/JPEGImages/<index><image_ext>`` for the
    given image identifier.

    Raises:
        AssertionError: if no file exists at the resulting path.
    """
    file_name = index + self._image_ext
    image_path = os.path.join(self._data_path, 'JPEGImages', file_name)
    if not os.path.exists(image_path):
        raise AssertionError('Path does not exist: {}'.format(image_path))
    return image_path

def _load_image_set_index(self):

"""

Load the indexes listed in this dataset's image set file.

"""

# Example path to image set file:

# self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt

image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main',

self._image_set + '.txt')

assert os.path.exists(image_set_file), \

'Path does not exist: {}'.format(image_set_file)

with open(image_set_file) as f:

image_index = [x.strip() for x in f.readlines()]

return image_index

def _get_default_path(self):
    """
    Return the default devkit location: ``cfg.DATA_DIR`` joined with
    ``'VOCdevkit' + <year>``.
    """
    devkit_dir_name = 'VOCdevkit' + self._year
    return os.path.join(cfg.DATA_DIR, devkit_dir_name)

def gt_roidb(self):
    """
    Return the database of ground-truth regions of interest.

    The result is cached in ``<cache_path>/<name>_gt_roidb.pkl``. If that
    file exists it is unpickled and returned; otherwise one entry per
    image index is built with ``_load_pascal_annotation``, written to the
    cache file, and returned.

    Returns:
        The cached object, or a list with one annotation dict per image.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; the cache file must
        # come from a trusted location.
        with open(cache_file, 'rb') as fid:
            try:
                roidb = pickle.load(fid)
            except UnicodeDecodeError:
                # Fallback for caches whose byte strings cannot be decoded
                # with the default encoding. The failed attempt may have
                # consumed part of the stream, so rewind before retrying.
                fid.seek(0)
                roidb = pickle.load(fid, encoding='bytes')
        print('{} gt roidb loaded from {}'.format(self.name, cache_file))
        return roidb

    roidb = [self._load_pascal_annotation(index)
             for index in self.image_index]
    with open(cache_file, 'wb') as fid:
        pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb to {}'.format(cache_file))

    return roidb

def rpn_roidb(self):
    """
    Return the roidb built from the RPN proposal file.

    For year 2007, or for any image set other than ``'test'``, the
    ground-truth roidb is loaded and merged with the proposal roidb via
    ``imdb.merge_roidbs``. Otherwise only the proposals are loaded, with no
    ground truth passed along.
    """
    proposals_only = int(self._year) != 2007 and self._image_set == 'test'
    if proposals_only:
        return self._load_rpn_roidb(None)

    ground_truth = self.gt_roidb()
    proposals = self._load_rpn_roidb(ground_truth)
    return imdb.merge_roidbs(ground_truth, proposals)

def _load_rpn_roidb(self, gt_roidb):

filename = self.config['rpn_file']

print('loading {}'.format(filename))

assert os.path.exists(filename), \

'rpn data not found at: {}'.format(filename)

with open(filename, 'rb') as f:

box_list = pickle.load(f)

return self.create_roidb_from_box_list(box_list, gt_roidb)

def _load_pascal_annotation(self, index):

"""

Load image and bounding boxes info from XML file in the PASCAL VOC

format.

"""

filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

tree = ET.parse(filename)

objs = tree.findall('object')

if not self.config['use_diff']:

# Exclude the samples labeled as difficult

non_diff_objs = [

obj for obj in objs if int(obj.find('difficult').text) == 0]

# if len(non_diff_objs) != len(objs):

# print 'Removed {} difficult objects'.format(

# len(objs) - len(non_diff_objs))

objs = non_diff_objs

num_objs = len(objs)

boxes = np.zeros((num_objs, 4), dtype=np.uint16)

gt_classes = np.zeros((num_objs), dtype=np.int32)

overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

# "Seg" area for pascal is just the box area

seg_areas = np.zeros((num_objs), dtype=np.float32)

# Load object bounding boxes into a data frame.

for ix, obj in enumerate(objs):

bbox = obj.find('bndbox')

# Make pixel indexes 0-based

x1 = float(bbox.find('xmin').text) - 1

y1 = float(bbox.find('ymin').text) - 1

x2 = float(bbox.find('xmax').text) - 1

y2 = float(bbox.find('ymax').text) - 1

cls = self._class_to_ind[obj.find('name').text.lower().strip()]

boxes[ix, :] = [x1, y1, x2, y2]

gt_classes[ix] = cls

overlaps[ix, cls] = 1.0

seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

overlaps = scipy.sparse.csr_matrix(overlaps)

return {'boxes': boxes,

'gt_classes': gt_classes,

'gt_overlaps': overlaps,

'flipped': False,

'seg_areas': seg_areas}

def _get_comp_id(self):

comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']

else self._comp_id)

return comp_id

def _get_voc_results_file_template(self):

# VOCdevkit/results/VOC2007/Main/_det_test_aeroplane.txt

filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt'

path = os.path.join(

self._devkit_path,

'results',

'VOC' + self._year,

'Main',

filename)

return path

def _write_voc_results_file(self, all_boxes):

for cls_ind, cls in enumerate(self.classes):

if cls == '__background__':

continue

print('Writing {} VOC results file'.format(cls))

filename = self._get_voc_results_file_template().format(cls)

with open(filename, 'wt') as f:

for im_ind, index in enumerate(self.image_index):

dets = all_boxes[cls_ind][im_ind]

if dets == []:

continue

# the VOCdevkit expects 1-based indices

for k in range(dets.shape[0]):

f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.

format(index, dets[k, -1],

dets[k, 0] + 1, dets[k, 1] + 1,

dets[k, 2] + 1, dets[k, 3] + 1))

def _do_python_eval(self, output_dir='output'):

annopath = os.path.join(

self._devkit_path,

'VOC' + self._year,

'Annotations',

'{:s}.xml')

imagesetfile = os.path.join(

self._devkit_path,

'VOC' + self._year,

'ImageSets',

'Main',

self._image_set + '.txt')

cachedir = os.path.join(self._devkit_path, 'annotations_cache')

aps = []

# The PASCAL VOC metric changed in 2010

use_07_metric = True if int(self._year) < 2010 else False

print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))

if not os.path.isdir(output_dir):

os.mkdir(output_dir)

for i, cls in enumerate(self._classes):

if cls == '__background__':

continue

filename = self._get_voc_results_file_template().format(cls)

rec, prec, ap = voc_eval(

filename, annopath, imagesetfile, cls, ca

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值