老师让我测试几个目标检测模型。Faster-RCNN 的 Caffe 版本编译一直有问题,于是在 GitHub 上找了一个 Windows 下的 TensorFlow 版本。由于时间比较紧,全部使用默认参数、没有调整,只是简单跑了一下。显卡为 GTX1080 8G,测试结果如下。
Faster-RCNN,使用VGG16预训练模型,基于windows10,tensorflow,cuda9.0,cudnn7.1,Python3.5,VOC2007数据集约10000张图片,类别20,训练集测试集1:1,迭代40000次,约7小时。
AP for aeroplane = 0.6837
AP for bicycle = 0.7048
AP for bird = 0.5932
AP for boat = 0.5433
AP for bottle = 0.3847
AP for bus = 0.6699
AP for car = 0.7083
AP for cat = 0.7078
AP for chair = 0.3824
AP for cow = 0.7424
AP for diningtable = 0.6268
AP for dog = 0.7494
AP for horse = 0.7963
AP for motorbike = 0.6616
AP for person = 0.6972
AP for pottedplant = 0.3255
AP for sheep = 0.6343
AP for sofa = 0.6185
AP for train = 0.6921
AP for tvmonitor = 0.5006
Mean AP = 0.6211
YOLOv3,batch=48,subdivisions=16,初始学习率0.001,使用darknet53.conv.74预训练模型,基于ubuntu16.04,darknet,cuda8.0,cudnn5.1,VOC2007数据集约10000张图片,类别20,训练集测试集1:1,迭代10000轮,约15小时。
AP for aeroplane = 0.656469218822
AP for bicycle = 0.738171052772
AP for bird = 0.599342814029
AP for boat = 0.466246285632
AP for bottle = 0.377019995151
AP for bus = 0.776727860512
AP for car = 0.81492015551
AP for cat = 0.769212772823
AP for chair = 0.443830951069
AP for cow = 0.713351960799
AP for diningtable = 0.519093086527
AP for dog = 0.719592841968
AP for horse = 0.788485138533
AP for motorbike = 0.790403446572
AP for person = 0.737553938525
AP for pottedplant = 0.311726030682
AP for sheep = 0.613778402001
AP for sofa = 0.661944290905
AP for train = 0.724236888543
AP for tvmonitor = 0.624288643631
Mean AP = 0.64231978875
其中 Faster-RCNN 的 mAP 由下面的脚本计算(参考 test_net 编写):
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Zheqi he, Xinlei Chen, based on code from Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# import _init_paths
from lib.utils.test import test_net
from lib.config import config as cfg
# from lib.config import cfg, cfg_from_file, cfg_from_list
from lib.datasets.factory import get_imdb
import argparse
import pprint
import time, os, sys
import tensorflow as tf
from lib.nets.vgg16 import vgg16
# from nets.resnet_v1 import resnetv1
# from nets.mobilenet_v1 import mobilenetv1
# Backbone and dataset keys used to locate the trained checkpoint.
demonet = 'vgg16'
dataset = 'pascal_voc'

# Checkpoint file name(s) available for each backbone key.
NETS = dict(
    vgg16=('vgg16.ckpt',),
)

# Training split name(s) available for each dataset key.
DATASETS = dict(
    pascal_voc=('voc_2007_trainval',),
)

# Checkpoint path: output/<backbone>/<training split>/default/<checkpoint file>
tfmodel = os.path.join(
    'output',
    demonet,
    DATASETS[dataset][0],
    'default',
    NETS[demonet][0]
)
def parse_args(argv=None):
    """Parse the command-line arguments of the evaluation script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        An ``argparse.Namespace`` with the attributes ``cfg_file``,
        ``model``, ``imdb_name``, ``comp_mode``, ``max_per_image``, ``tag``,
        ``set_cfgs``, ``demo_net`` and ``dataset``.
    """
    parser = argparse.ArgumentParser(description='Test a Fast R-CNN network')
    parser.add_argument('--cfg', dest='cfg_file',
                        help='optional config file', default=None, type=str)
    parser.add_argument('--model', dest='model',
                        help='model to test',
                        default=None, type=str)
    parser.add_argument('--imdb', dest='imdb_name',
                        help='dataset to test',
                        default='voc_2007_test', type=str)
    parser.add_argument('--comp', dest='comp_mode', help='competition mode',
                        action='store_true')
    parser.add_argument('--num_dets', dest='max_per_image',
                        help='max number of detections per image',
                        default=100, type=int)
    parser.add_argument('--tag', dest='tag',
                        help='tag of the model',
                        default='', type=str)
    # REMAINDER: everything after --set is collected verbatim into set_cfgs.
    parser.add_argument('--set', dest='set_cfgs',
                        help='set config keys', default=None,
                        nargs=argparse.REMAINDER)
    parser.add_argument('--net', dest='demo_net',
                        help='Network to use [vgg16 res101]',
                        choices=sorted(NETS), default='vgg16')
    # argparse does not check a default against `choices`, so the default
    # has to be a key that really exists in DATASETS.
    parser.add_argument('--dataset', dest='dataset',
                        help='Trained dataset [pascal_voc pascal_voc_0712]',
                        choices=sorted(DATASETS), default='pascal_voc')

    return parser.parse_args(argv)
if __name__ == '__main__':
    # Script entry point: build the test network, restore a checkpoint and
    # hand everything to test_net for evaluation on the selected imdb.
    args = parse_args()

    print('Called with args:')
    print(args)

    print('Using config:')
    pprint.pprint(cfg)

    # A checkpoint given with --model takes precedence; otherwise fall back
    # to the module-level default path.
    model_path = args.model or tfmodel

    # Label for this run: "<tag>/<checkpoint base name>". Without a
    # checkpoint the backbone name is used instead (the original read the
    # non-existent args.weight here and would have raised AttributeError).
    if model_path:
        filename = os.path.splitext(os.path.basename(model_path))[0]
    else:
        filename = demonet
    tag = args.tag or 'default'
    filename = tag + '/' + filename

    imdb = get_imdb(args.imdb_name)
    imdb.competition_mode(args.comp_mode)

    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True

    # The context manager closes the session even if building the network,
    # restoring the checkpoint or the evaluation itself raises.
    with tf.Session(config=tfconfig) as sess:
        # load network
        if demonet == 'vgg16':
            net = vgg16()
        else:
            raise NotImplementedError

        # NOTE(review): num_classes presumably has to match the class count
        # (including background) the checkpoint was trained with, e.g. 21
        # for the 20-class VOC data -- adjust the literal for your dataset.
        net.create_architecture(sess, mode="TEST", num_classes=6,
                                tag='default',
                                anchor_scales=[8, 16, 32],
                                anchor_ratios=[0.5, 1, 2])

        if model_path:
            print(('Loading model check point from {:s}').format(model_path))
            saver = tf.train.Saver()
            saver.restore(sess, model_path)
            print('Loaded.')
        else:
            # No checkpoint: evaluate the freshly initialized variables.
            print('No checkpoint given; using initial weights.')
            sess.run(tf.global_variables_initializer())
            print('Loaded.')

        test_net(sess, net, imdb, filename, max_per_image=args.max_per_image)
注意:记得把代码中的 num_classes=6 改为自己数据集的类别数量(包含背景类)。例如我的 VOC 数据集有 20 个类别,加上背景,应设为 num_classes=21。