1. 下载源码地址:https://github.com/dBeker/Faster-RCNN-TensorFlow-Python3,下载预训练模型VGG16网络和其他你感兴趣的网络,下载地址: https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models.
在data文件夹下新建imagenet_weights文件夹,将下载好的网络放到此文件夹下。注意要将vgg_16.ckpt重命名为vgg16.ckpt
2.制作voc数据集,放在Faster-RCNN-TensorFlow-Python3-master\data\VOCdevkit2007文件夹下
3.修改参数并训练
3.1 训练之前修改lib/config/config.py下的代码:第30行的max_iters由40000改成10000,能节省点时间;('snapshot_iterations', 1000, "Iteration to take snapshot")一行将5000改成1000;batch_size根据GPU显存修改
3.2在...\lib\datasets目录下,有pascal_voc.py文件,这个是必须要更改的,代码中 self._classes要来指定识别的类别,在33行修改:将代码中的类别替换为自己数据集的类别(不要更改'__background__'!!例如我需要分类的类别为card1,则选择一个原有类别更换为“card1”)
3.3 修改完参数后运行Faster-RCNN-TensorFlow-Python3-master\train.py即可训练。
训练结果被保存到了Faster-RCNN-TensorFlow-Python3-master\default\voc_2007_trainval\default
注:如果要再次进行训练,需要把Faster-RCNN-TensorFlow-Python3-master\default\voc_2007_trainval\default和Faster-RCNN-TensorFlow-Python3-master\output\vgg16\voc_2007_trainval\default路径下之前训练产生的模型和data/cache路径下的cache删掉
3.4 训练过程中出错:报错No module named ‘lib.utils.cython_bbox’
解决流程
3.4.1 修改Faster-RCNN-TensorFlow-Python3\data/coco/PythonAPI/setup.py文件:在第15行加上
# Appended to the ext_modules list in data/coco/PythonAPI/setup.py (at its
# line 15). The leading comma joins this Extension to the previous list
# entry; the relative source paths point back to lib/utils in the repo root.
,
Extension( 'lib.utils.cython_bbox',
sources=['../../../lib/utils/bbox.c','../../../lib/utils/bbox.pyx'],
include_dirs = [np.get_include(), '/lib/utils'],
extra_compile_args=[], )
3.4.2. 由于没有bbox.c和blob.py文件。所以要先在Faster-RCNN-TensorFlow-Python3\lib\utils执行
python setup.py build_ext --inplace
生成cython_bbox.c和cython_bbox.pyx,然后将这两个改名为bbox.c和bbox.pyx。
3.4.3. 再在./data/coco/PythonAPI下面运行
python setup.py build_ext --inplace
python setup.py build_ext install
4. 测试并可视化训练loss
4.1 首先进入demo.py文件,修改几处代码:
1)39行的vgg16_faster_rcnn_iter_70000.ckpt修改为vgg16_faster_rcnn_iter_20000.ckpt
2)由于本文是复现VGG16模型,所以将demo.py的第104行原来默认的res101,改成我们现在用的vgg16。
3)106行的default='pascal_voc_0712'改为'pascal_voc'
4)在根目录下新建output/vgg16/voc_2007_trainval/default,将训练好的第20000次的模型放入此文件夹中,结构如图:
4.2 可视化loss
4.2.1 在train.py文件里的def train(self):函数里添加的代码如下,在train.py同一层级目录下新建write_loss.txt文档,total loss写入该文档
# Snippet for the body of train() in train.py: the training loop with the
# added lines that append "<iter> <total_loss>" to write_loss.txt.
# NOTE(review): `iter` shadows the Python builtin; kept to match the
# surrounding train.py code this snippet is pasted into.
filename = './write_loss.txt'  # added: loss log consumed by the plotting script
while iter < cfg.FLAGS.max_iters + 1:
    # Learning rate: decay once, right after step_size iterations
    if iter == cfg.FLAGS.step_size + 1:
        # Add snapshot here before reducing the learning rate
        # self.snapshot(sess, iter)
        sess.run(tf.assign(lr, cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))
    timer.tic()
    # Get training data, one batch at a time
    blobs = self.data_layer.forward()
    # Compute the graph without summary
    try:
        rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(sess, blobs, train_op)
    except Exception:
        # if some errors were encountered image is skipped without increasing iterations
        print('image invalid, skipping')
        continue
    timer.toc()
    iter += 1
    # Periodic checkpoint
    if iter % (cfg.FLAGS.snapshot_iterations) == 0:
        self.snapshot(sess, iter)
    # Display training information
    if iter % (cfg.FLAGS.display) == 0:
        # added: append "<iter> <total_loss>" (loss rounded to 4 decimals)
        with open(filename, 'a') as fw:
            fw.write(str(int(iter)) + ' ' + str(float('%.4f' % total_loss)) + "\n")
        # end of addition
        print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
              '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
              (iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
        print('speed: {:.3f}s / iter'.format(timer.average_time))
4.2.2
"""Plot the total-loss curve recorded by train.py in write_loss.txt."""
import random  # NOTE(review): unused in this script; safe to remove
import numpy as np
import matplotlib.pyplot as plt

# Y-axis tick values; adjust to taste. (Original had a "[0,0, ..." typo
# for 0.0, which duplicated the zero tick.)
y_ticks = [0.0, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0]
data_path = 'E:\\yolodaima\\Faster-RCNN-TensorFlow-Python3-master-vgg16\\write_loss-20000.txt'  # path of the loss log
result_path = 'E:\\yolodaima\\Faster-RCNN-TensorFlow-Python3-master-vgg16\\total_loss'  # where the figure is saved

data1_loss = np.loadtxt(data_path)
x = data1_loss[:, 0]  # column 0: iteration number
y = data1_loss[:, 1]  # column 1: total loss

# ---- plotting ----
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, label='total_loss')
plt.yticks(y_ticks)  # comment out to let matplotlib choose the ticks
# plt.grid()
ax.legend(loc='best')
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig(result_path)
5. 画p-r曲线和计算ap
文件顺序test.py-->pascal_voc.py-->voc_eval.py
5.1 lib/datasets/pascal_voc.py:
在pascal_voc.py开头加入这几句
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from itertools import cycle
import pylab as pl
def _do_python_eval(self, output_dir='output'):
    """Run the Python PASCAL VOC evaluation for every class.

    Prints per-class AP and the mean AP, dumps each class's
    recall/precision/AP to <output_dir>/<cls>_pr.pkl, and draws all
    precision-recall curves in one figure.
    """
    # Template for per-image annotation XML paths ({:s} is the image id).
    annopath = self._devkit_path + '\\VOC' + self._year + '\\Annotations\\' + '{:s}.xml'
    imagesetfile = os.path.join(
        self._devkit_path,
        'VOC' + self._year,
        'ImageSets',
        'Main',
        self._image_set + '.txt')
    cachedir = os.path.join(self._devkit_path, 'annotations_cache')
    aps = []
    # added (tutorial): collectors for recall / precision
    # NOTE(review): recs/precs are never appended to below — confirm intent
    recs = []
    precs = []
    # The PASCAL VOC metric changed in 2010
    use_07_metric = int(self._year) < 2010
    print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    for i, cls in enumerate(self._classes):
        if cls == '__background__':
            continue
        filename = self._get_voc_results_file_template().format(cls)
        rec, prec, ap = voc_eval(
            filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps += [ap]
        # added (tutorial): rec and prec come from voc_eval.py; one curve per class
        pl.plot(rec, prec, lw=2,
                label='Precision-recall curve of class {} (area = {:.4f})'
                      ''.format(cls, ap))
        print(('AP for {} = {:.4f}'.format(cls, ap)))
        with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
            pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
    # draw the combined P-R figure
    pl.xlabel('Recall')
    pl.ylabel('Precision')
    plt.grid(True)
    pl.ylim([0.0, 1.2])
    pl.xlim([0.0, 1.0])
    pl.title('Precision-Recall')
    pl.legend(loc="upper right")
    plt.show()
    print(('Mean AP = {:.4f}'.format(np.mean(aps))))
    print('~~~~~~~~')
    print('Results:')
    for ap in aps:
        print(('{:.3f}'.format(ap)))
    print(('{:.3f}'.format(np.mean(aps))))
    print('~~~~~~~~')
    print('')
    print('--------------------------------------------------------------')
    print('Results computed with the **unofficial** Python eval code.')
    print('Results should be very close to the official MATLAB eval code.')
    print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
    print('-- Thanks, The Management')
    print('--------------------------------------------------------------')
pascal_voc.py文件的以下函数修改
pascal_voc.py文件的这个函数注释一部分:生成每类的预测框的文本文件:包括图片名,置信度,四个坐标值
def evaluate_detections(self, all_boxes, output_dir):
    """Write per-class detection result files, then run the Python (and
    optionally MATLAB) PASCAL VOC evaluation."""
    self._write_voc_results_file(all_boxes)
    self._do_python_eval(output_dir)
    if self.config['matlab_eval']:
        self._do_matlab_eval(output_dir)
    # Cleanup deliberately disabled (tutorial change): keep the per-class
    # result files so the PR data can be inspected afterwards.
    # if self.config['cleanup']:
    #     for cls in self._classes:
    #         if cls == '__background__':
    #             continue
    #         filename = self._get_voc_results_file_template().format(cls)
    #         os.remove(filename)
5.2 voc_eval.py文件中做如下更改:
def parse_rec(filename):
    """Parse a PASCAL VOC annotation XML file.

    filename: path to the XML, e.g. ./data/VOCdevkit2007/VOC2007/Annotations/<id>.xml
    Returns a list of dicts, one per <object> element, with keys
    'name', 'pose', 'truncated', 'difficult' and 'bbox' ([xmin, ymin, xmax, ymax]).
    Raises if an <object> lacks any of those child tags (same as the original).
    """
    tree = ET.parse(filename)  # original wrote ET.parse('' + filename): a no-op concat
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        obj_struct = {
            'name': obj.find('name').text,
            'pose': obj.find('pose').text,
            'truncated': int(obj.find('truncated').text),
            'difficult': int(obj.find('difficult').text),
            'bbox': [int(bbox.find('xmin').text),
                     int(bbox.find('ymin').text),
                     int(bbox.find('xmax').text),
                     int(bbox.find('ymax').text)],
        }
        objects.append(obj_struct)
    return objects
5.3在faster-rcnn-tensorflow-python3.5-master文件夹下新建test_net.py
# !/usr/bin/env python
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen, based on code from Ross Girshick
# --------------------------------------------------------
"""
Demo script showing detections in sample images.
See README.md for installation instructions before running.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import tensorflow as tf
from lib.nets.vgg16 import vgg16
from lib.datasets.factory import get_imdb
from lib.utils.test import test_net
# Checkpoint file name per network (edit to match your trained snapshot).
NETS = {'vgg16': ('vgg16_faster_rcnn_iter_20000.ckpt',)} # 自己需要修改:训练输出模型
# Training-set directory name per dataset choice.
DATASETS = {'pascal_voc': ('voc_2007_trainval',), 'pascal_voc_0712': ('voc_2007_trainval+voc_2012_trainval',)}


def parse_args():
    """Parse input arguments."""
    parser = argparse.ArgumentParser(description='Tensorflow Faster R-CNN test')
    parser.add_argument('--net', dest='demo_net',
                        help='Network to use [vgg16 res101]',
                        choices=NETS.keys(), default='vgg16')
    parser.add_argument('--dataset', dest='dataset',
                        help='Trained dataset [pascal_voc pascal_voc_0712]',
                        choices=DATASETS.keys(), default='pascal_voc')
    return parser.parse_args()
if __name__ == '__main__':
    args = parse_args()
    # model path: output/<net>/<trainval-dir>/default/<checkpoint>
    demonet = args.demo_net
    dataset = args.dataset
    tfmodel = os.path.join('output', demonet, DATASETS[dataset][0], 'default', NETS[demonet][0])
    # Build "default/<checkpoint-stem>" used as the results sub-directory name.
    # NOTE(review): splits on '\\', so this path handling is Windows-only.
    filename = (os.path.splitext(tfmodel)[0]).split('\\')[-1]
    filename = 'default' + '/' + filename
    imdb = get_imdb("voc_2007_test")  # test-set imdb
    # NOTE(review): competition_mode usually takes a bool; the non-empty
    # string here is simply truthy — confirm against the imdb API.
    imdb.competition_mode('competition mode')
    if not os.path.isfile(tfmodel + '.meta'):
        print(tfmodel)
        raise IOError(('{:s} not found.\nDid you download the proper networks from '
                       'our server and place them properly?').format(tfmodel + '.meta'))
    # set config
    tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    # init session
    sess = tf.Session(config=tfconfig)
    # load network
    if demonet == 'vgg16':
        net = vgg16(batch_size=1)
    # elif demonet == 'res101':
    #     net = resnetv1(batch_size=1, num_layers=101)
    else:
        raise NotImplementedError
    # 2 = number of classes + 1 (background); edit for your dataset
    net.create_architecture(sess, "TEST", 2,
                            tag='default', anchor_scales=[8, 16, 32])
    saver = tf.train.Saver()
    saver.restore(sess, tfmodel)
    print('Loaded network {:s}'.format(tfmodel))
    print(filename)
    test_net(sess, net, imdb, filename, max_per_image=100)
    sess.close()
最后终端运行python test_net.py即可画出p-r曲线并计算出每类的ap。
切记,改变测试图像评估模型时候,记得删除annots.pkl文件。
默认在.\data\VOCdevkit2007\annotations_cache下