目录
本文默认你已经安装好了py-faster-rcnn源码,如果安装好了,那么就开始下面的工作吧!
源码地址:https://github.com/rbgirshick/py-faster-rcnn
kitti数据集转voc
kitti数据集下载地址:http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=2d
文件结构
创建下面文件,将下载好的数据集图片放入JPEGImages中,标注文件放入training中
zero@zero:~/kitti$ tree -L 1
.
├── Annotations        #存放xml文件
├── ImageSets
│   └── Main
│       ├── test.txt
│       ├── train.txt
│       ├── trainval.txt
│       └── val.txt
├── JPEGImages         #存放kitti原数据集图片的地址
├── training           #标注文件存放地址
└── results            #存放训练后的一些相关文件(不创建的话训练会报错)
    └── VOC2007
        └── Main
转换代码
运行代码,生成xml文件以及四个txt文件
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import cv2
import glob
from fasterrcnn_kitti.utils import writexml
# Convert KITTI label_2 text annotations into VOC-style XML files and write
# the four ImageSets/Main split lists (trainval / train / val / test).

# Root of the VOC-style KITTI directory and the folder receiving the split lists.
KITTI_ROOT = "/home/zero/kitti"
SPLIT_DIR = KITTI_ROOT + "/ImageSets/Main/"

# KITTI object types that are dropped instead of being written as objects.
IGNORED_TYPES = ("Misc", "DontCare")

# glob returns paths in arbitrary order and the split below depends on the
# position in this list, so sort it to make the split reproducible.
list_anno_files = sorted(glob.glob(KITTI_ROOT + "/training/label_2/*"))
num_files = len(list_anno_files)

# trainval = first 90% of the samples (train = first 70%, val = next 20%),
# test = last 10%. The with-statement closes all four lists even on errors.
with open(SPLIT_DIR + "trainval.txt", "w") as trainval, \
        open(SPLIT_DIR + "train.txt", "w") as train, \
        open(SPLIT_DIR + "val.txt", "w") as val, \
        open(SPLIT_DIR + "test.txt", "w") as test:
    for idx, file_path in enumerate(list_anno_files):
        # Read every annotation line of this label file.
        with open(file_path) as label_file:
            anno_lines = label_file.readlines()
        print(anno_lines)

        bboxes = []
        typename = []
        for anno_item in anno_lines:
            fields = anno_item.split()
            # Skip blank or truncated lines: fields 4..7 hold the 2D box.
            if len(fields) < 8:
                continue
            if fields[0] in IGNORED_TYPES:
                continue
            # KITTI stores left, top, right, bottom as floats; VOC wants ints.
            bbox = (int(float(fields[4])), int(float(fields[5])),
                    int(float(fields[6])), int(float(fields[7])))
            bboxes.append(bbox)
            typename.append(fields[0])

        # "000123.txt" -> sample id "000123"; image and XML share that id.
        stem = file_path.split("/")[-1].rsplit(".", 1)[0]
        filename = stem + ".png"
        xmlpath = KITTI_ROOT + "/Annotations/" + stem + ".xml"
        imgpath = KITTI_ROOT + "/JPEGImages/" + filename

        saveimg = cv2.imread(imgpath)
        # cv2.imread signals failure by returning None instead of raising.
        if saveimg is None:
            raise IOError("cannot read image: " + imgpath)
        writexml(filename, saveimg, bboxes, xmlpath, typename)

        if idx > num_files * 0.9:
            test.write(stem + "\n")
        else:
            trainval.write(stem + "\n")
            if idx > num_files * 0.7:
                val.write(stem + "\n")
            else:
                train.write(stem + "\n")
训练过程
修改 *train.pt
models/pascal_voc/ZF/faster_rcnn_alt_opt下
stage1_fast_rcnn_train.pt
# Excerpt: only the layers whose class-dependent values must be edited.
name: "ZF"
layer {
name: 'data'
type: 'Python'
top: 'data'
top: 'rois'
top: 'labels'
top: 'bbox_targets'
top: 'bbox_inside_weights'
top: 'bbox_outside_weights'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 8" # number of object classes + 1 (background)
}
}
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 8 # number of object classes + 1 (background)
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
param { lr_mult: 1.0 }
param { lr_mult: 2.0 }
inner_product_param {
num_output: 32 # (number of object classes + 1) x 4 box coordinates
weight_filler {
type: "gaussian"
std: 0.001
}
bias_filler {
type: "constant"
value: 0
}
}
}
stage1_rpn_train.pt
# Excerpt: only the input layer needs its class count changed in this file.
name: "ZF"
layer {
name: 'input-data'
type: 'Python'
top: 'data'
top: 'im_info'
top: 'gt_boxes'
python_param {
module: 'roi_data_layer.layer'
layer: 'RoIDataLayer'
param_str: "'num_classes': 8" # number of object classes + 1 (background)
}
}
stage2_fast_rcnn_train.pt
同stage1_fast_rcnn_train.pt修改地方相同。
stage2_rpn_train.pt
同stage1_rpn_train.pt修改地方相同。
faster_rcnn_test.pt
# Excerpt: the two output layers whose sizes depend on the class count.
layer {
name: "cls_score"
type: "InnerProduct"
bottom: "fc7"
top: "cls_score"
inner_product_param {
num_output: 8 # number of object classes + 1 (background)
}
}
layer {
name: "bbox_pred"
type: "InnerProduct"
bottom: "fc7"
top: "bbox_pred"
inner_product_param {
num_output: 32 # (number of object classes + 1) x 4 box coordinates
}
}
修改pascal_voc.py
lib/datasets/pascal_voc.py
首先将代码中所有的 self._devkit_path 替换为 self._data_path
(1)修改类别:这里的类别名保留了大写字母,必须和xml标注文件中的类别名完全一致,否则会报错
self._data_path = "/home/zero/kitti"# root directory of the VOC-format dataset
self._classes = ('__background__', # always index 0
'Person_sitting', 'Truck', 'Van', 'Pedestrian',
'Cyclist', 'Tram', 'Car')
(2)图片格式
self._image_ext = '.png'
(3)注释掉:
#assert os.path.exists(self._devkit_path), \
# 'VOCdevkit path does not exist: {}'.format(self._devkit_path)
(4)读取标注框时去掉坐标后面的“- 1”(kitti标注中坐标可能为0,减1后会越界),并去掉类别名后的 .lower()(类别名中含有大写字母):
# Box corners are taken from the XML as-is; no "- 1" shift is applied.
# NOTE(review): presumably intentional so a coordinate of 0 stays
# non-negative -- confirm against the annotation converter.
x1 = float(bbox.find('xmin').text)
y1 = float(bbox.find('ymin').text)
x2 = float(bbox.find('xmax').text)
y2 = float(bbox.find('ymax').text)
# The class name is only stripped, not lowercased, so it must match a key of
# self._class_to_ind exactly (case-sensitive).
cls = self._class_to_ind[obj.find('name').text.strip()]
(5)修改xml文件和图片存放地址
def _do_python_eval(self, output_dir = 'output'):
annopath = os.path.join(
self._data_path,#'VOC' + self._year,
'Annotations',
'{:s}.xml')
imagesetfile = os.path.join(
self._data_path,
'ImageSets',
'Main',
self._image_set + '.txt')
修改迭代次数
tools/train_faster_rcnn_alt_opt.py
max_iters = [10000, 5000, 10000, 5000] # shortened schedule; the default values can be used instead if training time allows
开始训练
./experiments/scripts/faster_rcnn_alt_opt.sh 0 ZF pascal_voc
当我们需要重新训练时,需要清除掉data下的cache文件夹和kitti中的annotations_cache文件夹
训练后:
测试
修改 tools/demo.py
(1)
CLASSES = ('__background__', # always index 0
'Person_sitting', 'Truck', 'Van', 'Pedestrian',
'Cyclist', 'Tram', 'Car')
(2)将你想测试的图片放到 data/demo/ 目录中,并把 im_names 改为这些图片的文件名:
im_names = ['000016.png', '000017.png', '000018.png',
'000021.png', '000020.png']
运行
./tools/demo.py --net zf
结果:
参考链接
https://blog.csdn.net/sinat_30071459/article/details/51332084