文章目录
前言
本文为8月31日目标检测学习笔记,分为两个章节:
- ADAS(Advanced Driver Assistance System);
- KITTI 数据集- Faster RCNN。
一、ADAS(Advanced Driver Assistance System)
先进驾驶辅助系统。
- 检测率、误报率:
- 每一个标记只允许有一个检测与之对应;
- 重复检测被视为错误检测。
二、KITTI 数据集- Faster RCNN
1、标注信息
2、KITTI 数据集转 VOC 数据集
统计需要检测的目标的类别:
import glob
def extract_categories(anno_lines):
    """Return the category token of every non-blank annotation line.

    The category is the first whitespace-separated field of a line.

    Args:
        anno_lines: iterable of annotation-file lines (strings).

    Returns:
        A list with one category string per non-blank line, in input order.
    """
    # Blank lines (e.g. a trailing newline at end of file) carry no object
    # and would otherwise be counted as a bogus "\n" category.
    return [line.split()[0] for line in anno_lines if line.strip()]


if __name__ == "__main__":
    # NOTE(review): r"/*" looks like a placeholder for the label-file
    # pattern; as written it matches the entries of the filesystem root.
    list_anno_files = glob.glob(r"/*")
    print(list_anno_files)

    # Collect the category of every annotated object across all label files.
    cate_list = []
    for file_path in list_anno_files:
        with open(file_path) as file:
            anno_infos = file.readlines()
        print(anno_infos)  # show the raw annotation lines that were read
        cate_list.extend(extract_categories(anno_infos))

    # The distinct categories that need to be detected.
    print(set(cate_list))
将 KITTI 数据集转换成 VOC 格式:
import cv2
import glob
from utils import writexml
def parse_kitti_objects(anno_lines):
    """Extract bounding boxes and type names from KITTI label lines.

    Each line is split on whitespace. Field 0 is the object type and
    fields 4-7 are the box coordinates, which are truncated to integers.
    Blank lines and objects typed "Misc" or "DontCare" are skipped.

    Args:
        anno_lines: iterable of label-file lines (strings).

    Returns:
        A ``(bboxes, typename)`` pair: a list of 4-int tuples and the list
        of matching type strings, both in input order.

    Raises:
        IndexError: a kept line has fewer than 8 fields.
        ValueError: a coordinate field of a kept line is not numeric.
    """
    bboxes = []
    typename = []
    for anno_item in anno_lines:
        fields = anno_item.split()
        # The type lives in field 0; the original compared field 1 against
        # "DontCare", so DontCare regions were never filtered out.
        if not fields or fields[0] in ("Misc", "DontCare"):
            continue
        # NOTE(review): presumably (left, top, right, bottom) in pixels per
        # the KITTI label format -- confirm against the devkit readme.
        bboxes.append(tuple(int(float(value)) for value in fields[4:8]))
        typename.append(fields[0])
    return bboxes, typename


def convert_kitti_to_voc():
    """Convert every KITTI label file and write the split name lists.

    For each label file, the matching image is loaded and handed to
    ``writexml`` together with the parsed boxes and type names. The image's
    base name is then appended to the test list, or to the trainval list
    plus either the val or the train list, depending on its position.

    Raises:
        IOError: an image that should accompany a label file cannot be read.
    """
    import os

    # NOTE(review): every path literal below ("./" and r"/*") looks like a
    # placeholder whose real value was lost; "./" is a directory and cannot
    # be opened for writing. Fill in the real paths before running.
    list_anno_files = glob.glob(r"/*")
    total = len(list_anno_files)

    with open("./", 'w') as trainval, open("./", 'w') as train, \
            open("./", 'w') as val, open("./", 'w') as test:
        for idx, file_path in enumerate(list_anno_files):
            with open(file_path) as file:
                anno_infos = file.readlines()
            print(anno_infos)  # show the raw annotation lines that were read

            bboxes, typename = parse_kitti_objects(anno_infos)

            # Derive the image name from the label name by swapping only the
            # extension, so a "txt" elsewhere in the name is left untouched.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            filename = stem + ".png"
            imgpath = "./" + filename

            # cv2.imread signals failure by returning None, not by raising.
            saveimg = cv2.imread(imgpath)
            if saveimg is None:
                raise IOError("cannot read image: {!r}".format(imgpath))

            # NOTE(review): the original also built an unused
            # "./<name>.xml" path; confirm where writexml saves its output.
            writexml(filename, saveimg, bboxes, typename)

            # Position-based split: the last ~10% of files go to test; of
            # the rest, those past the 70% mark go to val, the others train.
            entry = stem + "\n"
            if idx > total * 0.9:
                test.write(entry)
            else:
                trainval.write(entry)
                if idx > total * 0.7:
                    val.write(entry)
                else:
                    train.write(entry)


if __name__ == "__main__":
    convert_kitti_to_voc()
3、Faster RCNN 环境搭建
import cv2
import glob
from utils import writexml
def parse_kitti_objects(anno_lines):
    """Extract bounding boxes and type names from KITTI label lines.

    Each line is split on whitespace. Field 0 is the object type and
    fields 4-7 are the box coordinates, which are truncated to integers.
    Blank lines and objects typed "Misc" or "DontCare" are skipped.

    Args:
        anno_lines: iterable of label-file lines (strings).

    Returns:
        A ``(bboxes, typename)`` pair: a list of 4-int tuples and the list
        of matching type strings, both in input order.

    Raises:
        IndexError: a kept line has fewer than 8 fields.
        ValueError: a coordinate field of a kept line is not numeric.
    """
    bboxes = []
    typename = []
    for anno_item in anno_lines:
        fields = anno_item.split()
        # The type lives in field 0; the original compared field 1 against
        # "DontCare", so DontCare regions were never filtered out.
        if not fields or fields[0] in ("Misc", "DontCare"):
            continue
        # NOTE(review): presumably (left, top, right, bottom) in pixels per
        # the KITTI label format -- confirm against the devkit readme.
        bboxes.append(tuple(int(float(value)) for value in fields[4:8]))
        typename.append(fields[0])
    return bboxes, typename


def convert_kitti_to_voc():
    """Convert every KITTI label file and write the split name lists.

    For each label file, the matching image is loaded and handed to
    ``writexml`` together with the parsed boxes and type names. The image's
    base name is then appended to the test list, or to the trainval list
    plus either the val or the train list, depending on its position.

    Raises:
        IOError: an image that should accompany a label file cannot be read.
    """
    import os

    # NOTE(review): every path literal below ("./" and r"/*") looks like a
    # placeholder whose real value was lost; "./" is a directory and cannot
    # be opened for writing. Fill in the real paths before running.
    list_anno_files = glob.glob(r"/*")
    total = len(list_anno_files)

    with open("./", 'w') as trainval, open("./", 'w') as train, \
            open("./", 'w') as val, open("./", 'w') as test:
        for idx, file_path in enumerate(list_anno_files):
            with open(file_path) as file:
                anno_infos = file.readlines()
            print(anno_infos)  # show the raw annotation lines that were read

            bboxes, typename = parse_kitti_objects(anno_infos)

            # Derive the image name from the label name by swapping only the
            # extension, so a "txt" elsewhere in the name is left untouched.
            stem = os.path.splitext(os.path.basename(file_path))[0]
            filename = stem + ".png"
            imgpath = "./" + filename

            # cv2.imread signals failure by returning None, not by raising.
            saveimg = cv2.imread(imgpath)
            if saveimg is None:
                raise IOError("cannot read image: {!r}".format(imgpath))

            # NOTE(review): the original also built an unused
            # "./<name>.xml" path; confirm where writexml saves its output.
            writexml(filename, saveimg, bboxes, typename)

            # Position-based split: the last ~10% of files go to test; of
            # the rest, those past the 70% mark go to val, the others train.
            entry = stem + "\n"
            if idx > total * 0.9:
                test.write(entry)
            else:
                trainval.write(entry)
                if idx > total * 0.7:
                    val.write(entry)
                else:
                    train.write(entry)


if __name__ == "__main__":
    convert_kitti_to_voc()
配置参数修改:
# Per-channel pixel means, stored as a (1, 1, 3) array.
# NOTE(review): presumably in BGR order and subtracted from input images --
# confirm against the data-loading code.
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# NOTE(review): presumably the minimum overlap with a ground-truth box for
# an ROI to count as foreground -- confirm against the config reference.
__C.TRAIN.FG_THRESH = 0.5
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
4、模型测试
def stack_class_detections(scores, boxes, cls_ind):
    """Build the detection array for a single class.

    Takes the four box columns ``4 * cls_ind`` .. ``4 * cls_ind + 3`` of
    ``boxes`` and appends column ``cls_ind`` of ``scores`` as a fifth column.

    Args:
        scores: 2-D array indexed as ``[detection, class]``.
        boxes: 2-D array holding four box columns per class, side by side.
        cls_ind: index of the class to extract.

    Returns:
        A float32 array with one row per detection: four box values followed
        by the class score.
    """
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    return np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)


if __name__ == "__main__":
    cfg.TEST.HAS_RPN = True

    # NOTE(review): the empty strings below look like placeholders for the
    # network definition, the trained weights and the test image.
    prototxt = ""
    caffemodel = ""

    caffe.set_mode_gpu()
    caffe.set_device(0)
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # Load the test image. The original discarded the imread result, which
    # left `im` undefined on the next line.
    im_file = ""
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise IOError("cannot read test image: {!r}".format(im_file))
    im_show = im.copy()

    # Forward pass.
    scores, boxes = im_detect(net, im)

    conf_thresh = 0.8
    nms_thresh = 0.3
    # Start at 1 because index 0 (background) is skipped.
    for cls_ind, cls in enumerate(CLASSES[1:], start=1):
        # Boxes and scores belonging to this class only.
        dets = stack_class_detections(scores, boxes, cls_ind)
        keep = nms(dets, nms_thresh)
        dets = dets[keep, :]  # detections that survived NMS
        inds = np.where(dets[:, -1] >= conf_thresh)[0]

        # Draw every sufficiently confident detection on the display copy.
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            # OpenCV drawing functions take points as (x, y) tuples.
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))
            cv2.rectangle(im_show, top_left, bottom_right,
                          (255, 0, 0), thickness=2)
            cv2.putText(im_show, "{}_{}".format(cls, score), top_left,
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2)

    # Show the annotated copy so that both boxes and labels are visible.
    cv2.imshow("img", im_show)
    cv2.waitKey(0)