8月31日目标检测学习笔记——KITTI


前言

本文为8月31日目标检测学习笔记,分为两个章节:

  • ADAS(Advanced Driver Assistance System);
  • KITTI 数据集- Faster RCNN。

一、ADAS(Advanced Driver Assistance System)

先进驾驶辅助系统。

  • 检测率、误报率:
    • 每一个标记只允许有一个检测与之对应;
    • 重复检测被视为错误检测。

二、KITTI 数据集- Faster RCNN

1、标注信息

1

2、KITTI 数据集转 VOC 数据集

统计需要检测的目标的类别:

import glob

# Gather every annotation file and echo the list that was found.
list_anno_files = glob.glob(r"/*")

print(list_anno_files)

# Collect the class label (text before the first space) of every
# annotation line, then print the distinct labels.
cate_list = []

for label_path in list_anno_files:
    with open(label_path) as handle:
        anno_infos = handle.readlines()

        print(anno_infos)  # echo the raw annotation lines that were read
        cate_list.extend(row.partition(" ")[0] for row in anno_infos)

print(set(cate_list))

将 KITTI 数据集转换成 VOC 格式:

import cv2
import glob
from utils import writexml

# Convert KITTI label files to VOC-style XML annotations and write the
# train / val / trainval / test image-set lists.
import os

# Object types that are dropped from the VOC annotations.
IGNORED_TYPES = ("Misc", "DontCare")

# NOTE(review): the label glob, the four split-list paths and the image
# directory prefix are placeholders in this note - point them at the real
# KITTI label directory, ImageSets list files and image directory.
list_anno_files = glob.glob(r"/*")
num_files = len(list_anno_files)

# `with` guarantees the four list files are closed even if a file fails.
with open("./", 'w') as trainval, open("./", 'w') as train, \
        open("./", 'w') as val, open("./", 'w') as test:
    for idx, file_path in enumerate(list_anno_files):
        with open(file_path) as file:
            anno_infos = file.readlines()

        print(anno_infos)  # echo the raw annotation lines that were read

        bboxes = []
        typename = []
        for anno_item in anno_infos:
            fields = anno_item.split(" ")
            # Blank or truncated lines carry no usable box.
            if len(fields) < 8:
                continue
            # KITTI label columns: 0 = object type, 4..7 = 2D box
            # (left, top, right, bottom). The type lives in column 0, so
            # both ignored classes must be tested against it.
            if fields[0] in IGNORED_TYPES:
                continue
            # Coordinates are stored as floats; truncate to integer pixels.
            bboxes.append(tuple(int(float(value)) for value in fields[4:8]))
            typename.append(fields[0])

        # Derive the image name from the label name by swapping only the
        # extension (a plain str.replace would also hit "txt" elsewhere).
        stem = os.path.splitext(os.path.basename(file_path))[0]
        filename = stem + ".png"
        imgpath = "./" + filename

        saveimg = cv2.imread(imgpath)
        if saveimg is None:
            # cv2.imread signals a missing/unreadable image with None.
            print("skip {}: cannot read image {}".format(file_path, imgpath))
            continue

        writexml(filename, saveimg, bboxes, typename)

        # Split by position in the file list: the last 10% go to test,
        # everything else to trainval, of which indices above 70% are val
        # and the remainder train.
        if idx > num_files * 0.9:
            test.write(stem + "\n")
        else:
            trainval.write(stem + "\n")
            if idx > num_files * 0.7:
                val.write(stem + "\n")
            else:
                train.write(stem + "\n")

3、Faster RCNN 环境搭建

import cv2
import glob
from utils import writexml

# Convert KITTI label files to VOC-style XML annotations and write the
# train / val / trainval / test image-set lists.
import os

# Object types that are dropped from the VOC annotations.
IGNORED_TYPES = ("Misc", "DontCare")

# NOTE(review): the label glob, the four split-list paths and the image
# directory prefix are placeholders in this note - point them at the real
# KITTI label directory, ImageSets list files and image directory.
list_anno_files = glob.glob(r"/*")
num_files = len(list_anno_files)

# `with` guarantees the four list files are closed even if a file fails.
with open("./", 'w') as trainval, open("./", 'w') as train, \
        open("./", 'w') as val, open("./", 'w') as test:
    for idx, file_path in enumerate(list_anno_files):
        with open(file_path) as file:
            anno_infos = file.readlines()

        print(anno_infos)  # echo the raw annotation lines that were read

        bboxes = []
        typename = []
        for anno_item in anno_infos:
            fields = anno_item.split(" ")
            # Blank or truncated lines carry no usable box.
            if len(fields) < 8:
                continue
            # KITTI label columns: 0 = object type, 4..7 = 2D box
            # (left, top, right, bottom). The type lives in column 0, so
            # both ignored classes must be tested against it.
            if fields[0] in IGNORED_TYPES:
                continue
            # Coordinates are stored as floats; truncate to integer pixels.
            bboxes.append(tuple(int(float(value)) for value in fields[4:8]))
            typename.append(fields[0])

        # Derive the image name from the label name by swapping only the
        # extension (a plain str.replace would also hit "txt" elsewhere).
        stem = os.path.splitext(os.path.basename(file_path))[0]
        filename = stem + ".png"
        imgpath = "./" + filename

        saveimg = cv2.imread(imgpath)
        if saveimg is None:
            # cv2.imread signals a missing/unreadable image with None.
            print("skip {}: cannot read image {}".format(file_path, imgpath))
            continue

        writexml(filename, saveimg, bboxes, typename)

        # Split by position in the file list: the last 10% go to test,
        # everything else to trainval, of which indices above 70% are val
        # and the remainder train.
        if idx > num_files * 0.9:
            test.write(stem + "\n")
        else:
            trainval.write(stem + "\n")
            if idx > num_files * 0.7:
                val.write(stem + "\n")
            else:
                train.write(stem + "\n")

配置参数修改:

# Three mean values stored as a 1x1x3 array.
# NOTE(review): presumably per-channel pixel means subtracted from the input
# image, in BGR order - confirm against the Faster R-CNN config file.
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
# NOTE(review): presumably the minimum overlap for an RoI to count as
# foreground during training - confirm against the Faster R-CNN config file.
__C.TRAIN.FG_THRESH = 0.5
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3

4、模型测试

if __name__ == "__main__":
    # Single-image detection demo: load the network, run one forward pass,
    # apply per-class NMS and a score threshold, draw the surviving boxes
    # and show the annotated image.
    cfg.TEST.HAS_RPN = True

    # NOTE(review): the model and image paths are left empty in this note;
    # fill them in before running.
    prototxt = ""
    caffemodel = ""

    caffe.set_mode_gpu()
    caffe.set_device(0)

    net = caffe.Net(prototxt, caffemodel, caffe.TEST)

    # Load the test image.
    im_file = ""

    # Keep the decoded image: it is both the network input and the source
    # of the drawing canvas.
    im = cv2.imread(im_file)
    if im is None:
        # cv2.imread signals a missing/unreadable file with None.
        raise IOError("failed to read image: {!r}".format(im_file))

    # Draw on a copy so the array fed to the network stays untouched.
    im_show = im.copy()

    # Forward pass.
    scores, boxes = im_detect(net, im)

    conf_thresh = 0.8
    nms_thresh = 0.3

    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background

        # Select the box columns and score column belonging to this class.
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, nms_thresh)
        dets = dets[keep, :]  # detections that survived NMS

        # Last column of dets is the score; keep confident detections only.
        inds = np.where(dets[:, -1] >= conf_thresh)[0]

        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]

            # OpenCV drawing calls take points as (x, y) tuples.
            top_left = (int(bbox[0]), int(bbox[1]))
            bottom_right = (int(bbox[2]), int(bbox[3]))

            cv2.rectangle(im_show, top_left, bottom_right,
                          (255, 0, 0), thickness=2)

            # Label goes on the same canvas as the rectangle.
            cv2.putText(im_show, "{}_{}".format(cls, score),
                        top_left,
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 255), 2)

    # Show the fully annotated image once, after every class is processed.
    cv2.imshow("img", im_show)
    cv2.waitKey(0)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值