tt100k交通标识目标检测与分类

永不言弃h

已于 2024-06-08 18:48:22 修改

阅读量959

点赞数 22

文章标签：目标检测分类人工智能

于 2024-06-08 18:47:50 首次发布

本文链接：https://blog.csdn.net/m0_57145438/article/details/139549330

版权

目标检测与分类实战

本文以github上的Yolo-FastestV2项目作为参考，并结合gd32ai-modelzoo的代码进行复现，选用TT100k交通标志检测数据集完成检测与分类的任务。由于模型需要部署到开发板上，对推理速度有一定的要求，因此最后选用YoloFastestV2模型。

文章目录

目标检测与分类实战

数据的预处理

因为我们需要将模型部署到gd32开发板上，而摄像头ov7670采集的图像最大为640*480，数据集的图片大小却是2048*2048，所以需要对数据集的图像重新做切割。我以每张图片中每一个标签的中心点为基准做切割，切割成320*240大小的图像，这样就将多目标的数据集转换成了单目标检测的数据集。再将标签数据归一化，转换成yolo所需的txt标签文件。

请添加图片描述

# -*- coding: UTF-8 -*-

# @Project ：group_first 
# @File    ：preprocessing.py
# @IDE     ：PyCharm 
# @Author  ：liguochun0304@163.com
# @Date    ：2024/5/7 15:55

import json
import random

from PIL import ImageDraw, Image
import os
# JSON annotation file; it is read and parsed further down in this script.
data_path = './data/annotations.json'
# Directory whose file names are listed to obtain the training image ids.
train_path = './data/train'
# Directory whose file names are listed to obtain the test image ids.
test_path = './data/test'

def save_json(save_path,data):
    """Write ``data`` to ``save_path`` as JSON.

    The text after the last dot of ``save_path`` must be ``json``; otherwise
    the assertion fails before the file is opened.
    """
    extension = save_path.rsplit('.', 1)[-1]
    assert extension == 'json'
    with open(save_path, 'w') as out_file:
        json.dump(data, out_file)

def convert(size, box):
    """Turn an ``(x, y, w, h)`` pixel box into a normalised centre/size tuple.

    ``size`` is ``(image_width, image_height)``. ``box`` holds the top-left
    corner followed by the box width and height. The result is
    ``(x_center, y_center, w, h)``, each scaled by the reciprocal of the
    matching image dimension.
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    box_w = box[2]
    box_h = box[3]
    center_x = box[0] + box_w / 2.0
    center_y = box[1] + box_h / 2.0

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)
def print_progress_bar(current, total, prefix='', suffix='', decimals=1, length=50, fill='█', print_end="\r"):
    """
    Draw a single-line text progress bar on standard output.

    The bar is redrawn in place through the leading carriage return, so the
    screen is not cleared between updates.

    @params:
        current     - current progress value (Int)
        total       - total progress value (Int); 0 is rendered as a full bar
        prefix      - text printed before the bar (Str)
        suffix      - text printed after the percentage (Str)
        decimals    - number of decimals in the percentage (Int)
        length      - width of the bar in characters (Int)
        fill        - character used for the completed part (Str)
        print_end   - ``end`` argument passed to ``print`` (Str)
    """
    if total:
        ratio = current / float(total)
        filled_length = int(length * current // total)
    else:
        # An empty job counts as finished; this also avoids ZeroDivisionError.
        ratio = 1.0
        filled_length = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * ratio)
    bar = fill * filled_length + '-' * (length - filled_length)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end=print_end)
    # Move to a fresh line once the bar is complete.
    if current == total:
        print()
def normalize_bbox(bbox, image_width, image_height):
    """
    Scale a corner-format bounding box by the image dimensions.

    Args:
        bbox (list or tuple): box as [x_min, y_min, x_max, y_max].
        image_width (int or float): divisor for the x coordinates.
        image_height (int or float): divisor for the y coordinates.

    Returns:
        list: [x_min, y_min, x_max, y_max], each divided by the matching
        image dimension.
    """
    left, top, right, bottom = bbox
    return [
        left / image_width,
        top / image_height,
        right / image_width,
        bottom / image_height,
    ]


# Training image ids: each file name in train_path, cut at its first dot.
trains_id_ = os.listdir(train_path)
trains_id = [file_name.split('.')[0] for file_name in trains_id_]


# Test image ids, derived the same way from test_path.
tests_id_ = os.listdir(test_path)
tests_id = [file_name.split('.')[0] for file_name in tests_id_]

print('训练ID：',trains_id)
print('测试ID：',tests_id)

# Load the annotations. The context manager closes the file handle, which the
# previous bare open() never did.
with open(data_path, 'r', encoding='utf-8') as f:
    content = f.read()
annotations = json.loads(content)

# Category names, one per line; the position in this list gives the class index.
with open('./dataset/trc.txt','r',encoding='utf-8') as f:
    lable_name = [line.strip() for line in f]

for key,value in enumerate(lable_name):
    print(key,value)
# Size of each crop written by preprocessing(), also used to normalise labels.
width, height = 320,240
# Source image size (not referenced elsewhere in the visible code).
owidth,oheight = 2048,2048


def preprocessing(s_id,is_train):
    """Cut one crop per annotated object and write its label and list entry.

    For every image id in ``s_id`` the matching entry of the module-level
    ``annotations['imgs']`` is read. Each object whose category is found in
    ``lable_name`` produces:

    * a ``width`` x ``height`` crop centred on the object, with a random
      offset of up to 100 px per axis, saved as ``<id>_<n>.jpg``;
    * a one-line label file ``<id>_<n>.txt`` next to it;
    * one line appended to ``./dataset/train.txt`` or ``./dataset/val.txt``.

    ``<n>`` is the 1-based position of the object in the annotation, counting
    skipped objects too.

    Args:
        s_id: sequence of image ids; it is only read, never modified.
        is_train: ``True`` reads from ``train_path`` and writes to
            ``./dataset/train``; anything else uses ``test_path`` and
            ``./dataset/val``.

    Images missing on disk are reported once and skipped, so no label is
    written for them. Objects with an unknown category are reported and
    skipped.
    """
    if is_train is True:
        open_path = train_path
        save_path = "./dataset/train"
        list_path = './dataset/train.txt'
    else:
        open_path = test_path
        save_path = "./dataset/val"
        list_path = './dataset/val.txt'

    os.makedirs(save_path, exist_ok=True)

    # Derive the crop half-size from the same width/height used for
    # normalisation so the two cannot drift apart.
    half_w = width / 2
    half_h = height / 2

    total = len(s_id)
    # Plain enumeration: popping from an alias of s_id while iterating over it
    # skipped every other image and emptied the caller's list.
    for done, image_id in enumerate(s_id, start=1):
        print_progress_bar(done, total, prefix='Progress:', suffix='Complete', length=50)
        target_dir = annotations['imgs'][f'{image_id}']
        path = target_dir['path'].split('.')[0]
        objects = target_dir['objects']

        # Open the source image once per id, before any label is written, so a
        # missing image never leaves orphan label files behind.
        image_file = os.path.join(open_path, f'{image_id}.jpg')
        try:
            img = Image.open(image_file)
        except FileNotFoundError:
            print(f'{image_file} 不存在')
            continue

        with img:
            for encode, obj in enumerate(objects, start=1):
                try:
                    index = lable_name.index(obj['category'])
                except ValueError:
                    print(f"{image_id}编号的标签为{obj['category']}，列表中没有此标签")
                    continue

                bbox = obj['bbox']
                xmin, ymin = bbox['xmin'], bbox['ymin']
                xmax, ymax = bbox['xmax'], bbox['ymax']

                # Crop centre: the box centre plus a random offset, so the
                # object is not always in the middle of the crop.
                x_center = (xmin + xmax) / 2 + random.randint(-100, 100)
                y_center = (ymin + ymax) / 2 + random.randint(-100, 100)

                # Crop window in source-image coordinates.
                img_xmin = x_center - half_w
                img_xmax = x_center + half_w
                img_ymin = y_center - half_h
                img_ymax = y_center + half_h

                # Box in crop coordinates, clamped to the crop. The previous
                # abs() mirrored an edge lying outside the crop back inside,
                # which produced a wrong box instead of a truncated one.
                new_xmin = min(max(xmin - img_xmin, 0), width)
                new_ymin = min(max(ymin - img_ymin, 0), height)
                new_xmax = min(max(xmax - img_xmin, 0), width)
                new_ymax = min(max(ymax - img_ymin, 0), height)

                # NOTE(review): the label stores normalised corners
                # (xmin ymin xmax ymax) and a 1-based class index. Darknet-style
                # YOLO loaders usually expect "class cx cy w h" with 0-based
                # classes; confirm against the training code before changing.
                nx_min, ny_min, nx_max, ny_max = normalize_bbox(
                    [new_xmin, new_ymin, new_xmax, new_ymax], width, height)
                content = f"{index + 1} {nx_min} {ny_min} {nx_max} {ny_max}"

                cropped_img = img.crop((img_xmin, img_ymin, img_xmax, img_ymax))
                cropped_img.save(os.path.join(save_path, f"{image_id}_{encode}.jpg"))

                # One crop holds one object, so the label file is overwritten
                # rather than appended; a rerun then cannot duplicate lines.
                with open(f"{save_path}/{image_id}_{encode}.txt", 'w') as f:
                    f.write(content + '\n')

                with open(list_path, 'a', encoding='utf-8') as f:
                    f.write(path + f"_{encode}.jpg" + '\n')

if __name__ == '__main__':
    # Process the training split first, then the test split. The loop variable
    # no longer rebinds the builtin ``id`` at module scope.
    for image_ids, is_train in (trains_id, True), (tests_id, False):
        preprocessing(image_ids, is_train)

    # print(len(lable_name))
    # with open(".\dataset\data.txt", 'w') as f:
    #     for i in lable_name:
    #         f.write(i + '\n')

项目代码

能力有限，并未对Yolo-FastestV2做很大的改动，只是针对该项目在本机运行时遇到的问题做了一点改动。代码也已上传至我的liguochun0304/Yolo-FastestV2 (github.com)。

训练结果

这样的结果其实我也很惊讶：即使是单目标检测，精度也这么低，与在coco数据集上的精度相比差了将近一倍，我暂时认为是目标太小的缘故。这个问题我暂时无法解决。我看到Yolo-FastestV2的作者已经采用了FPN结构，但精度还是很低。其实还可以尝试换用其他模型进行训练，但我毕竟不是CV方向，并且时间不够，就没有继续往下钻研。我觉得目标小这一点有点类似于遥感的目标检测，可以尝试用遥感的目标检测算法来检测该数据集（如果不是在嵌入式单片机上跑的话），可能会得到更好的结果。

D:\anaconda\envs\gd32ai\lib\site-packages\torch\functional.py:512: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\TensorShape.cpp:3588.)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
Evaluation model:: 100%|██████████| 60/60 [00:38<00:00,  1.54it/s]
computer PR...
Evaluation model:: 100%|██████████| 60/60 [00:10<00:00,  5.74it/s]
Precision:0.281061 Recall:0.445965 AP:0.318521 F1:0.337140