yolov5自动标注

最新推荐文章于 2024-03-26 11:16:36 发布

WenZhaoYang123468

最新推荐文章于 2024-03-26 11:16:36 发布

阅读量1.8k

点赞数 1

分类专栏：深度学习　文章标签：python、深度学习

本文链接：https://blog.csdn.net/WenZhaoYang123/article/details/127921463

版权

深度学习专栏收录该内容

7 篇文章 0 订阅

订阅专栏

自动标注前提：模型准确率尚可，误检框较少；否则，反而会加大标注工作量！

1.修改detect.py为如下，输出左上角和右下角xy坐标：

2.利用detect.py程序中--save-txt生成txt

3.使用如下代码将txt转成xml，请根据个人情况修改：

4.利用labelimg标注软件核对下xml是否正确

1.修改detect.py为如下，输出左上角和右下角xy坐标：

                for *xyxy, conf, cls in reversed(det):
                    # Each detection row unpacks into the box values (xyxy), a confidence and a class id.
                    if save_txt:  # Write to file
                        # Original label format (normalized xywh), disabled for auto-labelling:
                        # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        # line = (cls, *xywh, conf) if save_conf else (cls, *xywh)  # label format
                        # Write the xyxy corner values as-is (no division by gn); presumably these are
                        # pixel coordinates of the top-left and bottom-right corners -- confirm upstream.
                        line = (cls, *xyxy, conf) if save_conf else (cls, *xyxy)  # label format
                        # Append one space-separated '%g'-formatted line per detection to <txt_path>.txt.
                        # The file is only opened inside this loop, so nothing is written when det is empty.
                        with open(f'{txt_path}.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

2.运行detect.py时加上--save-txt参数，为有检测结果的图片生成txt（每行格式：类别 xmin ymin xmax ymax，加--save-conf时末尾多一列置信度）

def parse_opt():
    """Build the command-line parser for detect.py and parse ``sys.argv``.

    NOTE(review): as shown in this excerpt the function stops after
    ``parse_args()`` and does not return ``opt``; the rest of the original
    function is presumably outside the excerpt -- confirm before reuse.
    ``ROOT`` and ``argparse`` must be provided by the enclosing module.
    """
    parser = argparse.ArgumentParser()
    # Path-like defaults below (weights, source, data) are specific to the author's project layout.
    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'runs/train/exp2/weights/best.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'test/', help='file/dir/URL/glob, 0 for webcam')
    parser.add_argument('--data', type=str, default=ROOT / 'data/VOC_safety.yaml', help='(optional) dataset.yaml path')
    # NOTE(review): the help text says "h,w", so this default reads as 5184 high by 3888 wide -- confirm the intended order.
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[5184, 3888], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--view-img', action='store_true', help='show results')
    # --save-txt is the flag the auto-labelling workflow relies on; it is off unless passed explicitly.
    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
    parser.add_argument('--save-crop', action='store_true', help='save cropped prediction boxes')
    parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
    parser.add_argument('--augment', action='store_true', help='augmented inference')
    parser.add_argument('--visualize', action='store_true', help='visualize features')
    parser.add_argument('--update', action='store_true', help='update all models')
    parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
    parser.add_argument('--name', default='exp', help='save results to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--line-thickness', default=3, type=int, help='bounding box thickness (pixels)')
    parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels')
    parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences')
    parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
    opt = parser.parse_args()

3.使用如下代码将txt转成xml，请根据个人情况修改图片目录、txt目录和类别名称：

import os
from PIL import Image
import cv2
import numpy as np
from tqdm import tqdm

# XML header template, filled with a mapping that provides the keys
# 'folder', 'name', 'path' (strings) and 'width', 'height' (integers).
# The image depth is fixed to 3 in the template.
out0 = '''<annotation>
    <folder>%(folder)s</folder>
    <filename>%(name)s</filename>
    <path>%(path)s</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>%(width)d</width>
        <height>%(height)d</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
'''
# Per-object template, filled with a mapping that provides 'class' (string)
# and 'xmin', 'ymin', 'xmax', 'ymax' (integers). One copy is emitted per box.
# NOTE(review): the opening <object> tag is not indented like its closing tag;
# this looks like a purely visual layout difference in the generated file.
out1 = '''<object>
        <name>%(class)s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
        <xmin>%(xmin)d</xmin>
        <ymin>%(ymin)d</ymin>
        <xmax>%(xmax)d</xmax>
        <ymax>%(ymax)d</ymax>
        </bndbox>
    </object>
'''
# Closing tag that terminates the document opened by out0.
out2 = '''</annotation>
'''

'''txt转xml'''


def translate(fdir, lists, txt_dir_temp, class_names=None):
    """Convert per-image detection txt files into XML annotation files.

    For every ``.jpg`` path in *lists*, the image is read to obtain its size,
    the matching ``<image stem>.txt`` is looked up in *txt_dir_temp*, and an
    XML file built from the module-level templates ``out0``/``out1``/``out2``
    is written next to the image (same name, ``.xml`` extension).

    Each non-empty txt line must contain at least five space-separated
    fields: ``class_id xmin ymin xmax ymax``; any further fields (such as a
    confidence) are ignored.

    Args:
        fdir: Image directory; its last path component becomes ``<folder>``.
        lists: Iterable of image paths. Entries that do not end in ``.jpg``
            (case-insensitive) are skipped.
        txt_dir_temp: Directory containing the detection txt files.
        class_names: Optional mapping from integer class id to label name.
            Defaults to the five classes used by the original script.

    Raises:
        OSError: If an image cannot be read by ``cv2.imread``.
        ValueError: If a txt line is malformed or names an unknown class id.
    """
    # Local import keeps this block self-contained without touching file-level imports.
    from xml.sax.saxutils import escape

    if class_names is None:
        class_names = {
            0: "person",
            1: "wcaqm",
            2: "aqmzc",
            3: "zyaq_aqmbt",
            4: "aqd",
        }

    # Strip trailing separators first: basename('E:/11/') would otherwise be ''.
    folder = os.path.basename(fdir.rstrip('/\\'))

    for jpg in tqdm(lists):
        # splitext only touches the real extension, unlike str.replace which
        # would also rewrite '.jpg' occurring earlier in the path.
        path_no_ext, ext = os.path.splitext(jpg)
        if ext.lower() != '.jpg':
            continue

        image = cv2.imread(jpg)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError('cannot read image: %s' % jpg)
        h, w = image.shape[:2]

        imgfile = os.path.basename(jpg)  # image file name
        img_name_nojpg = os.path.splitext(imgfile)[0]  # file name without extension

        # Build the whole document in memory so a failure never leaves a
        # half-written XML file on disk.
        parts = [out0 % {
            'folder': escape(folder),
            'name': escape(imgfile),
            'path': escape(jpg),
            'width': w,
            'height': h,
        }]

        txt_path = os.path.join(txt_dir_temp, img_name_nojpg + ".txt")
        # A missing txt file is treated as "no detections": the XML is still
        # written, just without any <object> entries.
        if os.path.isfile(txt_path):
            with open(txt_path) as source_file:
                for line_no, line in enumerate(source_file, 1):
                    staff = line.split()
                    if not staff:
                        continue  # ignore blank lines
                    if len(staff) < 5:
                        raise ValueError(
                            '%s:%d: expected "class xmin ymin xmax ymax", got %r'
                            % (txt_path, line_no, line.rstrip('\n')))
                    # Go through float() so values such as "12.0" are accepted.
                    class_id = int(float(staff[0]))
                    if class_id not in class_names:
                        # Fail loudly instead of reusing the previous line's label.
                        raise ValueError(
                            '%s:%d: unknown class id %d'
                            % (txt_path, line_no, class_id))
                    xmin, ymin, xmax, ymax = (int(float(v)) for v in staff[1:5])
                    parts.append(out1 % {
                        'class': escape(str(class_names[class_id])),
                        'xmin': xmin,
                        'ymin': ymin,
                        'xmax': xmax,
                        'ymax': ymax,
                    })

        parts.append(out2)
        # The template has no XML declaration, so parsers will assume UTF-8.
        with open(path_no_ext + '.xml', 'w', encoding='utf-8') as fxml:
            fxml.write(''.join(parts))


if __name__ == '__main__':
    # Directory holding the .jpg images; the XML files are written next to them.
    file_dir = 'E:/11/'
    # Directory holding the txt files produced by detect.py --save-txt.
    txt_dir = 'E:/labels/'

    # Drop any trailing '/' before re-adding exactly one, so the paths passed
    # on (and stored in the XML <path> field) never contain '//'.
    image_root = file_dir.rstrip('/')
    # Require a real '.jpg' extension (case-insensitive), in sorted order.
    lists = [
        image_root + '/' + name  # full image path
        for name in sorted(os.listdir(file_dir))
        if name.lower().endswith('.jpg')
    ]
    translate(file_dir, lists, txt_dir)
    print('Done')