Step-by-Step: Training Deformable-DETR on a Custom Dataset

Official code released with the paper:

https://github.com/fundamentalvision/deformable-detr

A community fork modified to support training on custom datasets:

https://github.com/robinnarsinghranabhat/Deformable-DETR

This guide uses the modified fork.

I. Environment Setup

1. Create a virtual environment

conda create -n Deformable-detr python=3.8

2. Install PyTorch and the project's required libraries
Note: the PyTorch build must match the CUDA version on your machine or server (CUDA 11.8 is used as the example).

pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu118
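A quick sanity check that the CUDA build was installed (a minimal sketch using standard torch attributes):

import torch

print(torch.__version__)          # expect 2.0.1+cu118
print(torch.version.cuda)         # expect 11.8
print(torch.cuda.is_available())  # expect True on a CUDA machine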

Change into the code directory and install the dependencies:

pip install -r requirements.txt

Build the MultiScaleDeformableAttention CUDA op (the ops live under models/ops in the repo):

cd ./models/ops
sh ./make.sh
pip list
python test.py

pip list should now show MultiScaleDeformableAttention among the installed packages, and running test.py should print True for every check.
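To further confirm that the compiled op can be imported (a minimal check; run it inside the activated environment):

import MultiScaleDeformableAttention as MSDA

print(MSDA.__file__)  # path of the compiled extension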

Note: if make.sh fails with a permission error, make it executable (e.g. chmod u+x make.sh) and rerun it.

II. Dataset Preparation

I use the VOC2007 dataset here (the VOC2007 and VOC2012 datasets are publicly available). Deformable-DETR expects the COCO dataset format, so the VOC data must be converted.

COCO layout:

  • coco
    • train2017
    • val2017
    • annotations
      • instances_train2017.json
      • instances_val2017.json

VOC layout:

  • VOCdevkit
    • VOC2007
      • Annotations
      • ImageSets
      • JPEGImages

For object detection, only these VOC directories are needed.
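Before copying anything, make sure the target COCO skeleton exists. A minimal sketch (the root path is an assumption; change it to yours):

import os

root = "D:/SelfDataSet/coco"  # assumption: choose your own root directory
for sub in ("train2017", "val2017", "annotations"):
    os.makedirs(os.path.join(root, sub), exist_ok=True)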

1. Split all images into training and validation sets

import os
import shutil

if __name__ == '__main__':
    fileDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages/"  # source image folder
    trainDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/train2017/"  # destination for training images
    valDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/val2017/"      # destination for validation images

    os.makedirs(trainDir, exist_ok=True)
    os.makedirs(valDir, exist_ok=True)

    # Copy the training images listed in ImageSets/Main/train.txt
    train = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'r') as f:
        for line in f:
            train.append(line.strip())
    for name in train:
        shutil.copy2(fileDir + name + '.jpg', trainDir + name + '.jpg')

    # Copy the validation images listed in ImageSets/Main/val.txt
    val = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt', 'r') as f:
        for line in f:
            val.append(line.strip())
    for name in val:
        shutil.copy2(fileDir + name + '.jpg', valDir + name + '.jpg')

    # (An optional test split can be handled the same way with a test.txt list.)

I follow VOC's own train/val split from ImageSets/Main; for datasets without those lists, see the sketch below.
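If a personal dataset has no ImageSets/Main lists, a minimal random 9:1 split can generate them first (the paths and ratio are assumptions):

import os
import random

img_dir = "D:/SelfDataSet/VOCdevkit/VOC2007/JPEGImages"      # assumption
out_dir = "D:/SelfDataSet/VOCdevkit/VOC2007/ImageSets/Main"  # assumption

names = [os.path.splitext(f)[0] for f in os.listdir(img_dir) if f.endswith(".jpg")]
random.seed(0)  # fixed seed so the split is reproducible
random.shuffle(names)
k = int(0.9 * len(names))  # 90% train, 10% val

os.makedirs(out_dir, exist_ok=True)
with open(os.path.join(out_dir, "train.txt"), "w") as f:
    f.write("\n".join(names[:k]))
with open(os.path.join(out_dir, "val.txt"), "w") as f:
    f.write("\n".join(names[k:]))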

2. Split the XML annotation files into training and validation sets

import os
import shutil

if __name__ == '__main__':
    fileDir = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations/"  # source annotation folder
    trainDir = 'D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_train/'
    valDir = 'D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/xml/xml_val/'

    os.makedirs(trainDir, exist_ok=True)
    os.makedirs(valDir, exist_ok=True)

    # Copy the training annotations listed in ImageSets/Main/train.txt
    train = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'r') as f:
        for line in f:
            train.append(line.strip())
    for name in train:
        shutil.copy2(fileDir + name + '.xml', trainDir + name + '.xml')

    # Copy the validation annotations listed in ImageSets/Main/val.txt
    val = []
    with open('D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt', 'r') as f:
        for line in f:
            val.append(line.strip())
    for name in val:
        shutil.copy2(fileDir + name + '.xml', valDir + name + '.xml')

    # (An optional test split can be handled the same way with a test.txt list.)

3. Convert the XML files to a COCO JSON file

#!/usr/bin/python

# pip install lxml

import sys
import os
import json
import xml.etree.ElementTree as ET
import glob

START_BOUNDING_BOX_ID = 1
PRE_DEFINE_CATEGORIES = None


# If necessary, pre-define category and its id
#  PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
#  "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
#  "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
#  "motorbike": 14, "person": 15, "pottedplant": 16,
#  "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}


def get(root, name):
    vars = root.findall(name)
    return vars


def get_and_check(root, name, length):
    vars = root.findall(name)
    if len(vars) == 0:
        raise ValueError("Can not find %s in %s." % (name, root.tag))
    if length > 0 and len(vars) != length:
        raise ValueError(
            "The size of %s is supposed to be %d, but is %d."
            % (name, length, len(vars))
        )
    if length == 1:
        vars = vars[0]
    return vars


def get_filename_as_integer(filename):
    """Turn a VOC-style filename such as 2007_000027.xml into the integer
    image id 2007000027 by stripping the extension and any underscores."""
    filename = filename.replace("\\", "/")
    filename = os.path.splitext(os.path.basename(filename))[0]
    return int(filename.replace('_', ''))

def get_categories(xml_files):
    """Generate category name to id mapping from a list of xml files.

    Arguments:
        xml_files {list} -- A list of xml file paths.

    Returns:
        dict -- category name to id mapping.
    """
    classes_names = []
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall("object"):
            classes_names.append(member[0].text)
    classes_names = list(set(classes_names))
    classes_names.sort()
    return {name: i for i, name in enumerate(classes_names)}


def convert(xml_files, json_file):
    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
    if PRE_DEFINE_CATEGORIES is not None:
        categories = PRE_DEFINE_CATEGORIES
    else:
        categories = get_categories(xml_files)
    bnd_id = START_BOUNDING_BOX_ID
    for xml_file in xml_files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        path = get(root, "path")
        if len(path) == 1:
            filename = os.path.basename(path[0].text)
        elif len(path) == 0:
            filename = get_and_check(root, "filename", 1).text
        else:
            raise ValueError("%d paths found in %s" % (len(path), xml_file))
        # The filename (minus extension and underscores) must parse as an integer
        image_id = get_filename_as_integer(filename)
        size = get_and_check(root, "size", 1)
        width = int(get_and_check(size, "width", 1).text)
        height = int(get_and_check(size, "height", 1).text)
        image = {
            "file_name": filename,
            "height": height,
            "width": width,
            "id": image_id,
        }
        json_dict["images"].append(image)
        ## Currently we do not support segmentation.
        #  segmented = get_and_check(root, 'segmented', 1).text
        #  assert segmented == '0'
        for obj in get(root, "object"):
            category = get_and_check(obj, "name", 1).text
            if category not in categories:
                new_id = len(categories)
                categories[category] = new_id
            category_id = categories[category]
            bndbox = get_and_check(obj, "bndbox", 1)
            xmin = int(float(get_and_check(bndbox, "xmin", 1).text)) - 1
            ymin = int(float(get_and_check(bndbox, "ymin", 1).text)) - 1
            xmax = int(float(get_and_check(bndbox, "xmax", 1).text))
            ymax = int(float(get_and_check(bndbox, "ymax", 1).text))
            assert xmax > xmin
            assert ymax > ymin
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            ann = {
                "area": o_width * o_height,
                "iscrowd": 0,
                "image_id": image_id,
                "bbox": [xmin, ymin, o_width, o_height],
                "category_id": category_id,
                "id": bnd_id,
                "ignore": 0,
                "segmentation": [],
            }
            json_dict["annotations"].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {"supercategory": "none", "id": cid, "name": cate}
        json_dict["categories"].append(cat)

    os.makedirs(os.path.dirname(json_file), exist_ok=True)
    with open(json_file, "w") as json_fp:
        json.dump(json_dict, json_fp)


if __name__ == "__main__":
    import argparse

    root_path = "D:/SelfDataSet/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/"
    parser = argparse.ArgumentParser(
        description="Convert Pascal VOC annotation to COCO format."
    )
    # nargs="?" lets the defaults apply when the script is run without arguments;
    # a bare positional argument would otherwise ignore its default.
    parser.add_argument("xml_dir", nargs="?", default=root_path + "xml/xml_train", help="Directory path to xml files.", type=str)
    parser.add_argument("json_file", nargs="?", default=root_path + "coco/annotations/instances_train2017.json", help="Output COCO format json file.", type=str)
    args = parser.parse_args()
    xml_files = glob.glob(os.path.join(args.xml_dir, "*.xml"))

    # If you want to do train/test split, you can pass a subset of xml files to convert function.
    print("Number of xml files: {}".format(len(xml_files)))
    convert(xml_files, args.json_file)
    print("Success: {}".format(args.json_file))

At this point the dataset conversion is done; arrange the converted files into the COCO layout shown above.
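A quick sanity check of the generated annotations (a minimal sketch; pycocotools is a standard dependency of DETR-family repos):

from pycocotools.coco import COCO

coco = COCO("coco/annotations/instances_train2017.json")
print("images:", len(coco.imgs), "annotations:", len(coco.anns))
print("categories:", [c["name"] for c in coco.loadCats(coco.getCatIds())])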

III. Training

Download a pretrained model (the checkpoint download links are listed in the official Deformable-DETR README).


# Fine-tune a pretrained model:
python -u main.py --output_dir exps/iter_refine/ --with_box_refine --two_stage --resume ./saved_models/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage-checkpoint.pth --coco_path ./custom_files --num_classes=3

Key arguments:

  • coco_path: path to the dataset

  • output_dir: directory where checkpoints and logs are written

  • resume: checkpoint to fine-tune from

  • num_classes:
    Deformable DETR was originally trained on 91 classes. Suppose you want to fine-tune on two classes, say yes-checkbox and no-checkbox.

    Set num_classes to 3 (total number of dataset labels + 1); the extra 1 accounts for the no-object class.
    Instead of passing these on the command line, you can also edit their defaults in main.py and run it directly, as sketched below.
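A minimal sketch of hard-coding those values in main.py's get_args_parser (the argument names mirror the flags above; treat it as illustrative rather than the exact upstream code):

parser.add_argument('--coco_path', default='./custom_files', type=str)
parser.add_argument('--output_dir', default='exps/iter_refine/', type=str)
parser.add_argument('--resume', default='./saved_models/r50_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage-checkpoint.pth', type=str)
parser.add_argument('--num_classes', default=3, type=int)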

Warnings that may appear at runtime

Warning 1:

UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.
UserWarning: Arguments other than a weight enum or None for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing weights=ResNet101_Weights.IMAGENET1K_V1. You can also use weights=ResNet101_Weights.DEFAULT to get the most up-to-date weights.

The pretrained parameter was deprecated in torchvision 0.13. To fix it, open backbone.py in the models folder and replace the pretrained argument with weights=ResNet101_Weights.DEFAULT, as the warning suggests (sketch below).
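A minimal sketch of the edit in models/backbone.py (the surrounding call is paraphrased, so match it against your copy of the file):

from torchvision.models import ResNet101_Weights

# before (deprecated since torchvision 0.13):
#   backbone = getattr(torchvision.models, name)(
#       replace_stride_with_dilation=[False, False, dilation],
#       pretrained=is_main_process(), norm_layer=norm_layer)
# after:
backbone = getattr(torchvision.models, name)(
    replace_stride_with_dilation=[False, False, dilation],
    weights=ResNet101_Weights.DEFAULT, norm_layer=norm_layer)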

Warning 2:

To silence UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument, edit torch/functional.py inside the virtual environment and change

return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]

to

return _VF.meshgrid(tensors, **kwargs, indexing='ij')  # type: ignore[attr-defined]

IV. Inference

import cv2
from PIL import Image
import numpy as np
import os
import time

import torch
from torch import nn
# from torchvision.models import resnet50
import torchvision.transforms as T
from main import get_args_parser as get_main_args_parser
from models import build_model

torch.set_grad_enabled(False)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("[INFO] 当前使用{}做推断".format(device))

# Image preprocessing: resize, convert to tensor, ImageNet normalization
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


# convert (cx, cy, w, h) boxes to (x_min, y_min, x_max, y_max)
def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


# rescale normalized [0, 1] boxes to absolute image coordinates
def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b.cpu().numpy()
    b = b * np.array([img_w, img_h, img_w, img_h], dtype=np.float32)
    return b


# plot box by opencv
def plot_result(pil_img, prob, boxes, save_name=None, imshow=False, imwrite=False):
    LABEL = ['all','hat', 'person', 'groundrod', 'vest', 'workclothes_clothes', 'workclothes_trousers', 'winter_clothes',
             'winter_trousers', 'noworkclothes_clothes', 'noworkclothes_trousers', 'height', 'safteybelt', 'smoking',
             'noheight', 'fire', 'extinguisher', 'roll_workclothes', 'roll_noworkclothes', 'insulating_gloves', 'car',
             'fence', 'bottle', 'shorts', 'holes', 'single_ladder', 'down', 'double_ladder', 'oxygen_horizontally',
             'oxygen_vertically', 'acetylene_vertically', 'acetylene_horizontally']

    opencvImage = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)


    if len(prob) == 0:
        print("[INFO] NO box detect !!! ")
        if imwrite:
            if not os.path.exists("./result/pred_no"):
                os.makedirs("./result/pred_no")
            cv2.imwrite(os.path.join("./result/pred_no", save_name), opencvImage)
        return

    for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes):
        cl = p.argmax()
        label_text = '{}: {}%'.format(LABEL[cl], round(p[cl] * 100, 2))

        cv2.rectangle(opencvImage, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (255, 255, 0), 2)
        cv2.putText(opencvImage, label_text, (int(xmin) + 10, int(ymin) + 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (255, 255, 0), 2)

    if imshow:
        cv2.imshow('detect', opencvImage)
        cv2.waitKey(0)

    if imwrite:
        if not os.path.exists("./result/pred"):
            os.makedirs('./result/pred')
        cv2.imwrite('./result/pred/{}'.format(save_name), opencvImage)

def load_model(model_path, args):
    model, _, _ = build_model(args)
    state_dict = torch.load(model_path, map_location='cpu')  # <--- path to the trained checkpoint
    model.load_state_dict(state_dict["model"])
    model.to(device)  # works on both CPU and GPU
    model.eval()
    print("model loaded successfully")
    return model

# inference on a single image
def detect(im, model, transform, prob_threshold=0.7):
    # mean-std normalize the input image (batch-size: 1)
    img = transform(im).unsqueeze(0)

    # demo model only support by default images with aspect ratio between 0.5 and 2
    # if you want to use images with an aspect ratio outside this range
    # rescale your image so that the maximum size is at most 1333 for best results
    
    #assert img.shape[-2] <= 1600 and img.shape[
    #                                     -1] <= 1600, 'demo model only supports images up to 1600 pixels on each side'

    # propagate through the model
    img = img.to(device)
    start = time.time()
    outputs = model(img)
    #end = time.time()
    # keep only predictions with 0.7+ confidence
    # print(outputs['pred_logits'].softmax(-1)[0, :, :-1])
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > prob_threshold
    #end = time.time()

    probas = probas.cpu().detach().numpy()
    keep = keep.cpu().detach().numpy()

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    end = time.time()
    return probas[keep], bboxes_scaled, end - start


if __name__ == "__main__":

    main_args = get_main_args_parser().parse_args()
    # load the model
    dfdetr = load_model('exps/r50_deformable_detr/checkpoint0049.pth', main_args)

    test_dir = "coco/testdata/test2017"
    files = os.listdir(test_dir)

    cn = 0
    waste = 0
    for file in files:
        img_path = os.path.join(test_dir, file)
        im = Image.open(img_path).convert('RGB')  # force 3 channels for the transform

        scores, boxes, waste_time = detect(im, dfdetr, transform)
        plot_result(im, scores, boxes, save_name=file, imshow=False, imwrite=True)
        print("{} [INFO] {} time: {} done!!!".format(cn, file, waste_time))

        cn += 1
        waste += waste_time
    waste_avg = waste / cn
    print("average inference time: {:.3f}s".format(waste_avg))


Just change the model path and the test-set path to yours.

The LABEL list must follow the same category order as in your instances_train2017.json (search the JSON for categories to see all labels); the helper below prints that order.
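A minimal helper to print the categories in id order (the annotation path is an assumption):

import json

with open('coco/annotations/instances_train2017.json') as f:
    cats = json.load(f)['categories']
for c in sorted(cats, key=lambda c: c['id']):
    print(c['id'], c['name'])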

V. References

Object detection: debugging the Deformable-DETR source code
Deformable-DETR deployment and hands-on walkthrough
