复现mask2former并跑通自定义数据集

最新推荐文章于 2025-03-23 20:18:25 发布

Andrew_Xzw

最新推荐文章于 2025-03-23 20:18:25 发布

阅读量5.5k

点赞数 10

文章标签：神经网络人工智能深度学习机器学习目标检测

本文链接：https://blog.csdn.net/weixin_50557558/article/details/137464759

版权

本文介绍了如何将LabelMe标注的json数据转换为COCO格式，以便于mask2former模型的使用，包括数据集注册、配置文件的修改以及一个小工具用于生成带标签的可视化图像。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1.json数据集转coco格式

我们一般使用labelme等标注软件对数据集进行标注，并保存为json文件。但mask2former只支持固定的数据格式，因此我们将标注转为通用性很高的coco格式。首先拉取labelme2coco转换工具的代码并安装：

git clone https://gitcode.com/fcakyon/labelme2coco.git
# git clone names the checkout after the repository ("labelme2coco");
# "labelme2coco-master" only exists when the source is downloaded as a zip.
cd labelme2coco
python setup.py install

注意image要和json放在同一文件夹下：
在这里插入图片描述

使用如下代码，将标注的json文件转成coco格式的json。

import os
import json
import PIL.Image
import PIL.ImageDraw
import numpy as np
from labelme2coco.utils import create_dir, list_jsons_recursively
from labelme2coco.image_utils import read_image_shape_as_dict


class labelme2coco(object):
    """Convert a folder of labelme JSON annotations into one COCO-format JSON file.

    Instantiating the class runs the whole conversion: the labelme files are
    collected, turned into COCO ``images`` / ``categories`` / ``annotations``
    records, and written to ``save_json_path``.
    """

    def __init__(self, labelme_folder='', save_json_path='./new.json'):
        """
        Args:
            labelme_folder: folder that contains labelme annotations and image files
            save_json_path: path for coco json to be saved
        """
        self.save_json_path = save_json_path
        self.images = []
        self.categories = []
        self.annotations = []
        self.label = []
        self.annID = 1
        # Size of the most recently processed image; used to rasterise polygons.
        self.height = 0
        self.width = 0

        # Create the save dir. An empty dirname means "current directory",
        # which needs no creation.
        save_json_dir = os.path.dirname(save_json_path)
        if save_json_dir:
            create_dir(save_json_dir)

        # Get the json list (second element of the helper's return value).
        _, labelme_json = list_jsons_recursively(labelme_folder)
        self.labelme_json = labelme_json

        self.save_json()

    def data_transfer(self):
        """Read every labelme file and fill the images/categories/annotations lists."""
        for num, json_path in enumerate(self.labelme_json):
            # Explicit UTF-8 so non-ASCII labels do not depend on the
            # platform's default encoding.
            with open(json_path, 'r', encoding='utf-8') as fp:
                data = json.load(fp)

            # image() must run before annotation(): it records the image size
            # that the bounding-box rasterisation relies on.
            self.images.append(self.image(data, num, json_path))
            for shape in data['shapes']:
                label = shape['label']
                if label not in self.label:
                    self.categories.append(self.category(label))
                    self.label.append(label)
                self.annotations.append(self.annotation(shape['points'], label, num))
                self.annID += 1

    def image(self, data, num, json_path):
        """Build the COCO ``images`` record for one labelme file.

        The image is expected next to the json file, with the same base name
        and the extension recorded in the json's ``imagePath`` field.

        Args:
            data: parsed labelme json content
            num: zero-based index of the file; the COCO image id is ``num + 1``
            json_path: path of the labelme json file
        Returns:
            dict with ``height``, ``width``, ``id`` and ``file_name``
        """
        _, img_extension = os.path.splitext(data["imagePath"])
        # Swap only the trailing extension; str.replace would also rewrite any
        # other ".json" occurring earlier in the path.
        image_path = os.path.splitext(json_path)[0] + img_extension
        img_shape = read_image_shape_as_dict(image_path)
        height, width = img_shape['height'], img_shape['width']

        self.height = height
        self.width = width

        return {
            'height': height,
            'width': width,
            'id': int(num + 1),
            'file_name': image_path,
        }

    def category(self, label):
        """Build the COCO ``categories`` record for a label not seen before.

        The id is one more than the number of labels registered so far.
        """
        return {
            'supercategory': label,
            'id': int(len(self.label) + 1),
            'name': label,
        }

    def annotation(self, points, label, num):
        """Build the COCO ``annotations`` record for one labelme shape.

        Args:
            points: list of ``[x, y]`` polygon vertices
            label: label name of the shape
            num: zero-based index of the owning image
        Returns:
            dict with ``iscrowd``, ``image_id``, ``bbox``, ``segmentation``,
            ``category_id``, ``id`` and ``area``
        """
        annotation = {}
        annotation['iscrowd'] = 0
        annotation['image_id'] = int(num + 1)
        annotation['bbox'] = list(map(float, self.getbbox(points)))
        annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
        annotation['category_id'] = self.getcatid(label)
        annotation['id'] = int(self.annID)
        # Area of the annotated polygon itself. Storing the whole image area
        # here would put every object into the same size bucket when
        # evaluation groups objects by area.
        annotation['area'] = self._polygon_area(points)
        return annotation

    @staticmethod
    def _polygon_area(points):
        """Return the area enclosed by ``points`` using the shoelace formula.

        Fewer than three vertices enclose nothing, so 0.0 is returned.
        """
        pts = np.asarray(points, dtype=float)
        if pts.ndim != 2 or pts.shape[0] < 3:
            return 0.0
        xs, ys = pts[:, 0], pts[:, 1]
        twice_area = np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))
        return float(abs(twice_area) / 2.0)

    def getcatid(self, label):
        """Return the category id registered for ``label``, or -1 if unknown."""
        for categorie in self.categories:
            if label == categorie['name']:
                return categorie['id']
        return -1

    def getbbox(self, points):
        """Rasterise the polygon on the current image size and return its box."""
        mask = self.polygons_to_mask([self.height, self.width], points)
        return self.mask2box(mask)

    def mask2box(self, mask):
        """Return ``[x, y, w, h]`` spanning the foreground pixels of ``mask``.

        Raises:
            ValueError: if the mask contains no foreground pixel.
        """
        index = np.argwhere(mask == 1)
        if index.size == 0:
            raise ValueError("cannot compute a bounding box: the mask has no foreground pixels")
        rows = index[:, 0]
        cols = index[:, 1]

        left_top_r = np.min(rows)  # y
        left_top_c = np.min(cols)  # x

        right_bottom_r = np.max(rows)
        right_bottom_c = np.max(cols)

        # [x1, y1, w, h] for coco box format
        return [left_top_c, left_top_r, right_bottom_c - left_top_c, right_bottom_r - left_top_r]

    def polygons_to_mask(self, img_shape, polygons):
        """Draw the filled polygon on a blank image and return it as a bool array.

        Args:
            img_shape: ``[height, width]`` of the mask to create
            polygons: list of ``[x, y]`` vertices of a single polygon
        """
        mask = np.zeros(img_shape, dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        xy = list(map(tuple, polygons))
        PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def data2coco(self):
        """Assemble the collected records into the top-level COCO dictionary."""
        return {
            'images': self.images,
            'categories': self.categories,
            'annotations': self.annotations,
        }

    def save_json(self):
        """Run the conversion and write the COCO dictionary to ``save_json_path``."""
        self.data_transfer()
        self.data_coco = self.data2coco()

        # Context manager so the file is flushed and closed even if encoding fails.
        with open(self.save_json_path, 'w', encoding='utf-8') as fp:
            json.dump(self.data_coco, fp, indent=4, separators=(',', ': '), cls=MyEncoder)


# type check when save json files
class MyEncoder(json.JSONEncoder):
    """JSON encoder that turns NumPy scalars and arrays into native Python values."""

    def default(self, obj):
        """Convert ``obj`` if it is a NumPy type; otherwise defer to the base encoder."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # Anything else is handled (or rejected) by the standard encoder.
        return super().default(obj)


if __name__ == "__main__":
    # Folder that holds the annotated images together with their labelme json files.
    annotation_dir = r"D:\jiedan\tool_data\hh\val"
    # Where the generated COCO-format annotation file is written.
    coco_json_path = r"D:\jiedan\tool_data\hh\val_coco_format.json"
    labelme2coco(labelme_folder=annotation_dir, save_json_path=coco_json_path)

    # Optional: visualise the annotated masks (uncomment to use).
    # import os
    #
    # from pycocotools.coco import COCO
    # from skimage import io
    # from matplotlib import pyplot as plt
    #
    # json_file = r'D:\jiedan\dataset\label\train_coco_format.json'  # input annotation file
    # dataset_dir = r''
    # coco = COCO(json_file)
    # catIds = coco.getCatIds(catNms=['head'])  # category names to show; separate several with commas
    # imgIds = coco.getImgIds(catIds=catIds)  # ids of the images containing those categories
    # for i in range(len(imgIds)):
    #     img = coco.loadImgs(imgIds[i])[0]
    #     I = io.imread(dataset_dir + img['file_name'])
    #     plt.axis('off')
    #     plt.imshow(I)  # draw the image; plt.show() displays it
    #     annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    #     anns = coco.loadAnns(annIds)
    #     coco.showAnns(anns)
    #     plt.show()  # display the image

2.注册mask2former自定义数据集

转换好的数据集存放位置如下：
train和val文件夹存放划分好的数据集图片，两个json文件分别是由对应标注转换得到的coco格式标注文件。
在这里插入图片描述
我们在主函数中进行数据集注册：

# Argument 1: name under which the custom dataset is registered
# Argument 2: extra metadata for the dataset (an empty dict here)
# Argument 3: relative path of the COCO-format json annotation file
# Argument 4: relative path of the folder that holds the dataset images
from detectron2.data.datasets import register_coco_instances

register_coco_instances("my_dataset_train", {}, "tool_data/train_coco_format.json", "tool_data/train")
register_coco_instances("my_dataset_val", {}, "tool_data/val_coco_format.json", "tool_data/val")

3.更改配置文件

我们是使用的coco格式的数据集进行训练，所以找到配置文件中的coco文件
在这里插入图片描述
首先我们对基础base配置文件进行更改，将train和test数据集换成我们刚才在主函数中注册的数据集名称。

接下来，比如我们使用maskformer2_R50_bs16_50ep.yaml模型文件，则需要将其中的类别数修改为自定义数据集的类别数。

最后就可以训练自己的数据集啦！（ps：模型的权重去model ZOO中进行下载即可！）

# Train on a single GPU, starting from the checkpoint passed as MODEL.WEIGHTS
python train_net.py --num-gpus 1 --config-file configs/coco/instance-segmentation/maskformer2_R50_bs16_50ep.yaml  MODEL.WEIGHTS "weights/model_final_94dc52.pkl"

4.小工具

最后再附上一个通过标注的json文件转成mask的代码：

import os
import json
import base64
import imgviz
import PIL.Image
import os.path as osp

from tqdm import tqdm
from labelme import utils
from threading import Thread
'''这个是根据labelme标注的json文件生成mask'''

def ConvertOne(labelme_dir, json_file, save_dir, label_name_to_value):
    """Render one labelme json annotation into image, label and overlay files.

    A sub-folder named after the json file (without its extension) is created
    under ``save_dir`` and filled with ``img.png``, ``label.png``,
    ``label_viz.png`` and ``label_names.txt``.

    Args:
        labelme_dir: folder that contains the labelme json file
        json_file: file name of the json annotation inside ``labelme_dir``
        save_dir: root folder for the generated output
        label_name_to_value: mapping from label name to integer label value
    """
    # Strip only the trailing extension; str.replace would also remove any
    # ".json" occurring elsewhere in the name.
    out_dir = os.path.join(save_dir, osp.splitext(json_file)[0])
    # exist_ok avoids the check-then-create race when several conversions run
    # concurrently.
    os.makedirs(out_dir, exist_ok=True)

    json_path = osp.join(labelme_dir, json_file)
    # Read and close the json right away; nothing below needs the open handle.
    with open(json_path, "r", encoding="utf-8") as jf:
        data = json.load(jf)

    imageData = data.get("imageData")
    # When the json does not embed the image, load it from disk and encode it
    # the same way the embedded data is encoded.
    if not imageData:
        # Resolve imagePath against the json file's folder. ``json_file`` is a
        # bare file name, so its dirname would be empty and the image would be
        # looked up in the current working directory instead.
        imagePath = os.path.join(os.path.dirname(json_path), data["imagePath"])
        with open(imagePath, "rb") as f:
            imageData = base64.b64encode(f.read()).decode("utf-8")
    img = utils.img_b64_to_arr(imageData)

    lbl, _ = utils.shapes_to_label(
        img.shape, data["shapes"], label_name_to_value
    )

    # Index label names by their value so position i holds the name of label i.
    label_names = [None] * (max(label_name_to_value.values()) + 1)
    for name, value in label_name_to_value.items():
        label_names[value] = name

    lbl_viz = imgviz.label2rgb(
        lbl, imgviz.asgray(img), label_names=label_names, loc="rb"
    )

    PIL.Image.fromarray(img).save(osp.join(out_dir, "img.png"))
    # Save the label image.
    utils.lblsave(osp.join(out_dir, "label.png"), lbl)
    # Save the visualisation with the labels drawn over the image.
    PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, "label_viz.png"))

    # Explicit UTF-8 so non-ASCII label names are written identically on every platform.
    with open(osp.join(out_dir, "label_names.txt"), "w", encoding="utf-8") as f:
        for lbl_name in label_names:
            f.write(lbl_name + "\n")


def main(labelme_dir=r'D:\jiedan\tool_data\data_annotation',
         save_dir=r'D:\jiedan\tool_data\output',
         class_names=None):
    """Convert every labelme json file in ``labelme_dir`` with ``ConvertOne``.

    Args:
        labelme_dir: folder that contains the labelme json files
        save_dir: folder that receives the generated results
        class_names: mapping from label name to integer label value; defaults
            to ``_background_`` plus ``tool 1`` .. ``tool 8``
    """
    if class_names is None:
        # Class labels
        class_names = {
            '_background_': 0,
            "tool 1": 1,
            "tool 2": 2,
            "tool 3": 3,
            "tool 4": 4,
            "tool 5": 5,
            "tool 6": 6,
            "tool 7": 7,
            "tool 8": 8,
        }

    # Keep only the json annotation files found in the labelme folder.
    json_list = [x for x in os.listdir(labelme_dir) if x.endswith(".json")]

    for json_file in tqdm(json_list):
        # Single-threaded conversion.
        ConvertOne(labelme_dir, json_file, save_dir, class_names)

        # Multi-threaded alternative:
        # Thread(target=ConvertOne, args=(labelme_dir, json_file, save_dir, class_names)).start()

    # Report the output location once, after all files are processed, instead
    # of once per file in the middle of the progress bar.
    print(f"生成结果保存地址：{save_dir}")


# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()