Labelme: Data Annotation for Object Detection and Semantic Segmentation

1. Installing labelme

Open the conda prompt and run the following commands to create a virtual environment, activate it, and install labelme.

conda create -n labelme python=3.6   # create the virtual environment

conda activate labelme               # activate the virtual environment

pip install labelme                  # install labelme

2. Annotating Data

After installing labelme, type labelme in the activated environment and press Enter to launch the labelme interface.

Choose Open Dir to open the folder containing the images to annotate and begin. Right-click in the image area: for object detection, click Create Rectangle and draw a box around the target; for segmentation, click Create Polygons. Once a shape is drawn, assign it a label, and don't forget to save.
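Saving produces one JSON file per image. An illustrative sketch of such a file (the values here are made up, and the exact set of fields varies with the labelme version):

{
  "version": "4.5.6",
  "shapes": [
    {
      "label": "target",
      "points": [[101.0, 152.0], [268.0, 297.0]],
      "group_id": null,
      "shape_type": "rectangle",
      "flags": {}
    }
  ],
  "imagePath": "example.bmp",
  "imageData": "...(base64-encoded image)...",
  "imageHeight": 480,
  "imageWidth": 640
}

The conversion scripts below read the shapes list: a rectangle stores its two opposite corners in points, and a polygon stores one [x, y] pair per vertex.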

3. Data Conversion

When training a detector you usually need to convert the annotations into YOLO format: one .txt file per image, each line being class_id x_center y_center width height with all coordinates normalized by the image size. The conversion code is below.

## Python: convert the dataset to YOLO format
import json
import os

import cv2

file_dir = "E:/BaiduNetdiskDownload/all_images/all_images/"

files = os.listdir(file_dir)

label_list = []  # class names in the order they are first encountered
for file in files:
    if not file.endswith(".json"):
        continue
    file_path = os.path.join(file_dir, file)
    image_path = file_path.replace(".json", ".bmp")
    print(image_path)
    # Read the image to get its size for normalization.
    image = cv2.imread(image_path)
    H, W = image.shape[:2]
    with open(file_path, "rb") as rf:
        file_in = json.load(rf)
    shapes = file_in["shapes"]
    anno = ""
    for shape in shapes:
        if shape["label"] not in label_list:
            label_list.append(shape["label"])
        # Rectangle corners; sort so the box is valid regardless of
        # the direction it was drawn in.
        x1, x2 = sorted([shape["points"][0][0], shape["points"][1][0]])
        y1, y2 = sorted([shape["points"][0][1], shape["points"][1][1]])
        # YOLO format: normalized center coordinates and box size.
        x = (x1 + x2) / 2 / W
        y = (y1 + y2) / 2 / H
        w = (x2 - x1) / W
        h = (y2 - y1) / H
        anno += f"{label_list.index(shape['label'])} {x} {y} {w} {h}\n"

    new_path = os.path.join(file_dir, file.replace(".json", ".txt"))
    with open(new_path, "w") as wf:
        wf.write(anno)
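The script builds the class-name-to-index mapping in label_list but never saves it, so the meaning of each class id is lost once it exits. A minimal follow-up sketch, run right after the loop in the same script, that writes the mapping to a classes.txt (a hypothetical file name) next to the labels:

# Persist the class order: class id N in the .txt files is the
# name on line N of classes.txt.
with open(os.path.join(file_dir, "classes.txt"), "w") as cf:
    cf.write("\n".join(label_list))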





## Python: convert the dataset to yolo-face format

This variant expects each JSON to contain a rectangle labeled target (the bounding box) plus a shape labeled circle_point whose two recorded points are treated as opposite corners, from which five landmark points (left, top, right, bottom, center) are derived. It also reads the image size from the JSON instead of loading the image.

import json
import os

file_dir = "E:/BaiduNetdiskDownload/yolo-face/new_dataset/images"

files = os.listdir(file_dir)

for file in files:
    if not file.endswith(".json"):
        continue
    file_path = os.path.join(file_dir, file)
    print(file_path)
    with open(file_path, "rb") as rf:
        file_in = json.load(rf)
    shapes = file_in["shapes"]
    # The image size is stored in the JSON, so the image itself
    # does not need to be read with cv2.
    W = file_in["imageWidth"]
    H = file_in["imageHeight"]
    # Assumes every JSON contains both a "target" rectangle and a
    # "circle_point" shape; otherwise the variables below are undefined.
    for shape in shapes:
        if shape["label"] == "target":
            # Bounding box -> normalized center, width, height.
            x1 = shape["points"][0][0]
            y1 = shape["points"][0][1]
            x2 = shape["points"][1][0]
            y2 = shape["points"][1][1]
            x = (x1 + x2) / 2 / W
            y = (y1 + y2) / 2 / H
            w = (x2 - x1) / W
            h = (y2 - y1) / H
        if shape["label"] == "circle_point":
            # Normalize the two recorded corner points, then derive
            # five landmarks from them.
            x3 = shape["points"][0][0] / W
            y3 = shape["points"][0][1] / H
            x4 = shape["points"][1][0] / W
            y4 = shape["points"][1][1] / H
            x5 = (x3 + x4) / 2
            y5 = (y3 + y4) / 2
            p1 = (x3, y5)  # left
            p2 = (x5, y3)  # top
            p3 = (x4, y5)  # right
            p4 = (x5, y4)  # bottom
            p5 = (x5, y5)  # center

    # One line per image: class id 0, the box, then the five landmarks.
    values = [x, y, w, h, *p1, *p2, *p3, *p4, *p5]
    anno = "0 " + " ".join(str(v) for v in values) + "\n"

    new_path = os.path.join(file_dir, file.replace(".json", ".txt"))
    with open(new_path, "w") as wf:
        wf.write(anno)
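To sanity-check the result, a small sketch that reads one generated label back, denormalizes it, and draws the box and landmarks on the image (example.bmp and example.txt are hypothetical names; substitute one of your own pairs):

import cv2

image = cv2.imread("example.bmp")  # hypothetical image file
H, W = image.shape[:2]
with open("example.txt") as f:  # hypothetical label file
    vals = f.readline().split()

# Values 1-4 are the normalized center/size of the box.
x, y, w, h = (float(v) for v in vals[1:5])
x1, y1 = int((x - w / 2) * W), int((y - h / 2) * H)
x2, y2 = int((x + w / 2) * W), int((y + h / 2) * H)
cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

# Values 5-14 are the five landmarks as (x, y) pairs.
pts = [float(v) for v in vals[5:15]]
for px, py in zip(pts[0::2], pts[1::2]):
    cv2.circle(image, (int(px * W), int(py * H)), 3, (0, 0, 255), -1)

cv2.imwrite("check.jpg", image)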

Next comes semantic segmentation dataset conversion.

# 1. Converting a single image

# cd into the folder with the annotations and run:

labelme_json_to_dataset x.json -o x

# x.json  the annotation file to convert
# x       the output directory
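For each JSON the command creates a small output folder; based on labelme's standard output it should look roughly like this:

x/
├── img.png          # the original image
├── label.png        # class-index mask
├── label_names.txt  # one label name per line
└── label_viz.png    # the mask overlaid on the image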

Converting multiple images works as follows:

First prepare your own labels txt file listing the labels, one per line. The first two lines must be __ignore__ and _background_ (the script below asserts this), followed by your own classes:

__ignore__
_background_
human
bike
bottle
# then run the following command

python labelme2voc.py input_dir output_dir --labels labels.txt


input_dir   the folder containing the annotation files

output_dir  the folder for the converted dataset

labels.txt  the labels file; the file name can be changed

Below is the labelme2voc.py code. If downloading is inconvenient, copy it from here or grab the original file from the labelme repository.

# labelme2voc.py contents

#!/usr/bin/env python

from __future__ import print_function

import argparse
import glob
import os
import os.path as osp
import sys

import imgviz
import numpy as np

import labelme


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClass"))
    os.makedirs(osp.join(args.output_dir, "SegmentationClassPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationClassVisualization")
        )
    os.makedirs(osp.join(args.output_dir, "SegmentationObject"))
    os.makedirs(osp.join(args.output_dir, "SegmentationObjectPNG"))
    if not args.noviz:
        os.makedirs(
            osp.join(args.output_dir, "SegmentationObjectVisualization")
        )
    print("Creating dataset:", args.output_dir)

    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        elif class_id == 0:
            assert class_name == "_background_"
        class_names.append(class_name)
    class_names = tuple(class_names)
    print("class_names:", class_names)
    out_class_names_file = osp.join(args.output_dir, "class_names.txt")
    with open(out_class_names_file, "w") as f:
        f.writelines("\n".join(class_names))
    print("Saved class_names:", out_class_names_file)

    for filename in glob.glob(osp.join(args.input_dir, "*.json")):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")
        out_cls_file = osp.join(
            args.output_dir, "SegmentationClass", base + ".npy"
        )
        out_clsp_file = osp.join(
            args.output_dir, "SegmentationClassPNG", base + ".png"
        )
        if not args.noviz:
            out_clsv_file = osp.join(
                args.output_dir,
                "SegmentationClassVisualization",
                base + ".jpg",
            )
        out_ins_file = osp.join(
            args.output_dir, "SegmentationObject", base + ".npy"
        )
        out_insp_file = osp.join(
            args.output_dir, "SegmentationObjectPNG", base + ".png"
        )
        if not args.noviz:
            out_insv_file = osp.join(
                args.output_dir,
                "SegmentationObjectVisualization",
                base + ".jpg",
            )

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)

        cls, ins = labelme.utils.shapes_to_label(
            img_shape=img.shape,
            shapes=label_file.shapes,
            label_name_to_value=class_name_to_id,
        )
        ins[cls == -1] = 0  # ignore it.

        # class label
        labelme.utils.lblsave(out_clsp_file, cls)
        np.save(out_cls_file, cls)
        if not args.noviz:
            clsv = imgviz.label2rgb(
                cls,
                imgviz.rgb2gray(img),
                label_names=class_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_clsv_file, clsv)

        # instance label
        labelme.utils.lblsave(out_insp_file, ins)
        np.save(out_ins_file, ins)
        if not args.noviz:
            instance_ids = np.unique(ins)
            instance_names = [str(i) for i in range(max(instance_ids) + 1)]
            insv = imgviz.label2rgb(
                ins,
                imgviz.rgb2gray(img),
                label_names=instance_names,
                font_size=15,
                loc="rb",
            )
            imgviz.io.imsave(out_insv_file, insv)


if __name__ == "__main__":
    main()
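After it runs, output_dir contains the converted dataset; based on the directories the script creates (without --noviz), the layout is:

output_dir/
├── class_names.txt                    # the final class list
├── JPEGImages/                        # input images re-saved as .jpg
├── SegmentationClass/                 # class masks as .npy arrays
├── SegmentationClassPNG/              # class masks as indexed .png
├── SegmentationClassVisualization/    # class masks drawn over the images
├── SegmentationObject/                # instance masks as .npy arrays
├── SegmentationObjectPNG/             # instance masks as indexed .png
└── SegmentationObjectVisualization/   # instance masks drawn over the images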

Below is the voc2coco code (despite the name, it converts labelme JSON annotations directly to COCO format):

#!/usr/bin/env python

import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid

import imgviz
import numpy as np

import labelme

try:
    import pycocotools.mask
except ImportError:
    print("Please install pycocotools:\n\n    pip install pycocotools\n")
    sys.exit(1)


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input_dir", help="input annotated directory")
    parser.add_argument("output_dir", help="output dataset directory")
    parser.add_argument("--labels", help="labels file", required=True)
    parser.add_argument(
        "--noviz", help="no visualization", action="store_true"
    )
    args = parser.parse_args()

    if osp.exists(args.output_dir):
        print("Output directory already exists:", args.output_dir)
        sys.exit(1)
    os.makedirs(args.output_dir)
    os.makedirs(osp.join(args.output_dir, "JPEGImages"))
    if not args.noviz:
        os.makedirs(osp.join(args.output_dir, "Visualization"))
    print("Creating dataset:", args.output_dir)

    now = datetime.datetime.now()

    data = dict(
        info=dict(
            description=None,
            url=None,
            version=None,
            year=now.year,
            contributor=None,
            date_created=now.strftime("%Y-%m-%d %H:%M:%S.%f"),
        ),
        licenses=[
            dict(
                url=None,
                id=0,
                name=None,
            )
        ],
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        type="instances",
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )

    class_name_to_id = {}
    for i, line in enumerate(open(args.labels).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        if class_id == -1:
            assert class_name == "__ignore__"
            continue
        class_name_to_id[class_name] = class_id
        data["categories"].append(
            dict(
                supercategory=None,
                id=class_id,
                name=class_name,
            )
        )

    out_ann_file = osp.join(args.output_dir, "annotations.json")
    label_files = glob.glob(osp.join(args.input_dir, "*.json"))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(args.output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type", "polygon")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )

            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            if shape_type == "rectangle":
                (x1, y1), (x2, y2) = points
                x1, x2 = sorted([x1, x2])
                y1, y2 = sorted([y1, y2])
                points = [x1, y1, x2, y1, x2, y2, x1, y2]
            if shape_type == "circle":
                (x1, y1), (x2, y2) = points
                r = np.linalg.norm([x2 - x1, y2 - y1])
                # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
                # x: tolerance of the gap between the arc and the line segment
                n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
                i = np.arange(n_points_circle)
                x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
                y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
                points = np.stack((x, y), axis=1).flatten().tolist()
            else:
                points = np.asarray(points).flatten().tolist()

            segmentations[instance].append(points)
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in class_name_to_id:
                continue
            cls_id = class_name_to_id[cls_name]

            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )

        if not args.noviz:
            viz = img
            if masks:
                labels, captions, masks = zip(
                    *[
                        (class_name_to_id[cnm], cnm, msk)
                        for (cnm, gid), msk in masks.items()
                        if cnm in class_name_to_id
                    ]
                )
                viz = imgviz.instances2rgb(
                    image=img,
                    labels=labels,
                    masks=masks,
                    captions=captions,
                    font_size=15,
                    line_width=2,
                )
            out_viz_file = osp.join(
                args.output_dir, "Visualization", base + ".jpg"
            )
            imgviz.io.imsave(out_viz_file, viz)

    with open(out_ann_file, "w") as f:
        json.dump(data, f)


if __name__ == "__main__":
    main()
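This script takes the same arguments as labelme2voc.py. Assuming it is saved as labelme2coco.py, a typical invocation would be:

python labelme2coco.py input_dir output_dir --labels labels.txt

It re-saves the images under output_dir/JPEGImages, writes the COCO annotations to output_dir/annotations.json, and, unless --noviz is passed, renders overlay images into output_dir/Visualization.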

