Win11, no conda: building a COCO-format object detection dataset from self-collected data, from scratch

I took on a task: build a dataset for training an object detector for a neighboring department. The requirements: COCO format, two given labels, and the data source is surveillance video.

Breaking the task down:

  1. Extract frames from the videos
  2. Keep only non-duplicate images (the surveillance scene changes very little)
  3. Label the filtered images with labelme
  4. Augment the data, since the raw volume is small
  5. Convert the augmented labelme dataset into a COCO-format dataset

 Frame extraction + de-duplication

import cv2
import os
from PIL import Image
import imagehash

# Folder containing the source videos
video_folder = ''  # replace with your video folder path

# Folder the extracted frames are saved to
output_folder = ''  # replace with your output folder path

# Create the output folder if it does not exist
os.makedirs(output_folder, exist_ok=True)

# Maps image hash -> saved file path, used to skip duplicate frames
hash_dict = {}

# Global frame counter across all videos
global_frame_count = 0

# Iterate over every video file in the folder
for filename in os.listdir(video_folder):
    if filename.endswith('.mp4'):  # only .mp4 videos are handled here
        video_path = os.path.join(video_folder, filename)
        vc = cv2.VideoCapture(video_path)

        if vc.isOpened():
            rval, prev_frame = vc.read()
        else:
            rval = False

        frame_count = 0  # frame counter for the current video

        while rval:
            # Read the next frame
            rval, curr_frame = vc.read()
            if not rval:
                break

            # Pixel-wise difference between consecutive frames
            diff = cv2.absdiff(curr_frame, prev_frame)
            diff_sum = diff.sum()

            # Only consider frames that differ from the previous one.
            # Sensor noise makes diff_sum > 0 nearly always, so raise the
            # threshold if too many near-identical frames slip through.
            if diff_sum > 0:
                global_frame_count += 1
                image_path = os.path.join(output_folder, f'image_{global_frame_count}.jpg')

                # Duplicate check via average hash on the grayscale image.
                # OpenCV frames are BGR, so convert to RGB before handing
                # them to PIL.
                image = Image.fromarray(cv2.cvtColor(curr_frame, cv2.COLOR_BGR2RGB))
                gray_image = image.convert('L')
                image_hash = imagehash.average_hash(gray_image)

                if image_hash in hash_dict:
                    # Hash already seen: skip this duplicate frame
                    print(image_path, "skipped duplicate image")
                else:
                    # New image: save it and remember its hash
                    image.save(image_path)
                    hash_dict[image_hash] = image_path

            prev_frame = curr_frame
            frame_count += 1

        vc.release()
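
Exact hash matching only removes pixel-identical frames. For surveillance footage it can also help to drop near-duplicates whose hashes differ by a few bits; imagehash exposes the Hamming distance between two hashes as the subtraction operator. A minimal sketch of that variant, reusing the setup above (the threshold of 5 is an assumption to tune per scene, not a value from the original script):

MAX_DISTANCE = 5  # assumed threshold; tune for your scene

def is_near_duplicate(image_hash, saved_hashes, max_distance=MAX_DISTANCE):
    # saved_hashes: the imagehash.ImageHash keys already stored in hash_dict
    return any(image_hash - h <= max_distance for h in saved_hashes)

# usage inside the loop above, replacing the `image_hash in hash_dict` test:
# if is_near_duplicate(image_hash, hash_dict.keys()): ...skip the frame...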

Labeling the filtered images with labelme

I used the Windows build of labelme. Anaconda refused to install on my machine for reasons I never figured out, so I installed a plain Python 3.11 and put the required packages into the system environment to run the code in this post. As for labelme itself, I launched it from its .exe; it sits somewhere inside the python3.11 folder and is a pain to locate by hand, but a file search turns it up, and it starts fine.

Once started, it works the same way the online tutorials describe; see for example: 深度学习图像标签标注软件labelme超详细教程_labelme是做什么用的-CSDN博客
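
If the bundled exe is hard to track down, labelme can also be installed into the same system Python with pip and started from a terminal; this is standard labelme usage rather than anything specific to this setup:

pip install labelme
labelme  # opens the GUI; use "Open Dir" to load the filtered frames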

Data augmentation

Augmentation can be done on the labelme-format data first, and the result converted to other formats afterwards. The augmentation code:

# -*- coding:utf-8 -*-
"""
Time:     2021.10.31
Author:   Athrunsunny
Version:  V 0.1
File:     enhance.py
"""
import cv2
import torch
import numpy as np
import warnings
import numbers
import math
import random
import json
import os
import base64
import io
import PIL.Image
from tqdm import tqdm
from glob import glob
from torch import Tensor
from PIL import Image, ImageDraw
from torchvision import transforms

from torchtoolbox.transform import Cutout
from typing import Tuple, List, Optional
from torchvision.transforms import functional as F

ROOT_DIR = os.getcwd()
VERSION = '4.5.7'  # set to match your labelme version
functionList = ['resize', 'resize_', 'random_flip_horizon', 'random_flip_vertical', 'center_crop', 'random_equalize',
                'random_autocontrast', 'random_adjustSharpness', 'random_solarize', 'random_posterize',
                'random_grayscale', 'gaussian_blur', 'random_invert', 'random_cutout', 'random_erasing',
                'random_bright', 'random_contrast', 'random_saturation', 'add_gasuss_noise', 'add_salt_noise',
                'add_pepper_noise', 'mixup', 'random_perspective', 'random_rotate', 'mosaic']


class RandomHorizontalFlip(torch.nn.Module):
    def __init__(self, p=0.5):
        super().__init__()
        self.p = p

    def forward(self, img):
        # always flips; the caller (random_flip_horizon) applies the
        # probability check itself so the boxes stay consistent with the image
        return F.hflip(img)

    def __repr__(self):
        return self.__class__.__name__ + '(p={})'.format(self.p)


class RandomVerticalFlip(torch.nn.Module):
    def __init__(self, p=0.5):
        super().__init__()
        self.p = p

    def forward(self, img):
        # always flips; the caller (random_flip_vertical) applies the
        # probability check itself so the boxes stay consistent with the image
        return F.vflip(img)

    def __repr__(self):
        return self.__class__.__name__ + '(p={})'.format(self.p)


class RandomErasing(torch.nn.Module):
    def __init__(self, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False):
        super().__init__()
        if not isinstance(value, (numbers.Number, str, tuple, list)):
            raise TypeError("Argument value should be either a number or str or a sequence")
        if isinstance(value, str) and value != "random":
            raise ValueError("If value is str, it should be 'random'")
        if not isinstance(scale, (tuple, list)):
            raise TypeError("Scale should be a sequence")
        if not isinstance(ratio, (tuple, list)):
            raise TypeError("Ratio should be a sequence")
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("Scale and ratio should be of kind (min, max)")
        if scale[0] < 0 or scale[1] > 1:
            raise ValueError("Scale should be between 0 and 1")

        self.scale = scale
        self.ratio = ratio
        self.value = value
        self.inplace = inplace

    @staticmethod
    def get_params(
            img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[List[float]] = None
    ) -> Tuple[int, int, int, int, Tensor]:
        img_c, img_h, img_w = img.shape[-3], img.shape[-2], img.shape[-1]
        area = img_h * img_w

        log_ratio = torch.log(torch.tensor(ratio))
        for _ in range(10):
            erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
            aspect_ratio = torch.exp(
                torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
            ).item()

            h = int(round(math.sqrt(erase_area * aspect_ratio)))
            w = int(round(math.sqrt(erase_area / aspect_ratio)))
            if not (h < img_h and w < img_w):
                continue

            if value is None:
                v = torch.empty([img_c, h, w], dtype=torch.float32).normal_()
            else:
                v = torch.tensor(value)[:, None, None]

            i = torch.randint(0, img_h - h + 1, size=(1,)).item()
            j = torch.randint(0, img_w - w + 1, size=(1,)).item()
            return i, j, h, w, v

        # Return original image
        return 0, 0, img_h, img_w, img

    def forward(self, img):
        # cast self.value to script acceptable type
        if isinstance(self.value, (int, float)):
            value = [self.value, ]
        elif isinstance(self.value, str):
            value = None
        elif isinstance(self.value, tuple):
            value = list(self.value)
        else:
            value = self.value

        if value is not None and not (len(value) in (1, img.shape[-3])):
            raise ValueError(
                "If value is a sequence, it should have either a single value or "
                "{} (number of input channels)".format(img.shape[-3])
            )

        x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=value)
        return F.erase(img, x, y, h, w, v, self.inplace)


def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1e-16):  # box1(4,n), box2(4,n)
    # Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
    w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
    w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
    ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps))  # aspect ratio
    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates


def random_perspective(im, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
    # targets = [cls, xyxy]

    height = im.shape[0] + border[0] * 2  # shape(h,w,c)
    width = im.shape[1] + border[1] * 2

    # Center
    C = np.eye(3)
    C[0, 2] = -im.shape[1] / 2  # x translation (pixels)
    C[1, 2] = -im.shape[0] / 2  # y translation (pixels)

    # Perspective
    P = np.eye(3)
    P[2, 0] = random.uniform(-perspective, perspective)  # x perspective (about y)
    P[2, 1] = random.uniform(-perspective, perspective)  # y perspective (about x)

    # Rotation and Scale
    R = np.eye(3)
    a = random.uniform(-degrees, degrees)
    # a += random.choice([-180, -90, 0, 90])  # add 90deg rotations to small rotations
    s = random.uniform(1 - scale, 1 + scale)
    # s = 2 ** random.uniform(-scale, scale)
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180)  # y shear (deg)

    # Translation
    T = np.eye(3)
    T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width  # x translation (pixels)
    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height  # y translation (pixels)

    # Combined rotation matrix
    M = T @ S @ R @ P @ C  # order of operations (right to left) is IMPORTANT
    if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any():  # image changed
        if perspective:
            im = cv2.warpPerspective(im, M, dsize=(width, height), borderValue=(114, 114, 114))
        else:  # affine
            im = cv2.warpAffine(im, M[:2], dsize=(width, height), borderValue=(114, 114, 114))

    # Visualize
    # import matplotlib.pyplot as plt
    # ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
    # ax[0].imshow(im[:, :, ::-1])  # base
    # ax[1].imshow(im2[:, :, ::-1])  # warped

    # Transform label coordinates
    n = len(targets)
    if n:
        new = np.zeros((n, 4))
        # warp boxes
        xy = np.ones((n * 4, 3))
        xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
        xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8)  # perspective rescale or affine

        # create new boxes
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # clip
        new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
        new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)

        # filter candidates
        i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.10)
        targets = targets[i]
        targets[:, 1:5] = new[i]

    return im, targets


def cutout(im, labels):
    h, w = im.shape[:2]
    scales = [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction [0.5] * 1 +
    for s in scales:
        mask_h = random.randint(1, int(h * s))  # create random masks
        mask_w = random.randint(1, int(w * s))

        # box
        xmin = max(0, random.randint(0, w) - mask_w // 2)
        ymin = max(0, random.randint(0, h) - mask_h // 2)
        xmax = min(w, xmin + mask_w)
        ymax = min(h, ymin + mask_h)

        # apply random color mask
        im[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]

    return im, labels


def convert(size, box):
    dh = 1. / (size[0])
    dw = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return x, y, w, h


def load_json_points_to_norm(file, cls, size):
    assert isinstance(file, str)
    with open(file, 'r', encoding="utf-8") as f:
        doc = json.load(f)
    point = []
    for multi in doc["shapes"]:
        points = np.array(multi["points"])
        xmin = min(points[:, 0]) if min(points[:, 0]) > 0 else 0
        xmax = max(points[:, 0]) if max(points[:, 0]) > 0 else 0
        ymin = min(points[:, 1]) if min(points[:, 1]) > 0 else 0
        ymax = max(points[:, 1]) if max(points[:, 1]) > 0 else 0
        label = multi["label"]
        if xmax <= xmin:
            pass
        elif ymax <= ymin:
            pass
        else:
            cls_id = cls.index(label)
            b = (float(xmin), float(xmax), float(ymin), float(ymax))
            bb = convert(size, b)
            point.append([cls_id] + list(bb))

    return torch.tensor(point)


def load_image(path, img_size=640, augment=False):
    im = cv2.imread(path)  # BGR
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    assert im is not None, f'Image Not Found {path}'

    h0, w0 = im.shape[:2]  # orig hw
    r = img_size / max(h0, w0)  # ratio
    if r != 1:  # if sizes are not equal
        im = cv2.resize(im,
                        (int(w0 * r), int(h0 * r)),
                        interpolation=cv2.INTER_LINEAR if (augment or r > 1) else cv2.INTER_AREA)
    return im, (h0, w0), im.shape[:2]  # im, hw_original, hw_resized


def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top left x
    y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top left y
    y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom right x
    y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom right y
    return y


def load_mosaic(imgpath, cls, imgfile, img_size=640):
    labels4 = []
    s = img_size
    mosaic_border = [-img_size // 2, -img_size // 2]
    yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in mosaic_border)  # mosaic center x, y
    files = np.concatenate([np.array([imgpath]), np.random.choice(imgfile, size=3, replace=False)])
    random.shuffle(files)
    for i, file in enumerate(files):
        # Load image
        img, (oh, ow), (h, w) = load_image(file)
        jsonpath = '.'.join(file.split('.')[:-1]) + '.json'
        points = load_json_points_to_norm(jsonpath, cls, (oh, ow))
        # place img in img4
        if i == 0:  # top left
            img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        if points.numel():  # points is a torch.Tensor; .size is a method here and would always be truthy
            points[:, 1:] = xywhn2xyxy(points[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
        labels4.append(points)

    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:]):
        np.clip(x, 0, 2 * s, out=x)

    return Image.fromarray(img4), torch.from_numpy(labels4)


class DataAugmentation(object):
    to_tensor = transforms.ToTensor()
    to_image = transforms.ToPILImage()

    def __init__(self):
        super(DataAugmentation, self).__init__()
        self.transforms = transforms

    def resize(self, img, boxes, size):
        """
        Resize both sides of the image to `size` and adjust boxes accordingly
        :param img: Image
        :param boxes: bbox coordinates
        :param size: target size
        :return:
        """
        w, h = img.size
        sw = size / w
        sh = size / h
        label, boxes = boxes[:, :1], boxes[:, 1:5]
        boxes = boxes * torch.Tensor([sw, sh, sw, sh])
        boxes = torch.cat((label, boxes), dim=1)
        return img.resize((size, size), Image.BILINEAR), boxes

    def resize_(self, img, boxes, size):
        """
        Resize the image to (size[0], size[1]) and adjust boxes accordingly
        :param img: Image
        :param boxes: bbox coordinates
        :param size: target (w, h)
        :return:
        """
        w, h = img.size
        # min_size = min(w, h)
        # sw = sh = size / min_size
        sw = size[0] / w
        sh = size[1] / h
        ow = int(sw * w + 0.5)
        oh = int(sh * h + 0.5)
        label, boxes = boxes[:, :1], boxes[:, 1:5]
        boxes = boxes * torch.Tensor([sw, sh, sw, sh])
        boxes = torch.cat((label, boxes), dim=1)
        return img.resize((ow, oh), Image.BILINEAR), boxes

    def random_flip_horizon(self, img, boxes):
        """
        Horizontally flip the given image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :return:
        """
        p = torch.rand(1)
        if p > 0.5:
            transform = RandomHorizontalFlip()
            img = transform(img)
            w = img.width
            label, boxes = boxes[:, :1], boxes[:, 1:5]
            xmin = w - boxes[:, 2]
            xmax = w - boxes[:, 0]
            boxes[:, 0] = xmin
            boxes[:, 2] = xmax
            boxes = torch.cat((label, boxes), dim=1)
        return img, boxes

    def random_flip_vertical(self, img, boxes):
        """
        Vertically flip the given image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :return:
        """
        p = torch.rand(1)
        if p > 0.5:
            transform = RandomVerticalFlip()
            img = transform(img)
            h = img.height
            label, boxes = boxes[:, :1], boxes[:, 1:5]
            ymin = h - boxes[:, 3]
            ymax = h - boxes[:, 1]
            boxes[:, 1] = ymin
            boxes[:, 3] = ymax
            boxes = torch.cat((label, boxes), dim=1)
        return img, boxes

    def center_crop(self, img, boxes, size=(600, 600)):
        """
        Center crop
        :param img: Image
        :param boxes: bbox coordinates
        :param size: crop size (w, h)
        :return:
        """
        w, h = img.size
        ow, oh = size
        max_size = torch.as_tensor([ow - 1, oh - 1], dtype=torch.float32)
        i = int(round((h - oh) / 2.))
        j = int(round((w - ow) / 2.))
        img = img.crop((j, i, j + ow, i + oh))
        label, boxes = boxes[:, :1], boxes[:, 1:5]
        boxes = boxes - torch.Tensor([j, i, j, i])
        boxes = torch.min(boxes.reshape(-1, 2, 2), max_size)
        boxes = boxes.clamp(min=0).reshape(-1, 4)
        boxes = torch.cat((label, boxes), dim=1)
        return img, boxes

    def random_equalize(self, img, boxes, p=0.5):
        """
        Equalize the histogram of the given image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :param p:probability of the image being equalized
        :return:
        """
        transform = self.transforms.RandomEqualize(p=p)
        img = transform(img)
        return img, boxes

    def random_autocontrast(self, img, boxes, p=0.5):
        """
        Autocontrast the pixels of the given image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :param p:probability of the image being autocontrasted
        :return:
        """
        transform = self.transforms.RandomAutocontrast(p=p)
        img = transform(img)
        return img, boxes

    def random_adjustSharpness(self, img, boxes, sharpness_factor=1, p=0.5):
        """
        Adjust the sharpness of the image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :param sharpness_factor:How much to adjust the sharpness
        :param p:probability of the image being color inverted
        :return:
        """
        transform = self.transforms.RandomAdjustSharpness(sharpness_factor=sharpness_factor, p=p)
        img = transform(img)
        return img, boxes

    def random_solarize(self, img, boxes, threshold=1, p=0.5):
        """
        Solarize the image randomly with a given probability by inverting all pixel values above a threshold.
        :param img: Image
        :param boxes: bbox coordinates
        :param threshold:all pixels equal or above this value are inverted
        :param p:probability of the image being color inverted
        :return:
        """
        transform = self.transforms.RandomSolarize(threshold=threshold, p=p)
        img = transform(img)
        return img, boxes

    def random_posterize(self, img, boxes, bits=0, p=0.5):
        """
        Posterize the image randomly with a given probability by reducing the number of bits for each color channel.
        :param img: Image
        :param boxes: bbox coordinates
        :param bits:number of bits to keep for each channel (0-8)
        :param p:probability of the image being color inverted
        :return:
        """
        transform = self.transforms.RandomPosterize(bits=bits, p=p)
        img = transform(img)
        return img, boxes

    def random_grayscale(self, img, boxes, p=0.5):
        """
        Randomly convert image to grayscale with a probability of p (default 0.1).
        :param img: Image
        :param boxes: bbox coordinates
        :param p:Grayscale version of the input image with probability p and unchanged with probability (1-p).
        :return:
        """
        transform = self.transforms.RandomGrayscale(p=p)
        img = transform(img)
        return img, boxes

    def gaussian_blur(self, img, boxes, kernel_size=5, sigma=(0.1, 2.0)):
        """
        Blurs image with randomly chosen Gaussian blur.
        :param img: Image
        :param boxes: bbox coordinates
        :param kernel_size:Size of the Gaussian kernel
        :param sigma:Standard deviation to be used for creating kernel to perform blurring.
        :return:
        """
        transform = self.transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma)
        img = transform(img)
        return img, boxes

    def random_invert(self, img, boxes, p=0.5):
        """
        Inverts the colors of the given image randomly with a given probability.
        :param img: Image
        :param boxes: bbox coordinates
        :param p:probability of the image being color inverted
        :return:
        """
        transform = self.transforms.RandomInvert(p=p)
        img = transform(img)
        return img, boxes

    def random_cutout_(self, img, boxes, p=0.5, scale=(0.02, 0.4), ratio=(0.4, 1 / 0.4), value=(0, 255),
                       pixel_level=False, inplace=False):
        """
        Random erase the given CV Image
        :param img: Image
        :param boxes: bbox coordinates
        :param p:probability that the random erasing operation will be performed
        :param scale:range of proportion of erased area against input image
        :param ratio:range of aspect ratio of erased area
        :param value:erasing value
        :param pixel_level:filling one number or not. Default value is False
        :param inplace:boolean to make this transform inplace. Default set to False
        :return:
        """
        transform = Cutout(p=p, scale=scale, ratio=ratio, value=value, pixel_level=pixel_level, inplace=inplace)
        img = transform(img)
        return img, boxes

    def random_cutout(self, img, boxes):
        img = np.array(img)
        img, boxes = cutout(img, boxes)
        img = Image.fromarray(img)
        return img, boxes

    def random_rotate(self, img, boxes, degrees=5, expand=False, center=None, fill=0, resample=None):
        degree = torch.randint(0, degrees + 1, (1,))
        degree = degree.item()
        # note: newer torchvision versions renamed `resample` to `interpolation`;
        # pass interpolation=... there instead
        transform = self.transforms.RandomRotation(degrees=degree, expand=expand, center=center, fill=fill,
                                                   resample=resample)
        img = transform(img)
        return img, boxes

    def random_perspective(self, img, boxes, degrees=5, translate=.1, scale=.1, shear=5, perspective=0.0,
                           border=(0, 0)):
        img = np.array(img)
        img, boxes = random_perspective(img, boxes.numpy(), degrees=degrees, translate=translate, scale=scale,
                                        shear=shear, perspective=perspective, border=border)
        img = Image.fromarray(img)
        return img, torch.from_numpy(boxes)

    def random_erasing(self, img, boxes, count=3, scale=0.01, ratio=0.4, value=0, inplace=False):
        """
        Randomly selects a rectangle region in an torch Tensor image and erases its pixels.
        :param img: Image
        :param boxes: bbox coordinates
        :param scale:range of proportion of erased area against input image
        :param ratio:range of aspect ratio of erased area
        :param value:erasing value
        :param inplace:boolean to make this transform inplace. Default set to False
        :return:
        """
        scale = (scale, scale)
        ratio = (ratio, 1. / ratio)
        if count != 0:
            for num in range(count):
                transform = RandomErasing(scale=scale, ratio=ratio, value=value, inplace=inplace)
                img = transform(self.to_tensor(img))
                img = self.to_image(img)
            return img, boxes
        transform = RandomErasing(scale=scale, ratio=ratio, value=value, inplace=inplace)
        img = transform(self.to_tensor(img))
        return self.to_image(img), boxes

    def random_bright(self, img, boxes, u=32):
        """
        Random brightness shift
        :param img: Image
        :param boxes: bbox coordinates
        :param u:
        :return:
        """
        img = self.to_tensor(img)
        alpha = np.random.uniform(-u, u) / 255
        img += alpha
        img = img.clamp(min=0.0, max=1.0)
        return self.to_image(img), boxes

    def random_contrast(self, img, boxes, lower=0.5, upper=1.5):
        """
        Random contrast
        :param img: Image
        :param boxes: bbox coordinates
        :param lower:
        :param upper:
        :return:
        """
        img = self.to_tensor(img)
        alpha = np.random.uniform(lower, upper)
        img *= alpha
        img = img.clamp(min=0, max=1.0)
        return self.to_image(img), boxes

    def random_saturation(self, img, boxes, lower=0.5, upper=1.5):
        """
        Random saturation
        :param img: Image
        :param boxes: bbox coordinates
        :param lower:
        :param upper:
        :return:
        """
        img = self.to_tensor(img)
        alpha = np.random.uniform(lower, upper)
        # note: this scales channel index 1 of the RGB tensor (the green
        # channel), which only approximates a true saturation change
        img[1] = img[1] * alpha
        img[1] = img[1].clamp(min=0, max=1.0)
        return self.to_image(img), boxes

    def add_gasuss_noise(self, img, boxes, mean=0, std=0.1):
        """
        Random Gaussian noise
        :param img: Image
        :param boxes: bbox coordinates
        :param mean:
        :param std:
        :return:
        """
        img = self.to_tensor(img)
        noise = torch.normal(mean, std, img.shape)
        img += noise
        img = img.clamp(min=0, max=1.0)
        return self.to_image(img), boxes

    def add_salt_noise(self, img, boxes):
        """
        Random salt noise
        :param img: Image
        :param boxes: bbox coordinates
        :return:
        """
        img = self.to_tensor(img)
        noise = torch.rand(img.shape)
        alpha = np.random.random()
        img[noise[:, :, :] > alpha] = 1.0
        return self.to_image(img), boxes

    def add_pepper_noise(self, img, boxes):
        """
        Random pepper noise
        :param img: Image
        :param boxes: bbox coordinates
        :return:
        """
        img = self.to_tensor(img)
        noise = torch.rand(img.shape)
        alpha = np.random.random()
        img[noise[:, :, :] > alpha] = 0
        return self.to_image(img), boxes

    def mixup(self, img1, img2, box1, box2, alpha=32.):
        """
        mixup
        :param img1: Image
        :param img2: Image
        :param box1: bbox coordinates for img1
        :param box2: bbox coordinates for img2
        :param alpha:
        :return:
        """
        p = torch.rand(1)
        if p > 0.5:
            max_w = max(img1.size[0], img2.size[0])
            max_h = max(img1.size[1], img2.size[1])
            img1, box1 = self.resize_(img1, box1, (max_w, max_h))
            img2, box2 = self.resize_(img2, box2, (max_w, max_h))

            img1 = self.to_tensor(img1)
            img2 = self.to_tensor(img2)
            weight = np.random.beta(alpha, alpha)
            miximg = weight * img1 + (1 - weight) * img2
            return self.to_image(miximg), torch.cat([box1, box2])
        return img1, box1

    def mosaic(self, imgpath, cls, imgfile, img_size=640):
        p = torch.rand(1)
        if p > 0.5:
            return load_mosaic(imgpath, cls, imgfile, img_size=img_size)
        else:
            img = Image.open(imgpath)
            jsonpath = '.'.join(imgpath.split('.')[:-1]) + '.json'
            return img, load_json_points(jsonpath, cls)

    def draw_img(self, img, boxes):
        draw = ImageDraw.Draw(img)
        for box in boxes:
            draw.rectangle(list(box[1:]), outline='yellow', width=2)
        img.show()


def load_json_points(file, cls):
    assert isinstance(file, str)
    with open(file, 'r', encoding="utf-8") as f:
        doc = json.load(f)
    # point = [item['points'][0] + item['points'][1] for item in doc['shapes']]
    point = [[cls.index(item['label'])] + item['points'][0] + item['points'][1] for item in doc['shapes']]
    return torch.tensor(point)


def img_arr_to_b64(img_arr):
    img_pil = PIL.Image.fromarray(img_arr)
    f = io.BytesIO()
    img_pil.save(f, format="PNG")
    img_bin = f.getvalue()
    if hasattr(base64, "encodebytes"):
        img_b64 = base64.encodebytes(img_bin)
    else:
        img_b64 = base64.encodestring(img_bin)
    return img_b64


def create_json(img, imagePath, filename, cls, points):
    data = dict()
    data['version'] = VERSION
    data['flags'] = dict()
    info = list()
    for point in points:
        shape_info = dict()
        shape_info['label'] = cls[int(point[0].item())]
        if point is None:
            shape_info['points'] = [[], []]
        else:
            shape_info['points'] = [[point[1].item(), point[2].item()],
                                    [point[3].item(), point[4].item()]]
        shape_info['group_id'] = None
        shape_info['shape_type'] = 'rectangle'
        shape_info['flags'] = dict()
        info.append(shape_info)
    data['shapes'] = info
    data['imagePath'] = imagePath
    height, width = img.shape[:2]
    # data['imageData'] = img_arr_to_b64(img).decode('utf-8')
    data['imageData'] = None  # leave imageData empty to reduce file size
    data['imageHeight'] = height
    data['imageWidth'] = width
    jsondata = json.dumps(data, indent=4, separators=(',', ': '))
    f = open(filename, 'w', encoding="utf-8")
    f.write(jsondata)
    f.close()


def get_all_class(files):
    classes = list()
    print('get class name ......')
    for filename in tqdm(files):
        json_file = json.load(open(filename, "r", encoding="utf-8"))
        for item in json_file["shapes"]:
            label_class = item['label']
            if label_class not in classes:
                classes.append(label_class)
    return classes


def create_datasets(method, extimes=1, path=ROOT_DIR):
    """
    Augment the dataset
    # Put this .py in the folder of the labelme dataset and run it; the
    # augmented images are saved in the 'create' subfolder
    :param method: augmentation methods to apply
    :param extimes: number of augmented copies to generate per image
    :param path: save path, defaults to the current folder
    :return:
    """
    if 'mixup' in method:
        method.remove('mixup')
        method.insert(len(method) - 1, 'mixup')
    if 'mosaic' in method:
        method.remove('mosaic')
        method.insert(0, 'mosaic')
    classname = 'DataAugmentation()'
    files = glob(path + "\\*.json")
    cls = get_all_class(files)
    print('processing file ......')
    files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in tqdm(files)]
    externs = ['png', 'jpg', 'JPEG', 'BMP', 'bmp']
    imgfiles = list()
    for extern in externs:
        imgfiles.extend(glob(path + "\\*." + extern))
    print('enhance image ......')
    for imgfile in tqdm(imgfiles):
        filename = '.'.join(imgfile.split('.')[:-1])
        imgfilename = filename.replace("\\", "/").split("/")[-1]
        if imgfilename in files:
            jsonpath = filename + '.json'

            for t in range(extimes):
                image = Image.open(imgfile)
                points = load_json_points(jsonpath, cls)

                for index, funcname in enumerate(method):
                    func = classname + '.' + funcname
                    if funcname == 'mixup':
                        imagepath = np.random.choice(imgfiles, size=1, replace=False)[0]
                        filename1 = '.'.join(imagepath.split('.')[:-1])
                        jsonpath1 = filename1 + '.json'
                        points1 = load_json_points(jsonpath1, cls)
                        image1 = Image.open(imagepath)
                        image, points = eval(func)(image, image1, points, points1)
                    elif funcname == 'mosaic':
                        image, points = eval(func)(imgfile, cls, imgfiles)
                    else:
                        image, points = eval(func)(image, points)

                # for viz
                # print(points)
                # d = classname + '.' + 'draw_img'
                # eval(d)(image, points)

                new_name = str(t) + '_' + imgfilename
                new_image = os.path.join(path, 'create', new_name + '.jpg')
                if not os.path.exists(os.path.join(path, 'create')):
                    os.makedirs(os.path.join(path, 'create'))
                image.save(new_image)

                new_json = os.path.join(path, 'create', new_name + '.json')
                create_json(np.array(image), new_name + '.jpg', new_json, cls, points)


if __name__ == '__main__':
    """
    Put this python file in the dataset folder and run it; the generated
    images are saved in the 'create' directory under the current path.
    meth lists the augmentation methods to apply; all options are in functionList.
    extimes is the augmentation multiplier.
    """
    path = r''  # folder where the images and their labelme JSONs sit together
    meth = ['random_flip_horizon', 'gaussian_blur', 'random_contrast', 'random_bright', 'random_cutout', 'mixup', 'add_gasuss_noise', 'mosaic']
    # meth = [ 'random_flip_horizon','center_crop', 'random_equalize',
    #             'random_autocontrast', 'random_adjustSharpness', 'random_solarize', 'random_posterize',
    #              'gaussian_blur', 'random_invert', 'random_cutout', 'random_erasing',
    #             'random_bright', 'random_contrast', 'random_saturation', 'add_gasuss_noise',
    #             'add_pepper_noise', 'mixup',  'random_rotate', 'mosaic']
    create_datasets(method=meth, extimes=10, path=path)
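
Before converting, it is worth spot-checking a few augmented samples to confirm the boxes still line up with the objects. A minimal sketch that draws the rectangles from the labelme JSON written by create_json (the sample name 0_image_1 is hypothetical; pick any generated image/json pair from the create folder):

import json
from PIL import Image, ImageDraw

name = r'create/0_image_1'  # hypothetical sample name
img = Image.open(name + '.jpg')
with open(name + '.json', encoding='utf-8') as f:
    data = json.load(f)
draw = ImageDraw.Draw(img)
for shape in data['shapes']:
    (x1, y1), (x2, y2) = shape['points']
    draw.rectangle([min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)],
                   outline='red', width=2)
img.show()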

Converting to a COCO-format dataset

The labelme-format dataset is converted to a COCO dataset with the code below; the run command is in the docstring at the end.

import argparse
import glob
import json
import os
import os.path as osp
import shutil
import xml.etree.ElementTree as ET

import numpy as np
import PIL.ImageDraw
from tqdm import tqdm
import cv2

label_to_num = {}
categories_list = []
labels_list = []


class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(MyEncoder, self).default(obj)


def images_labelme(data, num):
    image = {}
    image['height'] = data['imageHeight']
    image['width'] = data['imageWidth']
    image['id'] = num + 1
    if '\\' in data['imagePath']:
        image['file_name'] = data['imagePath'].split('\\')[-1]
    else:
        image['file_name'] = data['imagePath'].split('/')[-1]
    return image


def images_cityscape(data, num, img_file):
    image = {}
    image['height'] = data['imgHeight']
    image['width'] = data['imgWidth']
    image['id'] = num + 1
    image['file_name'] = img_file
    return image


def categories(label, labels_list):
    category = {}
    category['supercategory'] = 'component'
    category['id'] = len(labels_list) + 1
    category['name'] = label
    return category


def annotations_rectangle(points, label, image_num, object_num, label_to_num):
    annotation = {}
    seg_points = np.asarray(points).copy()
    seg_points[1, :] = np.asarray(points)[2, :]
    seg_points[2, :] = np.asarray(points)[1, :]
    annotation['segmentation'] = [list(seg_points.flatten())]
    annotation['iscrowd'] = 0
    annotation['image_id'] = image_num + 1
    annotation['bbox'] = list(
        map(float, [
            points[0][0], points[0][1], points[1][0] - points[0][0], points[1][
                1] - points[0][1]
        ]))
    annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
    annotation['category_id'] = label_to_num[label]
    annotation['id'] = object_num + 1
    return annotation


def annotations_polygon(height, width, points, label, image_num, object_num,
                        label_to_num):
    annotation = {}
    annotation['segmentation'] = [list(np.asarray(points).flatten())]
    annotation['iscrowd'] = 0
    annotation['image_id'] = image_num + 1
    annotation['bbox'] = list(map(float, get_bbox(height, width, points)))
    annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
    annotation['category_id'] = label_to_num[label]
    annotation['id'] = object_num + 1
    return annotation


def get_bbox(height, width, points):
    polygons = points
    mask = np.zeros([height, width], dtype=np.uint8)
    mask = PIL.Image.fromarray(mask)
    xy = list(map(tuple, polygons))
    PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
    mask = np.array(mask, dtype=bool)
    index = np.argwhere(mask == 1)
    rows = index[:, 0]
    clos = index[:, 1]
    left_top_r = np.min(rows)
    left_top_c = np.min(clos)
    right_bottom_r = np.max(rows)
    right_bottom_c = np.max(clos)
    return [
        left_top_c, left_top_r, right_bottom_c - left_top_c,
        right_bottom_r - left_top_r
    ]


def deal_json(ds_type, img_path, json_path):
    data_coco = {}
    images_list = []
    annotations_list = []
    image_num = -1
    object_num = -1
    for img_file in os.listdir(img_path):
        img_label = os.path.splitext(img_file)[0]
        if img_file.split('.')[
                -1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
            continue
        label_file = osp.join(json_path, img_label + '.json')
        print('Generating dataset from:', label_file)
        image_num = image_num + 1
        with open(label_file) as f:
            data = json.load(f)
            if ds_type == 'labelme':
                images_list.append(images_labelme(data, image_num))
            elif ds_type == 'cityscape':
                images_list.append(images_cityscape(data, image_num, img_file))
            if ds_type == 'labelme':
                for shapes in data['shapes']:
                    object_num = object_num + 1
                    label = shapes['label']
                    if label not in labels_list:
                        categories_list.append(categories(label, labels_list))
                        labels_list.append(label)
                        label_to_num[label] = len(labels_list)
                    p_type = shapes['shape_type']
                    if p_type == 'polygon':
                        points = shapes['points']
                        annotations_list.append(
                            annotations_polygon(data['imageHeight'], data[
                                'imageWidth'], points, label, image_num,
                                                object_num, label_to_num))

                    if p_type == 'rectangle':
                        (x1, y1), (x2, y2) = shapes['points']
                        x1, x2 = sorted([x1, x2])
                        y1, y2 = sorted([y1, y2])
                        points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]]
                        annotations_list.append(
                            annotations_rectangle(points, label, image_num,
                                                  object_num, label_to_num))
            elif ds_type == 'cityscape':
                for shapes in data['objects']:
                    object_num = object_num + 1
                    label = shapes['label']
                    if label not in labels_list:
                        categories_list.append(categories(label, labels_list))
                        labels_list.append(label)
                        label_to_num[label] = len(labels_list)
                    points = shapes['polygon']
                    annotations_list.append(
                        annotations_polygon(data['imgHeight'], data[
                            'imgWidth'], points, label, image_num, object_num,
                                            label_to_num))
    data_coco['images'] = images_list
    data_coco['categories'] = categories_list
    data_coco['annotations'] = annotations_list
    return data_coco


def voc_get_label_anno(ann_dir_path, ann_ids_path, labels_path):
    with open(labels_path, 'r') as f:
        labels_str = f.read().split()
    labels_ids = list(range(1, len(labels_str) + 1))

    with open(ann_ids_path, 'r') as f:
        ann_ids = [lin.strip().split(' ')[-1] for lin in f.readlines()]

    ann_paths = []
    for aid in ann_ids:
        if aid.endswith('xml'):
            ann_path = os.path.join(ann_dir_path, aid)
        else:
            ann_path = os.path.join(ann_dir_path, aid + '.xml')
        ann_paths.append(ann_path)

    return dict(zip(labels_str, labels_ids)), ann_paths


def voc_get_image_info(annotation_root, im_id):
    filename = annotation_root.findtext('filename')
    assert filename is not None
    img_name = os.path.basename(filename)

    size = annotation_root.find('size')
    width = float(size.findtext('width'))
    height = float(size.findtext('height'))

    image_info = {
        'file_name': filename,
        'height': height,
        'width': width,
        'id': im_id
    }
    return image_info


def voc_get_coco_annotation(obj, label2id):
    label = obj.findtext('name')
    assert label in label2id, "label is not in label2id."
    category_id = label2id[label]
    bndbox = obj.find('bndbox')
    xmin = float(bndbox.findtext('xmin'))
    ymin = float(bndbox.findtext('ymin'))
    xmax = float(bndbox.findtext('xmax'))
    ymax = float(bndbox.findtext('ymax'))
    assert xmax > xmin and ymax > ymin, "Box size error."
    o_width = xmax - xmin
    o_height = ymax - ymin
    anno = {
        'area': o_width * o_height,
        'iscrowd': 0,
        'bbox': [xmin, ymin, o_width, o_height],
        'category_id': category_id,
        'ignore': 0,
    }
    return anno


def voc_xmls_to_cocojson(annotation_paths, label2id, output_dir, output_file):
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }
    bnd_id = 1  # bounding box start id
    im_id = 0
    print('Start converting !')
    for a_path in tqdm(annotation_paths):
        # Read annotation xml
        ann_tree = ET.parse(a_path)
        ann_root = ann_tree.getroot()

        img_info = voc_get_image_info(ann_root, im_id)
        output_json_dict['images'].append(img_info)

        for obj in ann_root.findall('object'):
            ann = voc_get_coco_annotation(obj=obj, label2id=label2id)
            ann.update({'image_id': im_id, 'id': bnd_id})
            output_json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1
        im_id += 1

    for label, label_id in label2id.items():
        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
        output_json_dict['categories'].append(category_info)
    output_file = os.path.join(output_dir, output_file)
    with open(output_file, 'w') as f:
        output_json = json.dumps(output_json_dict)
        f.write(output_json)


def widerface_to_cocojson(root_path):
    train_gt_txt = os.path.join(root_path, "wider_face_split", "wider_face_train_bbx_gt.txt")
    val_gt_txt = os.path.join(root_path, "wider_face_split", "wider_face_val_bbx_gt.txt")
    train_img_dir = os.path.join(root_path, "WIDER_train", "images")
    val_img_dir = os.path.join(root_path, "WIDER_val", "images")
    assert train_gt_txt
    assert val_gt_txt
    assert train_img_dir
    assert val_img_dir
    save_path = os.path.join(root_path, "widerface_train.json")
    widerface_convert(train_gt_txt, train_img_dir, save_path)
    print("Wider Face train dataset converts sucess, the json path: {}".format(save_path))
    save_path = os.path.join(root_path, "widerface_val.json")
    widerface_convert(val_gt_txt, val_img_dir, save_path)
    print("Wider Face val dataset converts sucess, the json path: {}".format(save_path))


def widerface_convert(gt_txt, img_dir, save_path):
    output_json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": [{'supercategory': 'none', 'id': 0, 'name': "human_face"}]
    }
    bnd_id = 1  # bounding box start id
    im_id = 0
    print('Start converting !')
    with open(gt_txt) as fd:
        lines = fd.readlines()

    i = 0
    while i < len(lines):
        image_name = lines[i].strip()
        bbox_num = int(lines[i + 1].strip())
        i += 2
        img_info = get_widerface_image_info(img_dir, image_name, im_id)
        if img_info:
            output_json_dict["images"].append(img_info)
            for j in range(i, i + bbox_num):
                anno = get_widerface_ann_info(lines[j])
                anno.update({'image_id': im_id, 'id': bnd_id})
                output_json_dict['annotations'].append(anno)
                bnd_id += 1
        else:
            print("The image dose not exist: {}".format(os.path.join(img_dir, image_name)))
        bbox_num = 1 if bbox_num == 0 else bbox_num
        i += bbox_num
        im_id += 1
    with open(save_path, 'w') as f:
        output_json = json.dumps(output_json_dict)
        f.write(output_json)


def get_widerface_image_info(img_root, img_relative_path, img_id):
    image_info = {}
    save_path = os.path.join(img_root, img_relative_path)
    if os.path.exists(save_path):
        img = cv2.imread(save_path)
        image_info["file_name"] = os.path.join(os.path.basename(
            os.path.dirname(img_root)), os.path.basename(img_root),
            img_relative_path)
        image_info["height"] = img.shape[0]
        image_info["width"] = img.shape[1]
        image_info["id"] = img_id
    return image_info


def get_widerface_ann_info(info):
    info = [int(x) for x in info.strip().split()]
    anno = {
        'area': info[2] * info[3],
        'iscrowd': 0,
        'bbox': [info[0], info[1], info[2], info[3]],
        'category_id': 0,
        'ignore': 0,
        'blur': info[4],
        'expression': info[5],
        'illumination': info[6],
        'invalid': info[7],
        'occlusion': info[8],
        'pose': info[9]
    }
    return anno


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--dataset_type',
        help='the type of dataset, can be `voc`, `widerface`, `labelme` or `cityscape`')
    parser.add_argument('--json_input_dir', help='input annotated directory')
    parser.add_argument('--image_input_dir', help='image directory')
    parser.add_argument(
        '--output_dir', help='output dataset directory', default='./')
    parser.add_argument(
        '--train_proportion',
        help='the proportion of train dataset',
        type=float,
        default=1.0)
    parser.add_argument(
        '--val_proportion',
        help='the proportion of validation dataset',
        type=float,
        default=0.0)
    parser.add_argument(
        '--test_proportion',
        help='the proportion of test dataset',
        type=float,
        default=0.0)
    parser.add_argument(
        '--voc_anno_dir',
        help='In Voc format dataset, path to annotation files directory.',
        type=str,
        default=None)
    parser.add_argument(
        '--voc_anno_list',
        help='In Voc format dataset, path to annotation files ids list.',
        type=str,
        default=None)
    parser.add_argument(
        '--voc_label_list',
        help='In Voc format dataset, path to label list. The content of each line is a category.',
        type=str,
        default=None)
    parser.add_argument(
        '--voc_out_name',
        type=str,
        default='voc.json',
        help='In Voc format dataset, path to output json file')
    parser.add_argument(
        '--widerface_root_dir',
        help='The root_path for wider face dataset, which contains `wider_face_split`, `WIDER_train` and `WIDER_val`.And the json file will save in this path',
        type=str,
        default=None)
    args = parser.parse_args()
    try:
        assert args.dataset_type in ['voc', 'labelme', 'cityscape', 'widerface']
    except AssertionError as e:
        print(
            'Only the voc, labelme, cityscape and widerface dataset types are supported!')
        os._exit(0)

    if args.dataset_type == 'voc':
        assert args.voc_anno_dir and args.voc_anno_list and args.voc_label_list
        label2id, ann_paths = voc_get_label_anno(
            args.voc_anno_dir, args.voc_anno_list, args.voc_label_list)
        voc_xmls_to_cocojson(
            annotation_paths=ann_paths,
            label2id=label2id,
            output_dir=args.output_dir,
            output_file=args.voc_out_name)
    elif args.dataset_type == "widerface":
        assert args.widerface_root_dir
        widerface_to_cocojson(args.widerface_root_dir)
    else:
        try:
            assert os.path.exists(args.json_input_dir)
        except AssertionError as e:
            print('The json folder does not exist!')
            os._exit(0)
        try:
            assert os.path.exists(args.image_input_dir)
        except AssertionError as e:
            print('The image folder does not exist!')
            os._exit(0)
        try:
            assert abs(args.train_proportion + args.val_proportion \
                    + args.test_proportion - 1.0) < 1e-5
        except AssertionError as e:
            print(
                'The sum of the train, validation and test proportions must be 1!'
            )
            os._exit(0)

        # Allocate the dataset.
        total_num = len(glob.glob(osp.join(args.json_input_dir, '*.json')))
        if args.train_proportion != 0:
            train_num = int(total_num * args.train_proportion)
            out_dir = args.output_dir + '/train'
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
        else:
            train_num = 0
        if args.val_proportion == 0.0:
            val_num = 0
            test_num = total_num - train_num
            out_dir = args.output_dir + '/test'
            if args.test_proportion != 0.0 and not os.path.exists(out_dir):
                os.makedirs(out_dir)
        else:
            val_num = int(total_num * args.val_proportion)
            test_num = total_num - train_num - val_num
            val_out_dir = args.output_dir + '/val'
            if not os.path.exists(val_out_dir):
                os.makedirs(val_out_dir)
            test_out_dir = args.output_dir + '/test'
            if args.test_proportion != 0.0 and not os.path.exists(test_out_dir):
                os.makedirs(test_out_dir)
        count = 1
        for img_name in os.listdir(args.image_input_dir):
            if count <= train_num:
                if osp.exists(args.output_dir + '/train/'):
                    shutil.copyfile(
                        osp.join(args.image_input_dir, img_name),
                        osp.join(args.output_dir + '/train/', img_name))
            else:
                if count <= train_num + val_num:
                    if osp.exists(args.output_dir + '/val/'):
                        shutil.copyfile(
                            osp.join(args.image_input_dir, img_name),
                            osp.join(args.output_dir + '/val/', img_name))
                else:
                    if osp.exists(args.output_dir + '/test/'):
                        shutil.copyfile(
                            osp.join(args.image_input_dir, img_name),
                            osp.join(args.output_dir + '/test/', img_name))
            count = count + 1

        # Deal with the json files.
        if not os.path.exists(args.output_dir + '/annotations'):
            os.makedirs(args.output_dir + '/annotations')
        if args.train_proportion != 0:
            train_data_coco = deal_json(args.dataset_type,
                                        args.output_dir + '/train',
                                        args.json_input_dir)
            train_json_path = osp.join(args.output_dir + '/annotations',
                                       'instance_train.json')
            json.dump(
                train_data_coco,
                open(train_json_path, 'w'),
                indent=4,
                cls=MyEncoder)
        if args.val_proportion != 0:
            val_data_coco = deal_json(args.dataset_type,
                                      args.output_dir + '/val',
                                      args.json_input_dir)
            val_json_path = osp.join(args.output_dir + '/annotations',
                                     'instance_val.json')
            json.dump(
                val_data_coco,
                open(val_json_path, 'w'),
                indent=4,
                cls=MyEncoder)
        if args.test_proportion != 0:
            test_data_coco = deal_json(args.dataset_type,
                                       args.output_dir + '/test',
                                       args.json_input_dir)
            test_json_path = osp.join(args.output_dir + '/annotations',
                                      'instance_test.json')
            json.dump(
                test_data_coco,
                open(test_json_path, 'w'),
                indent=4,
                cls=MyEncoder)


if __name__ == '__main__':
    """
    python tools/x2coco.py \
                --dataset_type labelme \
                --json_input_dir ./labelme_annos/ \
                --image_input_dir ./labelme_imgs/ \
                --output_dir ./mycoco/ \
                --train_proportion 0.8 \
                --val_proportion 0.2 \
                --test_proportion 0.0
    """
    main()
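
After the conversion finishes, a quick sanity check with pycocotools confirms the JSON loads as valid COCO. A minimal sketch, assuming pycocotools is installed and the output layout from the command above (instance_train.json under mycoco/annotations is where the script writes the train split):

from pycocotools.coco import COCO

coco = COCO('./mycoco/annotations/instance_train.json')
print('images:', len(coco.getImgIds()))
print('annotations:', len(coco.getAnnIds()))
print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])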



And with that, the COCO dataset built from self-collected data is done.
