DAGM2007数据集扩增（COCO格式以及VOC格式的灰度图数据集都可用）

最新推荐文章于 2023-04-28 16:05:23 发布

玩家完成不乐意

最新推荐文章于 2023-04-28 16:05:23 发布

阅读量2.2k

点赞数 3

分类专栏：数据集工具文章标签： opencv 计算机视觉 python

本文链接：https://blog.csdn.net/weixin_43871357/article/details/126165090

版权

数据集工具专栏收录该内容

2 篇文章 0 订阅

订阅专栏

文章目录

前言
COCO格式的数据集扩增
- 代码
- - AugmentMethodClass.py
  - DatasetAugment.py
VOC格式的数据集扩增
- 代码
- - AugmentMethodClass.py
  - DatasetAugment.py

前言

本文主要内容是将COCO格式以及VOC格式的DAGM2007数据集进行扩增

以下代码如果有不规范或错误的地方欢迎大家批评指正！

COCO以及VOC格式的DAGM2007数据集制作方法可以看上一篇博客：
DAGM2007数据集格式转换（DAGM转换为COCO格式、COCO格式转换为VOC格式）

主要包含五种扩增方法：水平翻转、垂直翻转、随机裁剪、随机旋转、随机四张图片拼接

随机裁剪与随机旋转方法主要参考：六种常见的数据扩增方式

随机旋转方法有一丢丢的小缺陷：
上面链接的大佬的思路是：图像旋转后，边界框也会随之旋转，取旋转后边界框四条边的中间点为新生成边界框的四个顶点。
该思路有一点小问题，在旋转一些细长的目标时，新得到的边界框可能会出现不完全包含目标的情况。
因此在本文中采用了opencv中的boundingRect方法得到新的边界框，该方法得到的新边界框虽然能完全包含目标，但是有些边界框会包含过多的背景。
欢迎大家一起讨论改进方法！

COCO格式的数据集扩增

在上一篇博客中，由于图像名字有重复，为这些图像赋了新名字，新名字就是图像的编号。这里同样需要为扩增后的新图像赋新名字：在原编号上加一个偏移量，五种方法（水平翻转、垂直翻转、随机裁剪、随机旋转、四图拼接）分别加5000、10000、15000、20000、25000。DAGM2007数据集只有一千多张图像，所以不用担心名字冲突；但如果用于其他数据集的扩增，请记得修改这些数字以防止名字冲突。另外，新图像默认保存为.PNG格式，如需其他格式也要相应修改代码。

代码

AugmentMethodClass.py

import os
import cv2
import random
import numpy as np


class AugmentMethod:

    def __init__(self, coco=None, root=None, new_root=None, model='train'):
        self.coco = coco
        self.root = root
        self.model = model
        self.new_root = new_root
        self.mosaic_annot_id = 0

    # horizion=True: flip horizontally (mirror left-right)
    # horizion=False: flip vertically (mirror top-bottom)
    # new_img_info: {'id': , 'width': , 'height': , 'file_name': }
    # new_annot_info: [{'id': , 'image_id': , ...}, ...]
    def flip_image(self, img_info, annot_infos, horizion=True):
        """Mirror one image together with its boxes and save the result.

        Args:
            img_info: Image record; ``'id'`` and ``'file_name'`` are read.
            annot_infos: Annotation records of that image; each needs ``'id'``
                and a ``'bbox'`` list laid out as ``[x, y, width, height]``.
            horizion: ``True`` mirrors left-right (ids shifted by 5000),
                ``False`` mirrors top-bottom (ids shifted by 10000).

        Returns:
            ``(new_img_info, new_annot_infos)``: copies of the inputs carrying
            the shifted ids, the new file name and the mirrored boxes. The
            input records are left untouched.
        """
        source_path = os.path.join(self.root, self.model, img_info['file_name'])
        img, (h, w) = self.get_img(source_path)

        # One switch decides both the mirroring axis and the id offset.
        id_offset = 5000 if horizion else 10000
        new_img = img[:, ::-1] if horizion else img[::-1, :]

        new_img_info = img_info.copy()
        new_img_info['id'] = new_img_info['id'] + id_offset
        # The file name is the new id, left-padded with zeros to eight characters.
        new_img_info['file_name'] = str(new_img_info['id']).rjust(8, '0') + '.PNG'

        new_annot_infos = []
        for annot_info in annot_infos:
            mirrored = annot_info.copy()
            mirrored['id'] = mirrored['id'] + id_offset
            mirrored['image_id'] = new_img_info['id']

            # dict.copy() is shallow: copy the bbox list as well, otherwise the
            # assignment below would also modify the caller's annotation.
            box = annot_info['bbox'].copy()
            if horizion:
                box[0] = w - box[0] - box[2]
            else:
                box[1] = h - box[1] - box[3]
            mirrored['bbox'] = box

            new_annot_infos.append(mirrored)

        # Save the mirrored image unless a file with that name is already there.
        target_path = os.path.join(self.new_root, self.model, new_img_info['file_name'])
        if not os.path.exists(target_path):
            cv2.imwrite(target_path, new_img)

        return new_img_info, new_annot_infos

    # 随机切割
    def crop_image(self, img_info, annot_infos, random_seed):
        """Randomly crop away border that contains no box, then zero-pad.

        A random amount of border is removed on every side, never cutting into
        any bounding box. The crop is pasted into the top-left corner of a
        zero-filled canvas of the original size, so the image size is
        unchanged and the boxes only shift by the removed left/top border.

        Args:
            img_info: Image record; ``'id'`` and ``'file_name'`` are read.
            annot_infos: Annotation records with ``'id'`` and a ``'bbox'``
                list laid out as ``[x, y, width, height]``. May be empty, in
                which case nothing is cropped.
            random_seed: Seed passed to ``random.seed`` so the crop is
                reproducible.

        Returns:
            ``(new_img_info, new_annot_infos)`` with ids shifted by 15000.
            The input records are left untouched.
        """
        random.seed(random_seed)

        img_path = os.path.join(self.root, self.model, img_info['file_name'])
        img, (h, w) = self.get_img(img_path)

        # Largest border that may be removed on each side without touching a
        # box. ``default=0`` covers an image without annotations (min() of an
        # empty sequence would raise) and ``max(0, ...)`` covers boxes that
        # stick out of the image (a negative margin would produce a negative
        # slice index and a shape mismatch when pasting).
        boxes = [annot_info['bbox'] for annot_info in annot_infos]
        margin_left = max(0, min((box[0] for box in boxes), default=0))
        margin_right = max(0, min((w - (box[0] + box[2]) for box in boxes), default=0))
        margin_top = max(0, min((box[1] for box in boxes), default=0))
        margin_bottom = max(0, min((h - (box[1] + box[3]) for box in boxes), default=0))

        # Draw order (left, right, top, bottom) is kept so a given seed still
        # produces the same crop as before.
        crop_left = int(random.uniform(0, margin_left))
        crop_right = w - int(random.uniform(0, margin_right))
        crop_top = int(random.uniform(0, margin_top))
        crop_bottom = h - int(random.uniform(0, margin_bottom))

        # numpy indexing is [row, column]; paste the crop at the top-left of a
        # black canvas that has the original size.
        crop_img = img[crop_top: crop_bottom, crop_left: crop_right]
        new_img = np.zeros((h, w), dtype=img.dtype)
        new_img[0: crop_bottom - crop_top, 0: crop_right - crop_left] = crop_img

        new_img_info = img_info.copy()
        new_img_info['id'] = new_img_info['id'] + 15000
        # The file name is the new id, zero-padded to eight characters.
        new_img_info['file_name'] = str(new_img_info['id']).rjust(8, '0') + '.PNG'

        new_annot_infos = []
        for annot_info in annot_infos:
            new_annot_info = annot_info.copy()
            new_annot_info['id'] = new_annot_info['id'] + 15000
            new_annot_info['image_id'] = new_img_info['id']
            # Copy the list too: dict.copy() is shallow.
            new_annot_info['bbox'] = annot_info['bbox'].copy()

            # Only the origin moves; width and height are unchanged.
            new_annot_info['bbox'][0] = max(0, new_annot_info['bbox'][0] - crop_left)
            new_annot_info['bbox'][1] = max(0, new_annot_info['bbox'][1] - crop_top)

            new_annot_infos.append(new_annot_info)

        # Save the new image unless a file with that name is already there.
        new_path = os.path.join(self.new_root, self.model, new_img_info['file_name'])
        if not os.path.exists(new_path):
            cv2.imwrite(new_path, new_img)

        return new_img_info, new_annot_infos

    # 随机旋转
    # https://www.cnblogs.com/lky-learning/p/11653861.html 大佬很厉害
    def rotate_image(self, img_info, annot_infos, random_seed):
        """Rotate one image by a random angle and recompute its boxes.

        The image is rotated about its centre by an integer angle drawn from
        ``[0, 180)`` and keeps its size. Each box is replaced by the
        axis-aligned bounding rectangle of its four rotated corners, clipped
        to the image. Boxes that end up outside the image are dropped.

        Args:
            img_info: Image record; ``'id'`` and ``'file_name'`` are read.
            annot_infos: Annotation records with ``'id'`` and a ``'bbox'``
                list laid out as ``[x, y, width, height]``.
            random_seed: Seed passed to ``random.seed`` so the angle is
                reproducible.

        Returns:
            ``(new_img_info, new_annot_infos)`` with ids shifted by 20000.
            ``new_annot_infos`` is empty when no box survives; in that case
            the rotated image is not written to disk.
        """
        random.seed(random_seed)
        angle = int(random.uniform(0, 180))

        img_path = os.path.join(self.root, self.model, img_info['file_name'])
        img, (h, w) = self.get_img(img_path)

        # 2x3 affine matrix: rotation about the image centre, scale 1.0.
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        new_img = cv2.warpAffine(img, M, (w, h))

        new_img_info = img_info.copy()
        new_img_info['id'] = new_img_info['id'] + 20000
        # The file name is the new id, zero-padded to eight characters.
        new_img_info['file_name'] = str(new_img_info['id']).rjust(8, '0') + '.PNG'

        new_annot_infos = []
        for annot_info in annot_infos:
            x, y, box_w, box_h = annot_info['bbox'][:4]

            # Box corners in homogeneous coordinates, one per row.
            corners = np.array([[x, y, 1],
                                [x + box_w, y, 1],
                                [x + box_w, y + box_h, 1],
                                [x, y + box_h, 1]])
            rotated = np.dot(corners, M.T).astype(np.int32)
            rx, ry, rw, rh = cv2.boundingRect(rotated)

            # Clip the rectangle to the image. The previous code clamped the
            # origin to 0 without shrinking the size, and tested "off the
            # image" against the image size instead of the rectangle size.
            x1, y1 = max(rx, 0), max(ry, 0)
            x2, y2 = min(rx + rw, w - 1), min(ry + rh, h - 1)
            if x2 <= x1 or y2 <= y1:
                # Nothing of the box is left inside the image.
                continue

            new_annot_info = annot_info.copy()
            new_annot_info['id'] = new_annot_info['id'] + 20000
            new_annot_info['image_id'] = new_img_info['id']
            # Copy the list too: dict.copy() is shallow.
            new_annot_info['bbox'] = annot_info['bbox'].copy()
            new_annot_info['bbox'][:4] = [x1, y1, x2 - x1, y2 - y1]

            new_annot_infos.append(new_annot_info)

        if not new_annot_infos:
            # An image without any remaining box is not saved.
            return new_img_info, new_annot_infos

        # Save the new image unless a file with that name is already there.
        new_path = os.path.join(self.new_root, self.model, new_img_info['file_name'])
        if not os.path.exists(new_path):
            cv2.imwrite(new_path, new_img)

        return new_img_info, new_annot_infos

    # mosaic
    def mosaic(self, index_list, id):
        img_infos = self.coco.loadImgs(ids=index_list)

        center_x = max([i['width'] for i in img_infos])
        center_y = max([i['height'] for i in img_infos])

        new_img_info = {'id': id + 25000,
                        'width': center_x * 2,
                        'height': center_y * 2}
        new_img_info['file_name'] = ''.join('0' for i in range(8 - len(str(new_img_info['id'])))) \
                                    + str(new_img_info['id']) + '.PNG'
        new_annot_infos = []
        for i, (img_info, img_id) in enumerate(zip(img_infos, index_list)):
            img, (h, w) = self.get_img(os.path.join(self.root, self.model, img_info['file_name']))
            annot_infos = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))

            if i == 0:
                new_img = np.zeros((center_y * 2, center_x * 2)).astype(img.dtype)
                # top_left
                x_min, y_min, x_max, y_max = center_x - w, center_y - h, center_x, center_y

            elif i == 1:
                # top_right
                x_min, y_min, x_max, y_max = center_x, center_y - h, center_x + w, center_y
            elif i == 2:
                # bottom_left
                x_min, y_min, x_max, y_max = center_x - w, center_y, center_x, center_y + h
            elif i == 3:
                # bottom_right
                x_min, y_min, x_max, y_max = center_x, center_y, center_x + w, center_y + h

            new_img[y_min: y_max, x_min: x_max] = img

            for annot in annot_infos:
                new_annot_info = annot.copy()
                new_annot_info['id'] = self.mosaic_annot_id + 25000
                new_annot_info['image_id'] = new_img_info['id']
                new_annot_info['bbox'] = annot['bbox'].copy()
                new_annot_info['bbox'][0] = x_min + new_annot_info['bbox'][0]
                new_annot_info['bbox'][1] = y_min + new_annot_info['bbox'][1]
                new_annot_infos.append(new_annot_info)

                self.mosaic_annot_id += 1

        # 将新图片写入到新路径
        new_path = os.path.join(self.new_root, self.model, new_img_info['file_name'])
        if not os.path.exists(new_path):
            cv2.imwrite(new_path, new_img)

        return new_img_info, new_annot_infos

    def get_img(self, path):
        """Load an image and return its first channel with its size.

        Args:
            path: Path of the image file.

        Returns:
            ``(img, (h, w))`` where ``img`` is the 2-D array of channel 0 and
            ``h``/``w`` are its number of rows/columns.

        Raises:
            FileNotFoundError: If the file cannot be read as an image.
        """
        loaded = cv2.imread(path)
        if loaded is None:
            # cv2.imread reports a missing or unreadable file by returning
            # None; fail with the path instead of a TypeError on indexing.
            raise FileNotFoundError(f'cannot read image: {path}')

        # Keep only channel 0 of the loaded 3-channel array.
        img = loaded[:, :, 0]
        h, w = img.shape

        return img, (h, w)

DatasetAugment.py

from pycocotools.coco import COCO
import numpy as np
import os
import json
from AugmentMethodClass import AugmentMethod
import shutil
import random
from tqdm import tqdm


# roots
root = '/DAGM2007-COCO'
set_name = 'train'
new_root = '/DAGM2007-COCO-v2'
if not os.path.exists(new_root):
    print("The path: '" + new_root + "' doesn't exist! Need to create new path")
# Create every output directory that is missing. Checking only new_root is not
# enough: it may exist without its sub-directories, and the image directory
# has to follow set_name rather than a hard-coded 'train'.
os.makedirs(os.path.join(new_root, 'annotations'), exist_ok=True)
os.makedirs(os.path.join(new_root, set_name), exist_ok=True)

dagm_coco = COCO(os.path.join(root, 'annotations', 'instances_' + set_name + '.json'))
# Pass the split explicitly so images are read from and written to the
# sub-directory that matches the annotation file loaded above.
augment = AugmentMethod(dagm_coco, root, new_root, set_name)
image_ids = dagm_coco.getImgIds()

# The output annotation file starts with everything from the source dataset;
# the augmented records are appended below.
dagm_images = dagm_coco.loadImgs(image_ids)
dagm_categories = dagm_coco.loadCats(dagm_coco.getCatIds())
dagm_annotations = dagm_coco.loadAnns(dagm_coco.getAnnIds(imgIds=image_ids))

# Augment every image of the source set.
for i, index in enumerate(tqdm(image_ids)):
    img_info = dagm_coco.loadImgs(ids=index)[0]
    annot_infos = dagm_coco.loadAnns(dagm_coco.getAnnIds(imgIds=index))

    # Copy the original image to the new location.
    image_path = os.path.join(root, set_name, img_info['file_name'])
    new_image_path = os.path.join(new_root, set_name, img_info['file_name'])
    shutil.copy(image_path, new_image_path)

    # Horizontal flip.
    flipped_img_info, flipped_annot_infos = augment.flip_image(img_info, annot_infos)
    dagm_images.append(flipped_img_info)
    dagm_annotations.extend(flipped_annot_infos)

    # Vertical flip.
    vertical_flipped_img_info, vertical_flipped_annot_infos = augment.flip_image(img_info, annot_infos, horizion=False)
    dagm_images.append(vertical_flipped_img_info)
    dagm_annotations.extend(vertical_flipped_annot_infos)

    # Random crop; the loop index is the seed, so every image gets its own crop.
    crop_img_info, crop_annot_infos = augment.crop_image(img_info, annot_infos, i)
    dagm_images.append(crop_img_info)
    dagm_annotations.extend(crop_annot_infos)

    # Random rotation; skipped when no box is left inside the rotated image.
    rotate_img_info, rotate_annot_infos = augment.rotate_image(img_info, annot_infos, i)
    if rotate_annot_infos:
        dagm_images.append(rotate_img_info)
        dagm_annotations.extend(rotate_annot_infos)

print('数据集已复制到新路径下，并进行（水平翻转、竖直翻转、随机裁剪、随机旋转）等方式的扩增')
print('现采用yolo中的mosaic方法对原数据集进行扩增')

# mosaic ing
for random_seed in tqdm(range(4)):
    random.seed(random_seed)
    random.shuffle(image_ids)
    num_yolo = len(image_ids) // 4
    # Consecutive groups of four ids; up to three leftover ids are not used in
    # this round. Plain list slices keep the ids as Python ints.
    yolo_image_ids = [image_ids[k: k + 4] for k in range(0, num_yolo * 4, 4)]

    for i, index_list in enumerate(yolo_image_ids):
        # Offset by the round so mosaic numbers are unique across all rounds.
        mosaic_img_info, mosaic_annot_infos = augment.mosaic(index_list, i + random_seed * len(yolo_image_ids))
        dagm_images.append(mosaic_img_info)
        dagm_annotations.extend(mosaic_annot_infos)

coco = {'images': dagm_images, 'annotations': dagm_annotations, 'categories': dagm_categories}

file_name = f'{new_root}/annotations/instances_{set_name}.json'
# Mode 'w' replaces an existing file; the with-block closes the handle so the
# JSON is completely written before the script reports success.
with open(file_name, 'w') as f:
    json.dump(coco, f)

print('数据集扩增完成！！！！')

VOC格式的数据集扩增

思路和COCO格式的扩增方法一样，只是针对格式不同对代码做了一些调整
因为这份代码是用来扩增博主的其他数据集的，扩增后图像的命名方式与上面不太一样：在原图像名称后增加_000、_011、_001、_010（分别对应水平翻转、垂直翻转、随机裁剪、随机旋转），四张拼接图像的名称则是直接将原四张图像的名字用下划线拼接起来。新图像默认保存为.jpg格式，原数据集的图像也需要是.jpg格式，若为其他格式同样需要修改代码。

代码

AugmentMethodClass.py

import os
import copy
import random
import xml.etree.ElementTree as ET
from lxml import etree, objectify
import cv2.cv2 as cv2
import numpy as np
import shutil


class AugmentMethod:
    def __init__(self, root, new_root, set='train'):
        self.root = root
        self.new_root = new_root
        self.xml_root = os.path.join(root, 'Annotations')
        self.images_root = os.path.join(root, 'JPEGImages')
        self.names_root = os.path.join(root, 'ImageSets', 'Main')
        self.name_list = list()
        with open(os.path.join(self.names_root, f'{set}.txt'), 'r') as f:
            for i in f.readlines():
                self.name_list.append(i.split('\n')[0])
        f.close()

    def parseXmlFiles(self):
        infos = list()
        for name in self.name_list:
            info = dict()
            info['folder'] = self.new_root
            info['filename'] = name + '.jpg'

            xml_path = os.path.join(self.xml_root, name + '.xml')
            tree = ET.parse(xml_path)
            root = tree.getroot()

            size = dict()
            xml_size = root.find('size')
            size['width'] = xml_size.find('width').text
            size['height'] = xml_size.find('height').text
            size['depth'] = xml_size.find('depth').text
            info['size'] = size

            objects = list()
            xml_object_set = root.findall('object')
            for xml_object in xml_object_set:
                object = dict()
                object['name'] = xml_object.find('name').text
                object['pose'] = xml_object.find('pose').text
                object['truncated'] = xml_object.find('truncated').text
                object['difficult'] = xml_object.find('difficult').text
                object['bndbox'] = dict()
                for i in xml_object.find('bndbox'):
                    object['bndbox'][i.tag] = i.text
                objects.append(object)

            info['object'] = objects
            infos.append(info)

        return infos

    # 水平翻转新图像名字后增加"_000"
    # 垂直翻转新图像名字后增加"_011"
    def flip_image(self, infos, horizion=True):
        infos_ori = copy.deepcopy(infos)
        name_list = list()
        for info in infos_ori:
            # 处理图像
            img = cv2.imread(os.path.join(self.images_root, info['filename']), -1)
            if horizion:
                new_img = img[:, ::-1]
                info['filename'] = info['filename'].split('.tif')[0] + "_000" + ".jpg"
            else:
                new_img = img[::-1, :]
                info['filename'] = info['filename'].split('.tif')[0] + "_011" + ".jpg"

            # 处理标注信息
            for object in info['object']:
                x_min = int(object['bndbox']['xmin'])
                x_max = int(object['bndbox']['xmax'])
                y_min = int(object['bndbox']['ymin'])
                y_max = int(object['bndbox']['ymax'])
                w = x_max - x_min
                h = y_max - y_min

                if horizion:
                    object['bndbox']['xmin'] = int(info['size']['width']) - x_min - w
                    object['bndbox']['xmax'] = int(object['bndbox']['xmin']) + w
                else:
                    object['bndbox']['ymin'] = int(info['size']['height']) - y_min - h
                    object['bndbox']['ymax'] = int(object['bndbox']['ymin']) + h

            new_path = os.path.join(self.new_root, 'JPEGImages', info['filename'])
            cv2.imwrite(new_path, new_img)
            self.write_xml(info)

            name_list.append(info['filename'].split('.jpg')[0])
        return name_list

    # 新图像名字后增加"_001"
    def crop_image(self, infos, random_seed):
        name_list = list()
        random.seed(random_seed)
        infos_ori = copy.deepcopy(infos)
        for info in infos_ori:
            # 处理图像
            img = cv2.imread(os.path.join(self.images_root, info['filename']), -1)
            d_to_left, d_to_right, d_to_top, d_to_bottom = [], [], [], []
            for obj in info['object']:
                d_to_left.append(int(obj['bndbox']['xmin']))
                d_to_right.append(int(info['size']['width']) - int(obj['bndbox']['xmax']))
                d_to_top.append(int(obj['bndbox']['ymin']))
                d_to_bottom.append(int(info['size']['height']) - int(obj['bndbox']['ymax']))

            # 确定裁剪后的图片范围
            range_crop_left = int(random.uniform(0, min(d_to_left)))
            range_crop_right = int(info['size']['width']) - int(random.uniform(0, min(d_to_right)))
            range_crop_top = int(random.uniform(0, min(d_to_top)))
            range_crop_bottom = int(info['size']['height']) - int(random.uniform(0, min(d_to_bottom)))

            # 将裁剪后的图像填充为原图像大小（0填充）
            # np.array[行索引, 列索引]
            crop_img = img[range_crop_top: range_crop_bottom, range_crop_left: range_crop_right]
            new_img = np.zeros((int(info['size']['height']), int(info['size']['width']))).astype(img.dtype)
            new_img[0: (range_crop_bottom - range_crop_top), 0: (range_crop_right - range_crop_left)] = crop_img

            # 处理标注信息
            info['filename'] = info['filename'].split('.jpg')[0] + "_001" + ".jpg"
            for obj in info['object']:
                x_min = int(obj['bndbox']['xmin'])
                x_max = int(obj['bndbox']['xmax'])
                y_min = int(obj['bndbox']['ymin'])
                y_max = int(obj['bndbox']['ymax'])
                w = x_max - x_min
                h = y_max - y_min

                obj['bndbox']['xmin'] = max(0, x_min - range_crop_left)
                obj['bndbox']['ymin'] = max(0, y_min - range_crop_top)
                obj['bndbox']['xmax'] = obj['bndbox']['xmin'] + w
                obj['bndbox']['ymax'] = obj['bndbox']['ymin'] + h

            name_list.append(info['filename'].split('.tif')[0])

            new_path = os.path.join(self.new_root, 'JPEGImages', info['filename'])
            cv2.imwrite(new_path, new_img)

            self.write_xml(info)
        return name_list

    # 新图像名字后增加"_010"
    def rotate_image(self, infos, random_seed):
        infos_ori = copy.deepcopy(infos)
        name_list = list()
        random.seed(random_seed)
        angle = int(random.uniform(0, 180))
        for info in infos_ori:
            # 处理图像
            img = cv2.imread(os.path.join(self.images_root, info['filename']), -1)
            cx, cy = int(info['size']['width']) // 2, int(info['size']['height']) // 2
            M = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
            new_img = cv2.warpAffine(img, M, (int(info['size']['width']), int(info['size']['height'])))

            # 处理标注信息
            info['filename'] = info['filename'].split('.jpg')[0] + "_010" + ".jpg"
            for obj in info['object']:
                x_min = int(obj['bndbox']['xmin'])
                x_max = int(obj['bndbox']['xmax'])
                y_min = int(obj['bndbox']['ymin'])
                y_max = int(obj['bndbox']['ymax'])
                w = x_max - x_min
                h = y_max - y_min

                left_top = np.array([x_min, y_min, 1])
                top_right = np.array([x_max, y_min, 1])
                right_bottom = np.array([x_max, y_max, 1])
                bottom_left = np.array([x_min, y_max, 1])

                left_top = np.dot(M, left_top)
                top_right = np.dot(M, top_right)
                right_bottom = np.dot(M, right_bottom)
                bottom_left = np.dot(M, bottom_left)
                concat = np.vstack((left_top, top_right, right_bottom, bottom_left))
                concat = concat.astype(np.int32)
                rx, ry, rw, rh = cv2.boundingRect(concat)

                if (rx >= int(info['size']['width'])) \
                        or (ry >= int(info['size']['height'])) \
                        or ((rx + int(info['size']['width'])) <= 0) \
                        or ((ry + int(info['size']['height'])) <= 0):
                    info['object'].remove(obj)
                rx = rx if rx > 0 else 0
                ry = ry if ry > 0 else 0
                obj['bndbox']['xmin'] = rx
                obj['bndbox']['ymin'] = ry
                rw = int(info['size']['width']) - rx - 1 if rx + rw >= int(info['size']['width']) else rw
                rh = int(info['size']['height']) - ry - 1 if ry + rh >= int(info['size']['height']) else rh
                obj['bndbox']['xmax'] = rx + rw
                obj['bndbox']['ymax'] = ry + rh

            if len(info['object']) == 0:
                continue
            name_list.append(info['filename'].split('.jpg')[0])
            new_path = os.path.join(self.new_root, 'JPEGImages', info['filename'])
            cv2.imwrite(new_path, new_img)
            self.write_xml(info)
        return name_list

    # 四张图象名字拼接
    def mosaic(self, all_infos):
        name_list = list()
        for random_seed in range(4):
            infos_ori = copy.deepcopy(all_infos)
            random.seed(random_seed)
            random.shuffle(infos_ori)
            num_info = int(len(infos_ori) / 4)

            resized_infos = np.resize(infos_ori[: num_info * 4], (num_info, 4))
            for dfhhedd, infos in enumerate(resized_infos):
                center_x = max([int(i['size']['width']) for i in infos])
                center_y = max([int(i['size']['height']) for i in infos])

                new_info = dict()
                new_info['folder'] = self.new_root
                new_info['filename'] = infos[0]['filename'].split('.jpg')[0] + '_' + \
                                       infos[1]['filename'].split('.jpg')[0] + '_' + \
                                       infos[2]['filename'].split('.jpg')[0] + '_' + \
                                       infos[3]['filename'].split('.jpg')[0] + '.jpg'
                new_info['size'] = {'width': center_x * 2, 'height': center_y * 2, 'depth': 1}
                new_info['object'] = list()
                for j, info in enumerate(infos):
                    img = cv2.imread(os.path.join(self.images_root, info['filename']), -1)

                    w = int(info['size']['width'])
                    h = int(info['size']['height'])

                    if j == 0:
                        new_img = np.zeros((center_y * 2, center_x * 2)).astype(img.dtype)
                        # top_left
                        x_min, y_min, x_max, y_max = center_x - w, center_y - h, center_x, center_y
                    elif j == 1:
                        # top_right
                        x_min, y_min, x_max, y_max = center_x, center_y - h, center_x + w, center_y
                    elif j == 2:
                        # bottom_left
                        x_min, y_min, x_max, y_max = center_x - w, center_y, center_x, center_y + h
                    elif j == 3:
                        # bottom_right
                        x_min, y_min, x_max, y_max = center_x, center_y, center_x + w, center_y + h

                    new_img[y_min: y_max, x_min: x_max] = img

                    for obj in info['object']:
                        obj['bndbox']['xmin'] = x_min + int(obj['bndbox']['xmin'])
                        obj['bndbox']['ymin'] = y_min + int(obj['bndbox']['ymin'])
                        obj['bndbox']['xmax'] = x_min + int(obj['bndbox']['xmax'])
                        obj['bndbox']['ymax'] = y_min + int(obj['bndbox']['ymax'])

                        new_info['object'].append(obj)

                name_list.append(new_info['filename'].split('.jpg')[0])

                new_path = os.path.join(self.new_root, 'JPEGImages', new_info['filename'])
                cv2.imwrite(new_path, new_img)
                self.write_xml(new_info)
        return name_list

    def write_xml(self, info):
        """Write one image record as an annotation XML file.

        The file goes to ``<new_root>/Annotations/`` and is named after
        ``info['filename']`` with everything from the first ``.jpg`` on
        replaced by ``.xml``.

        Args:
            info: Image record with ``'filename'``, ``'size'`` (``width``,
                ``height``, ``depth``) and ``'object'`` (dicts with ``name``,
                ``pose``, ``truncated``, ``difficult`` and ``bndbox``).
        """
        maker = objectify.ElementMaker(annotate=False)

        size = info['size']
        annotation = maker.annotation(
            maker.folder(self.new_root),
            maker.filename(info['filename']),
            maker.size(
                maker.width(size['width']),
                maker.height(size['height']),
                maker.depth(size['depth']),
            ),
            maker.segmented(0),
        )

        # One <object> element per annotated box.
        for obj in info['object']:
            box = obj['bndbox']
            annotation.append(maker.object(
                maker.name(obj['name']),
                maker.pose(obj['pose']),
                maker.truncated(obj['truncated']),
                maker.difficult(obj['difficult']),
                maker.bndbox(
                    maker.xmin(box['xmin']),
                    maker.ymin(box['ymin']),
                    maker.xmax(box['xmax']),
                    maker.ymax(box['ymax']),
                ),
            ))

        xml_name = info['filename'].split('.jpg')[0] + '.xml'
        save_path = os.path.join(self.new_root, 'Annotations', xml_name)
        etree.ElementTree(annotation).write(save_path, pretty_print=True)

    def copy(self, infos):
        name_list = list()
        for info in infos:
            shutil.copy(os.path.join(self.images_root, info['filename']),
                        os.path.join(self.new_root, 'JPEGImages', info['filename']))
            self.write_xml(info)
            name_list.append(info['filename'].split('.jpg')[0])

        return name_list

DatasetAugment.py

import os
import random
from AugmentMethodClass import AugmentMethod


root = '/lalala_VOC'
new_root = '/lalala_VOC_v2'
# Create whatever part of the output tree is missing. The former
# os.remove(new_root) raised for an existing directory, so the script could
# not be run a second time. Existing files are left in place and files with
# the same name are overwritten; clear new_root by hand for a clean run.
for sub_dir in ('Annotations', 'JPEGImages', os.path.join('ImageSets', 'Main')):
    os.makedirs(os.path.join(new_root, sub_dir), exist_ok=True)

VOC_dataset = AugmentMethod(root, new_root)
train_infos = VOC_dataset.parseXmlFiles()
val_VOC_dataset = AugmentMethod(root, new_root, 'val')
val_infos = val_VOC_dataset.parseXmlFiles()

# Training split: the originals plus the five kinds of augmented images.
train_name_list = list()
random_seed = 815
name_list0 = VOC_dataset.copy(train_infos)
name_list1 = VOC_dataset.flip_image(train_infos)
name_list2 = VOC_dataset.flip_image(train_infos, False)
name_list3 = VOC_dataset.crop_image(train_infos, random_seed)
name_list4 = VOC_dataset.rotate_image(train_infos, random_seed)
name_list5 = VOC_dataset.mosaic(train_infos)

for names in (name_list0, name_list1, name_list2, name_list3, name_list4, name_list5):
    train_name_list.extend(names)

# One image name per line.
with open(os.path.join(new_root, 'ImageSets', 'Main', 'train.txt'), 'w') as f:
    for name in train_name_list:
        f.write(name)
        f.write('\n')

# Validation split: copied without augmentation.
val_name_list = val_VOC_dataset.copy(val_infos)
with open(os.path.join(new_root, 'ImageSets', 'Main', 'val.txt'), 'w') as f:
    for name in val_name_list:
        f.write(name)
        f.write('\n')

玩家完成不乐意

关注

3
点赞
踩
16

收藏

觉得还不错? 一键收藏
打赏
10
评论
DAGM2007数据集扩增（COCO格式以及VOC格式的灰度图数据集都可用）

本文主要内容是将COCO格式以及VOC格式的DAGM2007数据集进行扩增，主要包含五种扩增方法：水平翻转、垂直翻转、随机裁剪、随机旋转、随机四张图片拼接
复制链接

扫一扫