文章目录
前言
本文主要内容是将COCO格式以及VOC格式的DAGM2007数据集进行扩增
以下代码如果有不规范或错误的地方欢迎大家批评指正!
COCO以及VOC格式的DAGM2007数据集制作方法可以看上一篇博客:
DAGM2007数据集格式转换(DAGM转换为COCO格式、COCO格式转换为VOC格式)
主要包含五种扩增方法:水平翻转、垂直翻转、随机裁剪、随机旋转、随机四张图片拼接
随机裁剪与随机旋转方法主要参考:六种常见的数据扩增方式
随机旋转方法有一丢丢的小缺陷:
上面链接的大佬的思路是:图像旋转后,边界框也会随之旋转,取旋转后边界框四条边的中间点为新生成边界框的四个顶点。
该思路有一点小问题,在旋转一些细长的目标时,新得到的边界框可能会出现不完全包含目标的情况。
因此在本文中采用了opencv中的boundingRect方法得到新的边界框,该方法得到的新边界框虽然能完全包含目标,但是有些边界框会包含过多的背景。
欢迎大家一起讨论改进方法!
COCO格式的数据集扩增
在上一篇博客中,因为图像名字有重复,所以为这些图像赋了新名字,这些名字就是编号。这里同样需要为扩增后的新图像赋新名字:新名字是在原编号上加一个偏移量,五种方法(水平翻转、垂直翻转、随机裁剪、随机旋转、随机四张图片拼接)分别加5000、10000、15000、20000、25000。因为DAGM2007数据集只有一千多张图像,所以不用担心名字起冲突;但如果用于其他数据集的扩增,记得修改这些数字,防止名字起冲突。另外,扩增后的图像默认保存为.PNG格式,如需其他格式也要相应修改代码。
代码
AugmentMethodClass.py
import os
import cv2
import random
import numpy as np
class AugmentMethod:
    """Offline augmentation helpers for a COCO-format detection dataset.

    Every public augmentation method reads its source image(s) from
    ``root/model``, writes the augmented image to ``new_root/model`` (unless a
    file with that name already exists) and returns the new COCO ``image``
    record together with the list of new ``annotation`` records.

    New image and annotation ids are the original id plus a per-method
    multiple of ``id_step``: horizontal flip x1, vertical flip x2, crop x3,
    rotate x4, mosaic x5. ``id_step`` must therefore be larger than every id
    in the source dataset, otherwise ids and file names collide.

    NOTE: only ``bbox`` is transformed. Any other geometric field an
    annotation carries (e.g. ``segmentation`` or ``area``) is copied as is.

    Args:
        coco: a loaded ``pycocotools.coco.COCO`` object (only used by
            :meth:`mosaic`).
        root: root directory of the source dataset.
        new_root: root directory of the augmented dataset.
        model: name of the image sub-directory (the split), e.g. ``'train'``.
        id_step: id offset unit described above.
        image_ext: extension (with dot) used for the written images.
    """

    def __init__(self, coco=None, root=None, new_root=None, model='train',
                 id_step=5000, image_ext='.PNG'):
        self.coco = coco
        self.root = root
        self.model = model
        self.new_root = new_root
        self.id_step = id_step
        self.image_ext = image_ext
        # Running counter so that every mosaic annotation gets a unique id.
        self.mosaic_annot_id = 0

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _file_name(self, image_id):
        """Return the file name for ``image_id``: id zero-padded to 8 digits."""
        return str(image_id).zfill(8) + self.image_ext

    def _source_path(self, img_info):
        """Return the path of the source image described by ``img_info``."""
        return os.path.join(self.root, self.model, img_info['file_name'])

    def _derive_img_info(self, img_info, offset):
        """Copy ``img_info`` and give the copy a shifted id and new file name."""
        new_img_info = img_info.copy()
        new_img_info['id'] = img_info['id'] + offset
        new_img_info['file_name'] = self._file_name(new_img_info['id'])
        return new_img_info

    @staticmethod
    def _derive_annot(annot_info, new_id, new_image_id):
        """Copy an annotation, re-keying it to ``new_id`` / ``new_image_id``.

        The bbox list is copied as well; ``dict.copy`` is shallow, so without
        this the caller's edits would leak into the source annotation.
        """
        new_annot_info = annot_info.copy()
        new_annot_info['id'] = new_id
        new_annot_info['image_id'] = new_image_id
        new_annot_info['bbox'] = list(annot_info['bbox'])
        return new_annot_info

    @staticmethod
    def _flip_bbox(bbox, w, h, horizontal):
        """Return ``[x, y, bw, bh]`` mirrored inside a ``w`` x ``h`` image."""
        flipped = list(bbox)
        if horizontal:
            flipped[0] = w - bbox[0] - bbox[2]
        else:
            flipped[1] = h - bbox[1] - bbox[3]
        return flipped

    @staticmethod
    def _clip_box(rx, ry, rw, rh, w, h):
        """Clip the rectangle ``(rx, ry, rw, rh)`` to a ``w`` x ``h`` image.

        Returns ``[x, y, bw, bh]``, or ``None`` when the rectangle lies
        completely outside the image or collapses to zero size after
        clipping. The far edge is limited to ``w - 1`` / ``h - 1``.
        """
        if rx >= w or ry >= h or rx + rw <= 0 or ry + rh <= 0:
            return None
        x_min, y_min = max(rx, 0), max(ry, 0)
        x_max, y_max = min(rx + rw, w - 1), min(ry + rh, h - 1)
        if x_max <= x_min or y_max <= y_min:
            return None
        return [x_min, y_min, x_max - x_min, y_max - y_min]

    def _save(self, file_name, img):
        """Write ``img`` to ``new_root/model/file_name`` if not already there.

        Raises:
            OSError: if OpenCV reports that the image could not be written
                (for instance because the target directory does not exist).
        """
        new_path = os.path.join(self.new_root, self.model, file_name)
        if os.path.exists(new_path):
            return
        # Flipped images are negative-stride views; hand OpenCV a plain array.
        if not cv2.imwrite(new_path, np.ascontiguousarray(img)):
            raise OSError(f"failed to write image: {new_path}")

    # ------------------------------------------------------------------
    # augmentation methods
    # ------------------------------------------------------------------
    def flip_image(self, img_info, annot_infos, horizion=True):
        """Mirror one image and its boxes.

        Args:
            img_info: COCO image record
                ``{'id': , 'width': , 'height': , 'file_name': }``.
            annot_infos: list of COCO annotation records of that image.
            horizion: ``True`` flips horizontally (left-right), ``False``
                flips vertically (top-bottom).

        Returns:
            ``(new_img_info, new_annot_infos)``.
        """
        img, (h, w) = self.get_img(self._source_path(img_info))
        offset = self.id_step if horizion else 2 * self.id_step
        new_img = img[:, ::-1] if horizion else img[::-1, :]

        new_img_info = self._derive_img_info(img_info, offset)
        new_annot_infos = []
        for annot_info in annot_infos:
            new_annot_info = self._derive_annot(
                annot_info, annot_info['id'] + offset, new_img_info['id'])
            new_annot_info['bbox'] = self._flip_bbox(
                new_annot_info['bbox'], w, h, horizion)
            new_annot_infos.append(new_annot_info)

        self._save(new_img_info['file_name'], new_img)
        return new_img_info, new_annot_infos

    def crop_image(self, img_info, annot_infos, random_seed):
        """Randomly crop the borders of an image without cutting any box.

        The crop is pasted into the top-left corner of a zero-filled canvas
        of the original size, so the image dimensions stay the same.

        Args:
            img_info: COCO image record.
            annot_infos: list of COCO annotation records of that image. When
                empty, nothing constrains the crop and the image is left
                uncropped.
            random_seed: seed of the private random generator, so the result
                is reproducible and the global ``random`` state is untouched.

        Returns:
            ``(new_img_info, new_annot_infos)``.
        """
        rng = random.Random(random_seed)
        img, (h, w) = self.get_img(self._source_path(img_info))

        # Smallest distance from each image edge to any box; clamped at zero
        # in case a box already sticks out of the image.
        boxes = [annot_info['bbox'] for annot_info in annot_infos]
        margin_left = max(0, min((b[0] for b in boxes), default=0))
        margin_right = max(0, min((w - (b[0] + b[2]) for b in boxes), default=0))
        margin_top = max(0, min((b[1] for b in boxes), default=0))
        margin_bottom = max(0, min((h - (b[1] + b[3]) for b in boxes), default=0))

        crop_left = int(rng.uniform(0, margin_left))
        crop_right = w - int(rng.uniform(0, margin_right))
        crop_top = int(rng.uniform(0, margin_top))
        crop_bottom = h - int(rng.uniform(0, margin_bottom))

        # numpy images are indexed [row, column].
        new_img = np.zeros((h, w), dtype=img.dtype)
        new_img[0: crop_bottom - crop_top, 0: crop_right - crop_left] = \
            img[crop_top: crop_bottom, crop_left: crop_right]

        offset = 3 * self.id_step
        new_img_info = self._derive_img_info(img_info, offset)
        new_annot_infos = []
        for annot_info in annot_infos:
            new_annot_info = self._derive_annot(
                annot_info, annot_info['id'] + offset, new_img_info['id'])
            new_annot_info['bbox'][0] = max(0, new_annot_info['bbox'][0] - crop_left)
            new_annot_info['bbox'][1] = max(0, new_annot_info['bbox'][1] - crop_top)
            new_annot_infos.append(new_annot_info)

        self._save(new_img_info['file_name'], new_img)
        return new_img_info, new_annot_infos

    def rotate_image(self, img_info, annot_infos, random_seed):
        """Rotate an image about its centre by a random whole-degree angle.

        The angle is ``int(uniform(0, 180))``. Each box is replaced by the
        axis-aligned bounding rectangle (``cv2.boundingRect``) of its four
        rotated corners, clipped to the image. Boxes that end up outside the
        image are dropped.

        Args:
            img_info: COCO image record.
            annot_infos: list of COCO annotation records of that image.
            random_seed: seed of the private random generator.

        Returns:
            ``(new_img_info, new_annot_infos)``. When no box survives the
            list is empty and no image is written.
        """
        rng = random.Random(random_seed)
        angle = int(rng.uniform(0, 180))
        img, (h, w) = self.get_img(self._source_path(img_info))

        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        new_img = cv2.warpAffine(img, M, (w, h))

        offset = 4 * self.id_step
        new_img_info = self._derive_img_info(img_info, offset)
        new_annot_infos = []
        for annot_info in annot_infos:
            x, y, bw, bh = annot_info['bbox'][:4]
            corners = ((x, y), (x + bw, y), (x + bw, y + bh), (x, y + bh))
            rotated = np.vstack(
                [np.dot(M, np.array([cx, cy, 1])) for cx, cy in corners]
            ).astype(np.int32)
            clipped = self._clip_box(*cv2.boundingRect(rotated), w, h)
            if clipped is None:
                continue
            new_annot_info = self._derive_annot(
                annot_info, annot_info['id'] + offset, new_img_info['id'])
            new_annot_info['bbox'] = clipped
            new_annot_infos.append(new_annot_info)

        if not new_annot_infos:
            return new_img_info, new_annot_infos
        self._save(new_img_info['file_name'], new_img)
        return new_img_info, new_annot_infos

    def mosaic(self, index_list, id):
        """Tile up to four images around a common centre point.

        The canvas is twice the largest width and height among the tiles.
        Tiles are placed in the order top-left, top-right, bottom-left,
        bottom-right, each touching the canvas centre.

        Args:
            index_list: ids of the one to four images to tile.
            id: running index of this mosaic; the new image id is
                ``id + 5 * id_step``.

        Returns:
            ``(new_img_info, new_annot_infos)``.

        Raises:
            ValueError: if ``index_list`` does not hold one to four ids.
        """
        if not 1 <= len(index_list) <= 4:
            raise ValueError("mosaic expects between one and four image ids")

        offset = 5 * self.id_step
        img_infos = self.coco.loadImgs(ids=index_list)
        center_x = max(info['width'] for info in img_infos)
        center_y = max(info['height'] for info in img_infos)

        new_img_info = {'id': id + offset,
                        'width': center_x * 2,
                        'height': center_y * 2}
        new_img_info['file_name'] = self._file_name(new_img_info['id'])

        new_img = None
        new_annot_infos = []
        for i, (img_info, img_id) in enumerate(zip(img_infos, index_list)):
            img, (h, w) = self.get_img(self._source_path(img_info))
            annot_infos = self.coco.loadAnns(self.coco.getAnnIds(imgIds=img_id))
            if new_img is None:
                new_img = np.zeros((center_y * 2, center_x * 2), dtype=img.dtype)

            # Top-left corner of tile i on the canvas.
            x_min, y_min = (
                (center_x - w, center_y - h),  # top-left
                (center_x, center_y - h),      # top-right
                (center_x - w, center_y),      # bottom-left
                (center_x, center_y),          # bottom-right
            )[i]
            new_img[y_min: y_min + h, x_min: x_min + w] = img

            for annot in annot_infos:
                new_annot_info = self._derive_annot(
                    annot, self.mosaic_annot_id + offset, new_img_info['id'])
                new_annot_info['bbox'][0] += x_min
                new_annot_info['bbox'][1] += y_min
                new_annot_infos.append(new_annot_info)
                self.mosaic_annot_id += 1

        self._save(new_img_info['file_name'], new_img)
        return new_img_info, new_annot_infos

    def get_img(self, path):
        """Load an image and return its first channel plus ``(height, width)``.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``path`` (``cv2.imread``
                returns ``None`` instead of raising).
        """
        img = cv2.imread(path)
        if img is None:
            raise FileNotFoundError(f"cannot read image: {path}")
        img = img[:, :, 0]
        h, w = img.shape
        return img, (h, w)
DatasetAugment.py
from pycocotools.coco import COCO
import numpy as np
import os
import json
from AugmentMethodClass import AugmentMethod
import shutil
import random
from tqdm import tqdm
# Dataset locations: `root` is the source COCO-format dataset, `new_root`
# receives the original images plus all augmented ones.
root = '/DAGM2007-COCO'
set_name = 'train'
new_root = '/DAGM2007-COCO-v2'
if not os.path.exists(new_root):
    print("The path: '" + new_root + "' doesn't exist! Need to create new path")
# Create the sub-directories even when new_root itself already exists, and
# name the image directory after the split that is actually processed.
os.makedirs(os.path.join(new_root, 'annotations'), exist_ok=True)
os.makedirs(os.path.join(new_root, set_name), exist_ok=True)

dagm_coco = COCO(os.path.join(root, 'annotations', 'instances_' + set_name + '.json'))
augment = AugmentMethod(dagm_coco, root, new_root, set_name)

# Start from the original records; augmented records are appended below.
image_ids = dagm_coco.getImgIds()
dagm_images = dagm_coco.loadImgs(image_ids)
dagm_categories = dagm_coco.loadCats(dagm_coco.getCatIds())
dagm_annotations = dagm_coco.loadAnns(dagm_coco.getAnnIds(imgIds=image_ids))

# Augment every image of the split in turn.
for i, index in enumerate(tqdm(image_ids)):
    img_info = dagm_coco.loadImgs(ids=index)[0]
    annot_infos = dagm_coco.loadAnns(dagm_coco.getAnnIds(imgIds=index))

    # Copy the original image to the new dataset.
    image_path = os.path.join(root, set_name, img_info['file_name'])
    new_image_path = os.path.join(new_root, set_name, img_info['file_name'])
    shutil.copy(image_path, new_image_path)

    # Horizontal flip.
    flipped_img_info, flipped_annot_infos = augment.flip_image(img_info, annot_infos)
    dagm_images.append(flipped_img_info)
    dagm_annotations.extend(flipped_annot_infos)

    # Vertical flip.
    vertical_flipped_img_info, vertical_flipped_annot_infos = augment.flip_image(
        img_info, annot_infos, horizion=False)
    dagm_images.append(vertical_flipped_img_info)
    dagm_annotations.extend(vertical_flipped_annot_infos)

    # Random crop; the loop index is the seed, so reruns are reproducible.
    crop_img_info, crop_annot_infos = augment.crop_image(img_info, annot_infos, i)
    dagm_images.append(crop_img_info)
    dagm_annotations.extend(crop_annot_infos)

    # Random rotation; skipped when every box was rotated out of the image.
    rotate_img_info, rotate_annot_infos = augment.rotate_image(img_info, annot_infos, i)
    if rotate_annot_infos:
        dagm_images.append(rotate_img_info)
        dagm_annotations.extend(rotate_annot_infos)

print('数据集已复制到新路径下,并进行(水平翻转、竖直翻转、随机裁剪、随机旋转)等方式的扩增')
print('现采用yolo中的mosaic方法对原数据集进行扩增')

# Mosaic: four passes, each reshuffling the ids and tiling consecutive groups
# of four images. Left-over ids (fewer than four) are ignored in that pass.
for random_seed in tqdm(range(4)):
    random.seed(random_seed)
    random.shuffle(image_ids)
    num_yolo = len(image_ids) // 4
    for i in range(num_yolo):
        # Plain list slicing keeps the ids as Python ints.
        index_list = image_ids[4 * i: 4 * i + 4]
        mosaic_img_info, mosaic_annot_infos = augment.mosaic(
            index_list, i + random_seed * num_yolo)
        dagm_images.append(mosaic_img_info)
        dagm_annotations.extend(mosaic_annot_infos)

# Write the merged annotation file; mode 'w' replaces any previous file.
coco = {'images': dagm_images, 'annotations': dagm_annotations, 'categories': dagm_categories}
file_name = f'{new_root}/annotations/instances_{set_name}.json'
with open(file_name, 'w') as f:
    json.dump(coco, f)
print('数据集扩增完成!!!!')
VOC格式的数据集扩增
思路和COCO格式的扩增方法一样,只是针对格式不同对代码做了一些调整
因为这个是用来扩增博主的其他数据集,扩增后图像的命名方式同上面的不太一样。这里是在原图像名称后增加_000、_011、_001、_010(分别对应水平翻转、垂直翻转、随机裁剪、随机旋转),随机四张拼接的图像名称是直接将原四张图像的名字用下划线拼接起来。默认保存为.jpg格式,原数据集图像格式也需要为.jpg,若为其他格式同样需要修改代码。
代码
AugmentMethodClass.py
import os
import copy
import random
import xml.etree.ElementTree as ET
from lxml import etree, objectify
import cv2.cv2 as cv2
import numpy as np
import shutil
class AugmentMethod:
    """Offline augmentation helpers for a Pascal-VOC-format detection dataset.

    The source dataset is read from ``root`` (``Annotations``, ``JPEGImages``
    and ``ImageSets/Main/<set>.txt``); augmented images and XML files are
    written below ``new_root`` using the same sub-directory names.

    Augmented files are named after the source image with a suffix appended
    to the stem: ``_000`` horizontal flip, ``_011`` vertical flip, ``_001``
    random crop, ``_010`` random rotation. A mosaic is named after its four
    source stems joined with ``_``. All images are written as ``.jpg``.

    Args:
        root: root directory of the source VOC dataset.
        new_root: root directory of the augmented dataset.
        set: name of the image-set file (without ``.txt``) to load.
    """

    def __init__(self, root, new_root, set='train'):
        self.root = root
        self.new_root = new_root
        self.xml_root = os.path.join(root, 'Annotations')
        self.images_root = os.path.join(root, 'JPEGImages')
        self.names_root = os.path.join(root, 'ImageSets', 'Main')
        # One image stem per line; blank lines are ignored.
        with open(os.path.join(self.names_root, f'{set}.txt'), 'r') as f:
            self.name_list = [line.strip() for line in f if line.strip()]

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _stem(filename):
        """Return ``filename`` without its extension."""
        return os.path.splitext(filename)[0]

    @staticmethod
    def _augmented_name(filename, suffix):
        """Return ``<stem><suffix>.jpg`` for the given source file name."""
        return os.path.splitext(filename)[0] + suffix + '.jpg'

    @staticmethod
    def _clip_box(rx, ry, rw, rh, width, height):
        """Clip the rectangle ``(rx, ry, rw, rh)`` to the image.

        Returns ``(xmin, ymin, xmax, ymax)``, or ``None`` when the rectangle
        lies completely outside the image or collapses to zero size after
        clipping. The far edge is limited to ``width - 1`` / ``height - 1``.
        """
        if rx >= width or ry >= height or rx + rw <= 0 or ry + rh <= 0:
            return None
        x_min, y_min = max(rx, 0), max(ry, 0)
        x_max, y_max = min(rx + rw, width - 1), min(ry + rh, height - 1)
        if x_max <= x_min or y_max <= y_min:
            return None
        return x_min, y_min, x_max, y_max

    def _read_image(self, filename):
        """Load ``JPEGImages/<filename>`` unchanged (``cv2.imread`` flag -1).

        Raises:
            FileNotFoundError: if OpenCV cannot read the file.
        """
        path = os.path.join(self.images_root, filename)
        img = cv2.imread(path, -1)
        if img is None:
            raise FileNotFoundError(f"cannot read image: {path}")
        return img

    def _write_image(self, filename, img):
        """Write ``img`` to ``new_root/JPEGImages/<filename>``.

        Raises:
            OSError: if OpenCV reports that the image could not be written.
        """
        new_path = os.path.join(self.new_root, 'JPEGImages', filename)
        # Flipped images are negative-stride views; hand OpenCV a plain array.
        if not cv2.imwrite(new_path, np.ascontiguousarray(img)):
            raise OSError(f"failed to write image: {new_path}")

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def parseXmlFiles(self):
        """Parse the XML annotation of every image in ``self.name_list``.

        Returns:
            A list with one dict per image, holding ``folder``, ``filename``
            (``<name>.jpg``), ``size`` (``width``/``height``/``depth``) and
            ``object`` (a list of dicts with ``name``, ``pose``,
            ``truncated``, ``difficult`` and ``bndbox``). All values taken
            from the XML are kept as the strings found there.
        """
        infos = []
        for name in self.name_list:
            xml_root = ET.parse(os.path.join(self.xml_root, name + '.xml')).getroot()
            xml_size = xml_root.find('size')

            objects = []
            for xml_object in xml_root.findall('object'):
                obj = {tag: xml_object.find(tag).text
                       for tag in ('name', 'pose', 'truncated', 'difficult')}
                obj['bndbox'] = {child.tag: child.text
                                 for child in xml_object.find('bndbox')}
                objects.append(obj)

            infos.append({
                'folder': self.new_root,
                'filename': name + '.jpg',
                'size': {tag: xml_size.find(tag).text
                         for tag in ('width', 'height', 'depth')},
                'object': objects,
            })
        return infos

    def flip_image(self, infos, horizion=True):
        """Mirror every image and its boxes.

        Args:
            infos: list of image dicts as returned by :meth:`parseXmlFiles`
                (not modified; a deep copy is edited).
            horizion: ``True`` flips horizontally and appends ``_000`` to the
                stem, ``False`` flips vertically and appends ``_011``.

        Returns:
            The list of new image stems (without extension).
        """
        suffix = "_000" if horizion else "_011"
        name_list = []
        for info in copy.deepcopy(infos):
            img = self._read_image(info['filename'])
            new_img = img[:, ::-1] if horizion else img[::-1, :]
            info['filename'] = self._augmented_name(info['filename'], suffix)

            width = int(info['size']['width'])
            height = int(info['size']['height'])
            for obj in info['object']:
                box = obj['bndbox']
                if horizion:
                    x_min, x_max = int(box['xmin']), int(box['xmax'])
                    box['xmin'], box['xmax'] = width - x_max, width - x_min
                else:
                    y_min, y_max = int(box['ymin']), int(box['ymax'])
                    box['ymin'], box['ymax'] = height - y_max, height - y_min

            self._write_image(info['filename'], new_img)
            self.write_xml(info)
            name_list.append(self._stem(info['filename']))
        return name_list

    def crop_image(self, infos, random_seed):
        """Randomly crop the borders of every image without cutting any box.

        Each crop is pasted into the top-left corner of a zero-filled canvas
        of the original size; ``_001`` is appended to the stem. An image
        without objects is left uncropped.

        Args:
            infos: list of image dicts as returned by :meth:`parseXmlFiles`
                (not modified; a deep copy is edited).
            random_seed: seed of the private random generator shared by all
                images of this call.

        Returns:
            The list of new image stems (without extension).
        """
        rng = random.Random(random_seed)
        name_list = []
        for info in copy.deepcopy(infos):
            img = self._read_image(info['filename'])
            width = int(info['size']['width'])
            height = int(info['size']['height'])

            # Smallest distance from each image edge to any box; clamped at
            # zero in case a box already sticks out of the image.
            boxes = [obj['bndbox'] for obj in info['object']]
            margin_left = max(0, min((int(b['xmin']) for b in boxes), default=0))
            margin_right = max(0, min((width - int(b['xmax']) for b in boxes), default=0))
            margin_top = max(0, min((int(b['ymin']) for b in boxes), default=0))
            margin_bottom = max(0, min((height - int(b['ymax']) for b in boxes), default=0))

            crop_left = int(rng.uniform(0, margin_left))
            crop_right = width - int(rng.uniform(0, margin_right))
            crop_top = int(rng.uniform(0, margin_top))
            crop_bottom = height - int(rng.uniform(0, margin_bottom))

            # numpy images are indexed [row, column]; keep the channel axis
            # (if any) so colour images work too.
            new_img = np.zeros((height, width) + img.shape[2:], dtype=img.dtype)
            new_img[0: crop_bottom - crop_top, 0: crop_right - crop_left] = \
                img[crop_top: crop_bottom, crop_left: crop_right]

            info['filename'] = self._augmented_name(info['filename'], "_001")
            for obj in info['object']:
                box = obj['bndbox']
                x_min, x_max = int(box['xmin']), int(box['xmax'])
                y_min, y_max = int(box['ymin']), int(box['ymax'])
                box['xmin'] = max(0, x_min - crop_left)
                box['ymin'] = max(0, y_min - crop_top)
                box['xmax'] = box['xmin'] + (x_max - x_min)
                box['ymax'] = box['ymin'] + (y_max - y_min)

            self._write_image(info['filename'], new_img)
            self.write_xml(info)
            name_list.append(self._stem(info['filename']))
        return name_list

    def rotate_image(self, infos, random_seed):
        """Rotate every image about its centre by a random whole-degree angle.

        A fresh angle ``int(uniform(0, 180))`` is drawn per image from one
        seeded generator. Each box is replaced by the axis-aligned bounding
        rectangle (``cv2.boundingRect``) of its four rotated corners, clipped
        to the image; boxes that end up outside the image are dropped, and an
        image left without boxes is skipped. ``_010`` is appended to the stem.

        Args:
            infos: list of image dicts as returned by :meth:`parseXmlFiles`
                (not modified; a deep copy is edited).
            random_seed: seed of the private random generator.

        Returns:
            The list of new image stems (without extension).
        """
        rng = random.Random(random_seed)
        name_list = []
        for info in copy.deepcopy(infos):
            angle = int(rng.uniform(0, 180))
            img = self._read_image(info['filename'])
            width = int(info['size']['width'])
            height = int(info['size']['height'])
            M = cv2.getRotationMatrix2D((width // 2, height // 2), angle, 1.0)
            new_img = cv2.warpAffine(img, M, (width, height))

            info['filename'] = self._augmented_name(info['filename'], "_010")
            # Build a new list rather than removing from the one being walked.
            kept = []
            for obj in info['object']:
                box = obj['bndbox']
                x_min, x_max = int(box['xmin']), int(box['xmax'])
                y_min, y_max = int(box['ymin']), int(box['ymax'])
                corners = ((x_min, y_min), (x_max, y_min),
                           (x_max, y_max), (x_min, y_max))
                rotated = np.vstack(
                    [np.dot(M, np.array([cx, cy, 1])) for cx, cy in corners]
                ).astype(np.int32)
                clipped = self._clip_box(*cv2.boundingRect(rotated), width, height)
                if clipped is None:
                    continue
                box['xmin'], box['ymin'], box['xmax'], box['ymax'] = clipped
                kept.append(obj)
            info['object'] = kept
            if not kept:
                continue

            self._write_image(info['filename'], new_img)
            self.write_xml(info)
            name_list.append(self._stem(info['filename']))
        return name_list

    def mosaic(self, all_infos):
        """Tile random groups of four images around a common centre point.

        Four passes are made with seeds 0..3; each pass shuffles a deep copy
        of ``all_infos`` and tiles consecutive groups of four (left-over
        images are ignored). The canvas is twice the largest width and height
        of the group; tiles are placed top-left, top-right, bottom-left,
        bottom-right, each touching the canvas centre.

        Args:
            all_infos: list of image dicts as returned by
                :meth:`parseXmlFiles` (not modified).

        Returns:
            The list of new image stems (without extension).
        """
        name_list = []
        for random_seed in range(4):
            shuffled = copy.deepcopy(all_infos)
            random.Random(random_seed).shuffle(shuffled)
            for start in range(0, len(shuffled) - 3, 4):
                group = shuffled[start: start + 4]
                center_x = max(int(info['size']['width']) for info in group)
                center_y = max(int(info['size']['height']) for info in group)

                new_info = {
                    'folder': self.new_root,
                    'filename': '_'.join(self._stem(info['filename'])
                                         for info in group) + '.jpg',
                    'object': [],
                }

                new_img = None
                for j, info in enumerate(group):
                    img = self._read_image(info['filename'])
                    w = int(info['size']['width'])
                    h = int(info['size']['height'])
                    if new_img is None:
                        # Canvas layout follows the first tile: 2-D for
                        # single-channel images, 3-D otherwise.
                        new_img = np.zeros(
                            (center_y * 2, center_x * 2) + img.shape[2:],
                            dtype=img.dtype)
                        depth = 1 if img.ndim == 2 else img.shape[2]
                        new_info['size'] = {'width': center_x * 2,
                                            'height': center_y * 2,
                                            'depth': depth}

                    # Top-left corner of tile j on the canvas.
                    x_min, y_min = (
                        (center_x - w, center_y - h),  # top-left
                        (center_x, center_y - h),      # top-right
                        (center_x - w, center_y),      # bottom-left
                        (center_x, center_y),          # bottom-right
                    )[j]
                    new_img[y_min: y_min + h, x_min: x_min + w] = img

                    for obj in info['object']:
                        box = obj['bndbox']
                        box['xmin'] = x_min + int(box['xmin'])
                        box['ymin'] = y_min + int(box['ymin'])
                        box['xmax'] = x_min + int(box['xmax'])
                        box['ymax'] = y_min + int(box['ymax'])
                        new_info['object'].append(obj)

                self._write_image(new_info['filename'], new_img)
                self.write_xml(new_info)
                name_list.append(self._stem(new_info['filename']))
        return name_list

    def write_xml(self, info):
        """Write ``info`` as ``new_root/Annotations/<stem>.xml``.

        Args:
            info: image dict with ``filename``, ``size`` and ``object`` keys
                in the layout produced by :meth:`parseXmlFiles`.
        """
        E = objectify.ElementMaker(annotate=False)
        anno_tree = E.annotation(
            E.folder(self.new_root),
            E.filename(info['filename']),
            E.size(
                E.width(info['size']['width']),
                E.height(info['size']['height']),
                E.depth(info['size']['depth'])
            ),
            E.segmented(0)
        )
        for obj in info['object']:
            anno_tree.append(E.object(
                E.name(obj['name']),
                E.pose(obj['pose']),
                E.truncated(obj['truncated']),
                E.difficult(obj['difficult']),
                E.bndbox(
                    E.xmin(obj['bndbox']['xmin']),
                    E.ymin(obj['bndbox']['ymin']),
                    E.xmax(obj['bndbox']['xmax']),
                    E.ymax(obj['bndbox']['ymax'])
                )
            ))
        save_path = os.path.join(self.new_root, 'Annotations',
                                 self._stem(info['filename']) + '.xml')
        etree.ElementTree(anno_tree).write(save_path, pretty_print=True)

    def copy(self, infos):
        """Copy the images of ``infos`` unchanged and write their XML files.

        Returns:
            The list of copied image stems (without extension).
        """
        name_list = []
        for info in infos:
            shutil.copy(os.path.join(self.images_root, info['filename']),
                        os.path.join(self.new_root, 'JPEGImages', info['filename']))
            self.write_xml(info)
            name_list.append(self._stem(info['filename']))
        return name_list
DatasetAugment.py
import os
import random
from AugmentMethodClass import AugmentMethod
import shutil  # needed here to delete a previous output directory tree

# Dataset locations: `root` is the source VOC dataset, `new_root` receives
# the original images plus all augmented ones.
root = '/lalala_VOC'
new_root = '/lalala_VOC_v2'

# Start from an empty output tree. os.remove() only works on files, so a
# previous output directory has to be removed with shutil.rmtree().
if os.path.exists(new_root):
    shutil.rmtree(new_root)
os.makedirs(os.path.join(new_root, 'Annotations'))
os.makedirs(os.path.join(new_root, 'JPEGImages'))
os.makedirs(os.path.join(new_root, 'ImageSets', 'Main'))

VOC_dataset = AugmentMethod(root, new_root)
train_infos = VOC_dataset.parseXmlFiles()
val_VOC_dataset = AugmentMethod(root, new_root, 'val')
val_infos = val_VOC_dataset.parseXmlFiles()

# Training split: original images followed by every augmentation.
random_seed = 815
train_name_list = list()
train_name_list.extend(VOC_dataset.copy(train_infos))
train_name_list.extend(VOC_dataset.flip_image(train_infos))
train_name_list.extend(VOC_dataset.flip_image(train_infos, False))
train_name_list.extend(VOC_dataset.crop_image(train_infos, random_seed))
train_name_list.extend(VOC_dataset.rotate_image(train_infos, random_seed))
train_name_list.extend(VOC_dataset.mosaic(train_infos))
with open(os.path.join(new_root, 'ImageSets', 'Main', 'train.txt'), 'w') as f:
    f.writelines(name + '\n' for name in train_name_list)

# Validation split: copied as is, never augmented.
val_name_list = val_VOC_dataset.copy(val_infos)
with open(os.path.join(new_root, 'ImageSets', 'Main', 'val.txt'), 'w') as f:
    f.writelines(name + '\n' for name in val_name_list)