目录
以下脚本可直接从百度网盘下载:
链接:https://pan.baidu.com/s/1ofQgoePombKPh86o7u4wsw
提取码:1234
注意:
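以下脚本的使用需要依赖另两个脚本: voc_xml.py (生成标签文件)、utils.py
请大家在使用以下脚本时,在同级目录下创建这两个文件。
另外,第三节"拼接图片"脚本还会 import crop、flip、resize、rotate,因此需要把第一、二、四、五节的脚本分别保存为 crop.py、flip.py、resize.py、rotate.py,并与其放在同一目录下。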
1、 voc_xml.py :
from xml.dom.minidom import Document
import xml.etree.ElementTree as ET
import os
def get_xml_tree(xmlfile):
    """Parse an annotation XML file.

    Args:
        xmlfile: path of the XML file to read.

    Returns:
        The ElementTree object produced by ``ET.parse``.
    """
    return ET.parse(xmlfile)
class CreateXML():
    """Builder for a Pascal-VOC style annotation document (xml.dom.minidom)."""

    def __init__(self, img_name, img_w, img_h, img_channels):
        """Create the document skeleton: folder, filename, source and size.

        Args:
            img_name: image file name written to the <filename> node.
            img_w, img_h, img_channels: image width, height and channel count,
                written to <size>/<width>, <height> and <depth>.
        """
        self.img_name = img_name
        self.doc = Document()
        self.annotation = self.doc.createElement('annotation')
        self.doc.appendChild(self.annotation)

        self._append_text_element(self.annotation, 'folder', "AIA AUTO")
        self._append_text_element(self.annotation, 'filename', img_name)

        source = self.doc.createElement('source')
        self._append_text_element(source, 'database', 'The AUTO Database')
        self._append_text_element(source, 'annotation', "AUTO by zxl")
        self._append_text_element(source, 'JPEGImages', "flickr")
        self.annotation.appendChild(source)

        size = self.doc.createElement('size')
        self._append_text_element(size, 'width', str(img_w))
        self._append_text_element(size, 'height', str(img_h))
        self._append_text_element(size, 'depth', str(img_channels))
        self.annotation.appendChild(size)

    def _append_text_element(self, parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
        node = self.doc.createElement(tag)
        node.appendChild(self.doc.createTextNode(text))
        parent.appendChild(node)
        return node

    def add_object_node(self, obj_name, xmin_v, ymin_v, xmax_v, ymax_v, truncated_v=0, difficult_v=0):
        """Append one <object> node describing a labelled bounding box.

        Args:
            obj_name: class name of the object.
            xmin_v, ymin_v, xmax_v, ymax_v: top-left and bottom-right corner
                coordinates of the box.
            truncated_v: value written to <truncated>.
            difficult_v: value written to <difficult>.
        """
        obj = self.doc.createElement("object")
        self._append_text_element(obj, "name", obj_name)
        self._append_text_element(obj, "pose", "front")
        self._append_text_element(obj, "truncated", str(truncated_v))
        self._append_text_element(obj, 'difficult', str(difficult_v))

        bndbox = self.doc.createElement("bndbox")
        self._append_text_element(bndbox, "xmin", str(xmin_v))
        self._append_text_element(bndbox, "ymin", str(ymin_v))
        self._append_text_element(bndbox, "xmax", str(xmax_v))
        self._append_text_element(bndbox, "ymax", str(ymax_v))
        obj.appendChild(bndbox)

        self.annotation.appendChild(obj)

    def save_xml(self, save_path, xml_save_name):
        """Write the document, pretty-printed with 4-space indent, to disk.

        Args:
            save_path: directory to write into.
            xml_save_name: file name of the XML file.
        """
        # toprettyxml() without an encoding argument emits no encoding
        # declaration, so the file has to be UTF-8 to be valid XML; the context
        # manager guarantees the handle is flushed and closed.
        with open(os.path.join(save_path, xml_save_name), 'w', encoding='utf-8') as xml_file:
            xml_file.write(self.doc.toprettyxml(indent=' ' * 4))

    def get_doc(self):
        """Return the underlying minidom ``Document``."""
        return self.doc
2、utils.py
import os
import random
def confine(value, v_min, v_max):
    """Clamp a value to the closed interval [v_min, v_max].

    Args:
        value: input value.
        v_min, v_max: lower and upper bounds.

    Returns:
        ``v_min`` if value is below it, ``v_max`` if above it, else value.
    """
    return min(max(value, v_min), v_max)
def fileCountIn(dir):
    """Count the files under a directory, sub-directories included.

    Args:
        dir: directory to walk.

    Returns:
        Total number of files found by ``os.walk``.
    """
    total = 0
    for _root, _dirs, names in os.walk(dir):
        total += len(names)
    return total
def randomChoiceIn(dir):
    """Pick a random file name from the top level of a directory.

    Only files directly inside ``dir`` are candidates: callers join the
    returned name with ``dir``, so a name taken from a sub-directory would
    produce a path that does not exist.

    Args:
        dir: directory to choose from.

    Returns:
        The chosen file name (without any directory part).

    Raises:
        ValueError: if ``dir`` contains no files (or cannot be listed).
    """
    # The first os.walk() entry describes ``dir`` itself; the default covers
    # a missing/unreadable directory, for which os.walk() yields nothing.
    _root, _dirs, files = next(os.walk(dir), (dir, [], []))
    if not files:
        raise ValueError('no files to choose from in directory: %s' % dir)
    return random.choice(files)
def calc_rect_area(rect):
    """Compute the area of a rectangle.

    A small epsilon (0.001) is added to both side lengths.

    Args:
        rect: rectangle as [xmin, ymin, xmax, ymax].

    Returns:
        The (epsilon-padded) area.
    """
    width = rect[2] - rect[0] + 0.001
    height = rect[3] - rect[1] + 0.001
    return width * height
def calc_iou(rect1, rect2):
    """Compute the intersection-over-union of two rectangles.

    Args:
        rect1, rect2: rectangles as [xmin, ymin, xmax, ymax].

    Returns:
        The IoU, or 0 when the (epsilon-padded) intersection is empty.
    """
    left = max(rect1[0], rect2[0])
    top = max(rect1[1], rect2[1])
    right = min(rect1[2], rect2[2])
    bottom = min(rect1[3], rect2[3])
    # Same 0.001 padding as calc_rect_area, so identical boxes give IoU ~ 1.
    inter_w = right - left + 0.001
    inter_h = bottom - top + 0.001
    if not (inter_w > 0 and inter_h > 0):
        return 0
    union = calc_rect_area(rect1) + calc_rect_area(rect2) - inter_w * inter_h
    return inter_w * inter_h / union
一、随机裁剪
import cv2
import os
import random
import voc_xml
import utils
from voc_xml import CreateXML
def crop_img(src, top_left_x, top_left_y, crop_w, crop_h):
    """Cut a rectangular region out of an image.

    Args:
        src: source image array (rows x cols, optionally with a channel axis).
        top_left_x, top_left_y: top-left corner of the crop window.
        crop_w, crop_h: width and height of the crop window.

    Returns:
        The cropped view of ``src``, or None (after printing a message) when
        the window does not lie completely inside the image.
    """
    # Only the first two axes matter, so grayscale (2-D) images work too.
    rows, cols = src.shape[:2]
    row_min, col_min = int(top_left_y), int(top_left_x)
    row_max, col_max = int(row_min + crop_h), int(col_min + crop_w)
    # Negative starts must be rejected explicitly: numpy would interpret them
    # as "count from the end" and silently return a wrong or empty crop.
    if row_min < 0 or col_min < 0 or row_max > rows or col_max > cols:
        print("crop size err: src->%dx%d,crop->top_left(%d,%d) %dx%d" % (
            cols, rows, col_min, row_min, int(crop_w), int(crop_h)))
        return None
    return src[row_min:row_max, col_min:col_max]
def crop_xy(x, y, top_left_x, top_left_y, crop_w, crop_h):
    """Translate a point into the coordinate frame of a crop window.

    Args:
        x, y: point in source-image coordinates.
        top_left_x, top_left_y: top-left corner of the crop window.
        crop_w, crop_h: width and height of the crop window.

    Returns:
        (crop_x, crop_y): the shifted point, clamped to
        [0, crop_w - 1] x [0, crop_h - 1].
    """
    shifted_x = utils.confine(int(x - top_left_x), 0, crop_w - 1)
    shifted_y = utils.confine(int(y - top_left_y), 0, crop_h - 1)
    return shifted_x, shifted_y
def crop_box(box, top_left_x, top_left_y, crop_w, crop_h, iou_thr=0.5):
    """Translate a bounding box into the coordinate frame of a crop window.

    Args:
        box: box as [xmin, ymin, xmax, ymax] in source-image coordinates.
        top_left_x, top_left_y: top-left corner of the crop window.
        crop_w, crop_h: width and height of the crop window.
        iou_thr: minimum IoU between the original box size and the clipped
            box size (both anchored at the origin) for the box to be kept.

    Returns:
        The clipped box [xmin, ymin, xmax, ymax], or [0, 0, 0, 0] when too
        little of the original box survives the crop.
    """
    xmin, ymin = crop_xy(box[0], box[1], top_left_x, top_left_y, crop_w, crop_h)
    xmax, ymax = crop_xy(box[2], box[3], top_left_x, top_left_y, crop_w, crop_h)
    original_extent = [0, 0, box[2] - box[0], box[3] - box[1]]
    clipped_extent = [0, 0, xmax - xmin, ymax - ymin]
    if utils.calc_iou(original_extent, clipped_extent) < iou_thr:
        return [0, 0, 0, 0]
    return [xmin, ymin, xmax, ymax]
def crop_xml(crop_img_name, xml_tree, top_left_x, top_left_y, crop_w, crop_h, iou_thr=0.5):
    """Build the annotation of a cropped image from the source annotation.

    Args:
        crop_img_name: file name of the cropped image.
        xml_tree: parsed source annotation (result of ET.parse()).
        top_left_x, top_left_y: top-left corner of the crop window.
        crop_w, crop_h: width and height of the crop window.
        iou_thr: IoU threshold forwarded to crop_box.

    Returns:
        A CreateXML object holding every box that is still non-degenerate
        after cropping.
    """
    root = xml_tree.getroot()
    depth = int(root.find('size').find('depth').text)
    createdxml = CreateXML(crop_img_name, int(crop_w), int(crop_h), depth)
    for obj in root.iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        src_box = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        x0, y0, x1, y1 = crop_box(src_box, top_left_x, top_left_y, crop_w, crop_h, iou_thr)
        # Boxes that collapsed to zero width/height (or were rejected) are dropped.
        if x0 < x1 and y0 < y1:
            createdxml.add_object_node(label, x0, y0, x1, y1)
    return createdxml
def crop_img_xml(img, xml_tree, crop_img_name, top_left_x, top_left_y, crop_w, crop_h, iou_thr):
    """Crop an image together with its annotation.

    Args:
        img: source image.
        xml_tree: parsed source annotation (result of ET.parse()).
        crop_img_name: file name of the cropped image.
        top_left_x, top_left_y: top-left corner of the crop window.
        crop_w, crop_h: width and height of the crop window.
        iou_thr: IoU threshold forwarded to crop_xml.

    Returns:
        (croped_img, croped_xml), or None when crop_img rejects the window.
    """
    patch = crop_img(img, top_left_x, top_left_y, crop_w, crop_h)
    if patch is None:
        return None
    return patch, crop_xml(crop_img_name, xml_tree, top_left_x, top_left_y, crop_w, crop_h, iou_thr)
_CROP_TYPES = ('RANDOM_CROP', 'CENTER_CROP', 'FIVE_CROP')


def _check_crop_type(crop_type):
    """Raise ValueError for a crop type other than the three supported ones."""
    if crop_type not in _CROP_TYPES:
        raise ValueError('crop type wrong! expect [RANDOM_CROP,CENTER_CROP,FIVE_CROP]')


def _crops_per_image(crop_type, crop_n):
    """Return how many crops one image yields (1 for centre, 5 for five-crop)."""
    if crop_type == 'CENTER_CROP':
        return 1
    if crop_type == 'FIVE_CROP':
        return 5
    return crop_n


def _crop_size(imgw, imgh, dsize, fw, fh, random_wh):
    """Return the (width, height) of the crop window for one crop."""
    if random_wh:
        # fw/fh act as the lower bound of a random ratio in [f, 1).
        return (int(imgw * (fw + random.random() * (1 - fw))),
                int(imgh * (fh + random.random() * (1 - fh))))
    if tuple(dsize) == (0, 0):
        return int(imgw * fw), int(imgh * fh)
    return int(dsize[0]), int(dsize[1])


def _crop_top_left(crop_type, index, imgw, imgh, crop_w, crop_h):
    """Return the (x, y) top-left corner of crop number ``index``.

    The caller must have checked that the window fits inside the image.
    """
    # Largest valid start is size - crop (window is [x, x + crop_w)).
    max_x, max_y = imgw - crop_w, imgh - crop_h
    if crop_type == 'RANDOM_CROP':
        return random.randint(0, max_x), random.randint(0, max_y)
    center = (int(imgw / 2 - crop_w / 2), int(imgh / 2 - crop_h / 2))
    if crop_type == 'CENTER_CROP':
        return center
    # FIVE_CROP: four corners followed by the centre.
    corners = [(0, 0), (max_x, 0), (0, max_y), (max_x, max_y)]
    return corners[index] if index < 4 else center


def _crop_name(stem, name_suffix, x, y, crop_w, crop_h, ext):
    """Build the output file name that encodes crop position and size."""
    return stem + '_' + name_suffix + str(x) + '_' + str(y) + \
        '_wh' + str(crop_w) + 'x' + str(crop_h) + ext


def crop_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, img_suffix, name_suffix,
                          crop_type='RANDOM_CROP', crop_n=1, dsize=(0, 0), fw=1.0, fh=1.0, random_wh=False,
                          iou_thr=0.5):
    """Crop every annotated image found via the XML directory.

    Args:
        imgs_dir, xmls_dir: directories of the source images / source XML files.
        imgs_save_dir, xmls_save_dir: output directories for images / XML files.
        img_suffix: candidate image extensions, e.g. ['.jpg', '.png', '.bmp'].
        name_suffix: tag inserted into the output file names.
        crop_type: one of 'RANDOM_CROP', 'CENTER_CROP', 'FIVE_CROP'.
        crop_n: crops per image (forced to 1 / 5 for centre / five-crop).
        dsize: explicit crop size (w, h); ignored when random_wh is True.
        fw, fh: crop ratios when dsize is (0, 0) and random_wh is False,
            otherwise the lower bounds of the random crop ratios.
        random_wh: pick a random crop size for every crop.
        iou_thr: IoU threshold below which a clipped box is discarded.

    Raises:
        ValueError: if crop_type is not one of the supported values.
    """
    _check_crop_type(crop_type)
    crop_n = _crops_per_image(crop_type, crop_n)
    for root, dirs, files in os.walk(xmls_dir):
        for xml_name in files:
            stem, xml_ext = os.path.splitext(xml_name)
            if xml_ext.lower() != '.xml':
                continue
            img_file = None
            for suffix in img_suffix:
                candidate = os.path.join(imgs_dir, stem + suffix)
                if os.path.exists(candidate):
                    img_file = candidate
                    break
            if img_file is None:
                print("there has no JPEGImages for ", xml_name)
                continue
            img = cv2.imread(img_file)
            if img is None:
                print("failed to read image ", img_file)
                continue
            imgh, imgw = img.shape[:2]
            # Join with ``root`` (not xmls_dir) so files in sub-directories resolve;
            # parse once, the tree is only read by the crop helpers.
            xml_tree = voc_xml.get_xml_tree(os.path.join(root, xml_name))
            img_ext = os.path.splitext(img_file)[1]
            for i in range(crop_n):
                crop_imgw, crop_imgh = _crop_size(imgw, imgh, dsize, fw, fh, random_wh)
                if not (0 < crop_imgw <= imgw and 0 < crop_imgh <= imgh):
                    print("crop size err: src->%dx%d,crop->%dx%d" % (imgw, imgh, crop_imgw, crop_imgh))
                    continue
                crop_top_left_x, crop_top_left_y = _crop_top_left(
                    crop_type, i, imgw, imgh, crop_imgw, crop_imgh)
                croped_stem = _crop_name(stem, name_suffix, crop_top_left_x, crop_top_left_y,
                                         crop_imgw, crop_imgh, '')
                croped_img_name = croped_stem + img_ext
                croped = crop_img_xml(img, xml_tree, croped_img_name, crop_top_left_x,
                                      crop_top_left_y, crop_imgw, crop_imgh, iou_thr)
                if croped is None:
                    continue
                imgcrop, xmlcrop = croped
                cv2.imwrite(os.path.join(imgs_save_dir, croped_img_name), imgcrop)
                xmlcrop.save_xml(xmls_save_dir, croped_stem + '.xml')


def crop_imgs_without_label(imgs_dir, imgs_save_dir, name_suffix, crop_type='RANDOM_CROP',
                            crop_n=1, dsize=(0, 0), fw=1.0, fh=1.0, random_wh=False):
    """Crop every image in a directory, without touching any annotation.

    Args:
        imgs_dir: directory of the source images.
        imgs_save_dir: output directory; crops are always written as '.jpg'.
        name_suffix: tag inserted into the output file names.
        crop_type: one of 'RANDOM_CROP', 'CENTER_CROP', 'FIVE_CROP'.
        crop_n: crops per image (forced to 1 / 5 for centre / five-crop).
        dsize: explicit crop size (w, h); ignored when random_wh is True.
        fw, fh: crop ratios when dsize is (0, 0) and random_wh is False,
            otherwise the lower bounds of the random crop ratios.
        random_wh: pick a random crop size for every crop.

    Raises:
        ValueError: if crop_type is not one of the supported values.
    """
    _check_crop_type(crop_type)
    crop_n = _crops_per_image(crop_type, crop_n)
    imgcount = utils.fileCountIn(imgs_dir)
    count = 0
    for root, dirs, files in os.walk(imgs_dir):
        for file in files:
            # Progress is counted per source file, readable or not.
            count += 1
            img_file = os.path.join(root, file)
            img = cv2.imread(img_file)
            if img is None:
                print("failed to read image ", img_file)
            else:
                imgh, imgw = img.shape[:2]
                stem = os.path.splitext(file)[0]
                for i in range(crop_n):
                    crop_imgw, crop_imgh = _crop_size(imgw, imgh, dsize, fw, fh, random_wh)
                    if not (0 < crop_imgw <= imgw and 0 < crop_imgh <= imgh):
                        print("crop size err: src->%dx%d,crop->%dx%d" % (imgw, imgh, crop_imgw, crop_imgh))
                        continue
                    crop_top_left_x, crop_top_left_y = _crop_top_left(
                        crop_type, i, imgw, imgh, crop_imgw, crop_imgh)
                    croped_img_name = _crop_name(stem, name_suffix, crop_top_left_x, crop_top_left_y,
                                                 crop_imgw, crop_imgh, '.jpg')
                    croped_img = crop_img(img, crop_top_left_x, crop_top_left_y, crop_imgw, crop_imgh)
                    if croped_img is None:
                        continue
                    cv2.imwrite(os.path.join(imgs_save_dir, croped_img_name), croped_img)
            if count % 10 == 0:
                print('[%d|%d] %d%%' % (count, imgcount, count * 100 / imgcount))
def main():
    """Run the labelled crop job with the sample configuration below."""
    imgs_dir = 'C:/Users/pc/Desktop/JPEGImages/'
    xmls_dir = 'C:/Users/pc/Desktop/Annotations/'
    imgs_save_dir = 'C:/Users/pc/Desktop/image_crop/'
    xmls_save_dir = 'C:/Users/pc/Desktop/label_crop/'
    for out_dir in (imgs_save_dir, xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    img_suffix = ['.jpg', '.png', '.bmp']
    name_suffix = 'crop'  # tag inserted into output file names
    crop_type = 'RANDOM_CROP'  # one of 'RANDOM_CROP', 'CENTER_CROP', 'FIVE_CROP'
    crop_n = 5  # crops generated per source image
    dsize = (400, 300)  # explicit crop size (w, h)
    fw, fh = 0.5, 0.7  # crop ratios, used when dsize is (0, 0)
    random_wh = False  # True: random crop size, dsize is then ignored
    iou_thr = 0.5  # keep a clipped box only if its IoU with the original size reaches this

    crop_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, img_suffix, name_suffix,
                          crop_type=crop_type, crop_n=crop_n, dsize=dsize, fw=fw, fh=fh,
                          random_wh=random_wh, iou_thr=iou_thr)
    # Label-free alternative:
    # crop_imgs_without_label(imgs_dir, imgs_save_dir, name_suffix, crop_type,
    #                         crop_n, dsize, fw, fh, random_wh)


if __name__ == '__main__':
    main()
二、翻转
import cv2
import os
import random
import voc_xml
from voc_xml import CreateXML
def flip_img(src, flip_type):
    """Flip an image with ``cv2.flip``.

    Args:
        src: input image.
        flip_type: 1 horizontal flip, 0 vertical flip, -1 both.

    Returns:
        The flipped image.
    """
    return cv2.flip(src, flip_type)
def flip_xy(x, y, imgw, imgh, flip_type):
    """Mirror a point inside an image.

    Args:
        x, y: point coordinates.
        imgw, imgh: image width and height.
        flip_type: 1 horizontal flip, 0 vertical flip, -1 both.

    Returns:
        (fliped_x, fliped_y), or None (after printing a message) for an
        unknown flip type.
    """
    if flip_type not in (1, 0, -1):
        print('flip type err')
        return None
    # Horizontal flips (1, -1) mirror x; vertical flips (0, -1) mirror y.
    mirror_x = flip_type != 0
    mirror_y = flip_type != 1
    fliped_x = imgw - x if mirror_x else x
    fliped_y = imgh - y if mirror_y else y
    return fliped_x, fliped_y
def flip_box(box, imgw, imgh, flip_type):
    """Mirror a bounding box inside an image.

    Args:
        box: box as [xmin, ymin, xmax, ymax].
        imgw, imgh: image width and height.
        flip_type: 1 horizontal flip, 0 vertical flip, -1 both.

    Returns:
        The flipped box, re-ordered so that min <= max on both axes.
    """
    ax, ay = flip_xy(box[0], box[1], imgw, imgh, flip_type)
    bx, by = flip_xy(box[2], box[3], imgw, imgh, flip_type)
    # Mirroring swaps which corner is smaller, so sort each axis again.
    xmin, xmax = sorted((ax, bx))
    ymin, ymax = sorted((ay, by))
    return [xmin, ymin, xmax, ymax]
def flip_xml(flip_img_name, xml_tree, flip_type):
    """Build the annotation of a flipped image from the source annotation.

    Args:
        flip_img_name: file name of the flipped image.
        xml_tree: parsed source annotation (result of ET.parse()).
        flip_type: 1 horizontal flip, 0 vertical flip, -1 both.

    Returns:
        A CreateXML object with every non-degenerate flipped box.
    """
    root = xml_tree.getroot()
    size = root.find('size')
    imgw = int(size.find('width').text)
    imgh = int(size.find('height').text)
    depth = int(size.find('depth').text)
    createdxml = CreateXML(flip_img_name, int(imgw), int(imgh), depth)
    for obj in root.iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        src_box = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        x0, y0, x1, y1 = flip_box(src_box, imgw, imgh, flip_type)
        if x0 < x1 and y0 < y1:
            createdxml.add_object_node(label, x0, y0, x1, y1)
    return createdxml
def flip_img_xml(img, xml_tree, flip_img_name, flip_type):
    """Flip an image together with its annotation.

    Args:
        img: source image.
        xml_tree: parsed source annotation (result of ET.parse()).
        flip_img_name: file name of the flipped image.
        flip_type: 1 horizontal flip, 0 vertical flip, -1 both.

    Returns:
        (fliped_img, fliped_xml): flipped image and its CreateXML annotation.
    """
    return flip_img(img, flip_type), flip_xml(flip_img_name, xml_tree, flip_type)
def flip_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, img_suffix, name_suffix,
                          flip_types, random_flip=False):
    """Flip every annotated image found via the XML directory.

    Args:
        imgs_dir, xmls_dir: directories of the source images / source XML files.
        imgs_save_dir, xmls_save_dir: output directories for images / XML files.
        img_suffix: candidate image extensions, e.g. ['.jpg', '.png', '.bmp'].
        name_suffix: tag inserted into the output file names.
        flip_types: flip types applied to every image; each is 1 (horizontal),
            0 (vertical) or -1 (both).
        random_flip: when True, ignore flip_types and apply one randomly
            chosen flip type per image.
    """
    for root, dirs, files in os.walk(xmls_dir):
        for xml_name in files:
            # splitext keeps stems that contain dots intact.
            stem, xml_ext = os.path.splitext(xml_name)
            if xml_ext.lower() != '.xml':
                continue
            img_file = None
            for suffix in img_suffix:
                candidate = os.path.join(imgs_dir, stem + suffix)
                if os.path.exists(candidate):
                    img_file = candidate
                    break
            if img_file is None:
                print("there has no JPEGImages for ", xml_name)
                continue
            img = cv2.imread(img_file)
            if img is None:
                print("failed to read image ", img_file)
                continue
            # Join with ``root`` (not xmls_dir) so files in sub-directories resolve;
            # parse once, flip_xml only reads the tree.
            xml_tree = voc_xml.get_xml_tree(os.path.join(root, xml_name))
            img_ext = os.path.splitext(img_file)[1]
            types = [random.randint(-1, 1)] if random_flip else flip_types
            for tp in types:
                flip_stem = stem + '_' + name_suffix + '_type' + str(tp)
                flip_img_name = flip_stem + img_ext
                imgflip, xmlflip = flip_img_xml(img, xml_tree, flip_img_name, tp)
                cv2.imwrite(os.path.join(imgs_save_dir, flip_img_name), imgflip)
                xmlflip.save_xml(xmls_save_dir, flip_stem + '.xml')
def main():
    """Run the flip job with the sample configuration below."""
    imgs_dir = 'C:/Users/zxl/Desktop/test/JPEGImages/'
    xmls_dir = 'C:/Users/zxl/Desktop/test/Annotations/'
    imgs_save_dir = 'C:/Users/zxl/Desktop/test/flip_imgs/'
    xmls_save_dir = 'C:/Users/zxl/Desktop/test/flip_xmls/'
    for out_dir in (imgs_save_dir, xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    img_suffix = ['.jpg', '.png', '.bmp']
    name_suffix = 'flip'  # tag inserted into output file names
    flip_types = [1, 0, -1]  # 1 horizontal, 0 vertical, -1 both
    random_flip = False  # True: one random flip type per image, flip_types ignored

    flip_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, img_suffix, name_suffix,
                          flip_types, random_flip=random_flip)


if __name__ == '__main__':
    main()
三、拼接图片
import cv2
import os
import random
import copy
import numpy as np
import rotate
import resize
import flip
import crop
import voc_xml
import utils
from voc_xml import CreateXML
def mosaic_img(img, part_img, start_row, start_col):
    """Paste a tile into a larger image (in place).

    Args:
        img: destination image; it is modified.
        part_img: 3-D tile to paste.
        start_row, start_col: row / column of the tile's top-left corner.

    Returns:
        ``img`` itself, after pasting.
    """
    tile_h, tile_w, _ = part_img.shape
    end_row = start_row + tile_h
    end_col = start_col + tile_w
    img[start_row:end_row, start_col:end_col] = part_img
    return img
def translational_box(box, start_row, start_col):
    """Shift a box by the offset of the tile it belongs to.

    Args:
        box: box as [xmin, ymin, xmax, ymax].
        start_row, start_col: row / column where the tile was pasted.

    Returns:
        The shifted box: x values move by start_col, y values by start_row.
    """
    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
    return [xmin + start_col, ymin + start_row, xmax + start_col, ymax + start_row]
def transform_box(box, transforms):
    """Apply a recorded chain of image operations to a bounding box.

    Args:
        box: box as [xmin, ymin, xmax, ymax].
        transforms: list of operation dicts as returned by transform_img.
            Every dict has 'opt' ('rotate', 'crop', 'flip' or 'resize') and
            'imgwh' (size of the image the operation was applied to), plus the
            parameters read below for its kind.

    Returns:
        The transformed box [xmin, ymin, xmax, ymax]; [0, 0, 0, 0] once a
        crop step has rejected the box.
    """
    current = box
    for op in transforms:
        # A rejected box stays rejected; no point in transforming it further.
        if current == [0, 0, 0, 0]:
            break
        # Diagnostic output when the box exceeds the operation's input image.
        if current[2] > op['imgwh'][0] or current[3] > op['imgwh'][1]:
            print(op['opt'])
            print(op['imgwh'])
            print(current)
        kind = op['opt']
        if kind == 'resize':
            current = resize.resize_box(current, op['fx'], op['fy'])
        elif kind == 'rotate':
            rotated = rotate.rot_box(current, op['cterxy'], op['imgwh'], op['rot_angle'],
                                     op['scale'], op['correction'])
            imgw, imgh = op['imgwh'][0], op['imgwh'][1]
            # Entries 0/1 and 4/5 of the rot_box result are used as the two
            # opposite corners and clamped to the image.
            current = [utils.confine(rotated[0], 0, imgw - 1), utils.confine(rotated[1], 0, imgh - 1),
                       utils.confine(rotated[4], 0, imgw - 1), utils.confine(rotated[5], 0, imgh - 1)]
        elif kind == 'crop':
            current = crop.crop_box(current, op['top_left_x'], op['top_left_y'],
                                    op['crop_w'], op['crop_h'], op['iou_thr'])
        elif kind == 'flip':
            current = flip.flip_box(current, op['imgwh'][0], op['imgwh'][1], op['flip_type'])
    return current
def transform_xml(part_xml_tree, createdxml, transforms, start_row, start_col):
    """Add the transformed boxes of one tile to the mosaic annotation.

    Args:
        part_xml_tree: parsed annotation of the tile (result of ET.parse()).
        createdxml: CreateXML object of the mosaic image.
        transforms: operation chain that was applied to the tile.
        start_row, start_col: row / column where the tile was pasted.

    Returns:
        ``createdxml``, extended with the tile's surviving boxes.
    """
    for obj in part_xml_tree.getroot().iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        src_box = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        box = transform_box(src_box, transforms)
        # Skip boxes that became degenerate (or were rejected) on the way.
        if box[0] >= box[2] or box[1] >= box[3]:
            continue
        box = translational_box(box, start_row, start_col)
        createdxml.add_object_node(label, box[0], box[1], box[2], box[3])
    return createdxml
def transform_img(src_img, transforms):
    """Apply a chain of operations to an image and record what was done.

    Args:
        src_img: source image.
        transforms: list of operation dicts, applied in order. Supported
            'opt' values and the keys read for each:
            - 'rotate': 'bk_imgs_dir', 'rot_angle', 'randomRotation',
              'randomAngleRange', 'scale'
            - 'crop': 'crop_type', 'dsize', 'fw', 'fh', 'random_wh',
              'top_left_x', 'top_left_y'
            - 'flip': 'flip_type', 'random_flip'
            - 'resize': 'dsize', 'fx', 'fy'

    Returns:
        (transformed_img, certain_transforms): the resulting image and a deep
        copy of ``transforms`` in which every random choice has been replaced
        by the value actually used and 'imgwh' holds each operation's input
        size, so transform_box can replay the chain on bounding boxes.

    Raises:
        ValueError: if a crop window does not fit into the current image.
    """
    certain_transforms = copy.deepcopy(transforms)
    imgh, imgw = src_img.shape[:2]
    imgwh = [imgw, imgh]
    transformed_img = src_img
    for operate in certain_transforms:
        operate['imgwh'] = imgwh  # input size (w, h) of this operation
        # Always work from the size of the *current* image, which differs from
        # the source size once an earlier crop/resize has run.
        in_w, in_h = imgwh[0], imgwh[1]
        opt = operate['opt']
        if 'rotate' == opt:
            bk_dir = operate['bk_imgs_dir']
            bk_img = cv2.imread(os.path.join(bk_dir, utils.randomChoiceIn(bk_dir)))
            cterxy = [int(in_w / 2), int(in_h / 2)]
            rot_angle = operate['rot_angle']
            if operate['randomRotation']:
                angle_lo, angle_hi = operate['randomAngleRange'][0], operate['randomAngleRange'][1]
                rot_angle = random.randint(angle_lo, angle_hi)
            # NOTE(review): assumes rot_img_and_padding keeps the image size,
            # as imgwh is left unchanged here -- confirm against rotate.py.
            transformed_img = rotate.rot_img_and_padding(transformed_img, bk_img, cterxy, rot_angle,
                                                         operate['scale'])
            operate['cterxy'] = cterxy
            operate['rot_angle'] = rot_angle
        elif 'resize' == opt:
            transformed_img = resize.resize_img(transformed_img, operate['dsize'], operate['fx'],
                                                operate['fy'])
            # Record the size that was really produced instead of predicting it.
            out_h, out_w = transformed_img.shape[:2]
            imgwh = [out_w, out_h]
            operate['fx'] = out_w / in_w
            operate['fy'] = out_h / in_h
        elif 'crop' == opt:
            crop_imgw, crop_imgh = operate['dsize']
            if (0, 0) == operate['dsize'] and not operate['random_wh']:
                crop_imgw = int(in_w * operate['fw'])
                crop_imgh = int(in_h * operate['fh'])
            elif operate['random_wh']:
                crop_imgw = int(in_w * (operate['fw'] + random.random() * (1 - operate['fw'])))
                crop_imgh = int(in_h * (operate['fh'] + random.random() * (1 - operate['fh'])))
            if crop_imgw > in_w or crop_imgh > in_h:
                raise ValueError('crop size %dx%d exceeds image size %dx%d'
                                 % (crop_imgw, crop_imgh, in_w, in_h))
            if 'CENTER_CROP' == operate['crop_type']:
                top_left_x = int(in_w / 2 - crop_imgw / 2)
                top_left_y = int(in_h / 2 - crop_imgh / 2)
            elif 'RANDOM_CROP' == operate['crop_type']:
                # Largest valid start is size - crop; the former "- 1" made a
                # full-size crop raise "empty range".
                top_left_x = random.randint(0, in_w - crop_imgw)
                top_left_y = random.randint(0, in_h - crop_imgh)
            else:
                top_left_x, top_left_y = operate['top_left_x'], operate['top_left_y']
            cropped = crop.crop_img(transformed_img, top_left_x, top_left_y, crop_imgw, crop_imgh)
            if cropped is None:
                raise ValueError('crop window (%d,%d) %dx%d lies outside the %dx%d image'
                                 % (top_left_x, top_left_y, crop_imgw, crop_imgh, in_w, in_h))
            transformed_img = cropped
            imgwh = [crop_imgw, crop_imgh]
            operate['top_left_x'], operate['top_left_y'] = top_left_x, top_left_y
            operate['crop_w'], operate['crop_h'] = crop_imgw, crop_imgh
        elif 'flip' == opt:
            flip_type = operate['flip_type']
            if operate['random_flip']:
                flip_type = random.randint(-1, 1)
            transformed_img = flip.flip_img(transformed_img, flip_type)
            operate['flip_type'] = flip_type
    return transformed_img, certain_transforms
def mosaic_img_xml(img, part_img, createdxml, part_xml_tree, transforms, start_row, start_col):
    """Transform one tile and paste it, with its boxes, into the mosaic.

    Args:
        img: mosaic image.
        part_img: tile image.
        createdxml: CreateXML object of the mosaic image.
        part_xml_tree: parsed annotation of the tile (result of ET.parse()).
        transforms: operation chain to apply to the tile.
        start_row, start_col: row / column where the tile is pasted.

    Returns:
        (img, createdxml): the updated mosaic image and annotation.
    """
    tile, applied_ops = transform_img(part_img, transforms)
    pasted = mosaic_img(img, tile, start_row, start_col)
    annotation = transform_xml(part_xml_tree, createdxml, applied_ops, start_row, start_col)
    return pasted, annotation
def generate_img_xml(img_save_name, imgw, imgh, part_imgw, part_imgh, transforms, imgs_dir, xmls_dir):
    """Build one mosaic image and its annotation from random source images.

    The canvas is filled with a grid of int(imgw / part_imgw) columns by
    int(imgh / part_imgh) rows; every cell gets a randomly chosen source
    image that has been passed through ``transforms``.

    Args:
        img_save_name: file name recorded in the mosaic annotation.
        imgw, imgh: width and height of the mosaic image.
        part_imgw, part_imgh: width and height of one grid cell.
        transforms: operation chain applied to every tile.
        imgs_dir: directory of the source images.
        xmls_dir: directory of the matching source XML files.

    Returns:
        (img, createdxml): the mosaic image and its CreateXML annotation.

    Raises:
        IOError: if a chosen source image cannot be read.
    """
    createdxml = CreateXML(img_save_name, imgw, imgh, 3)
    img = np.zeros((imgh, imgw, 3), dtype=np.uint8)
    part_cols = int(imgw / part_imgw)
    part_rows = int(imgh / part_imgh)
    for row in range(part_rows):
        for col in range(part_cols):
            start_row = row * part_imgh
            start_col = col * part_imgw
            part_img_file = utils.randomChoiceIn(imgs_dir)
            part_img_path = os.path.join(imgs_dir, part_img_file)
            part_img = cv2.imread(part_img_path)
            if part_img is None:
                raise IOError('cannot read image: ' + part_img_path)
            # splitext keeps stems that contain dots intact.
            part_xml_file = os.path.join(xmls_dir, os.path.splitext(part_img_file)[0] + '.xml')
            part_xml_tree = voc_xml.get_xml_tree(part_xml_file)
            img, createdxml = mosaic_img_xml(img, part_img, createdxml, part_xml_tree, transforms,
                                             start_row, start_col)
    return img, createdxml
def generate_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, name_suffix,
                              count, imgw, imgh, part_imgw, part_imgh, transforms):
    """Generate a batch of mosaic images with annotations.

    Outputs are named ``<name_suffix>_<n>.jpg`` / ``<name_suffix>_<n>.xml``
    for n in range(count).

    Args:
        imgs_dir, xmls_dir: directories of the source images / source XML files.
        imgs_save_dir, xmls_save_dir: output directories for images / XML files.
        name_suffix: prefix of the output file names.
        count: number of mosaic images to generate.
        imgw, imgh: width and height of every mosaic image.
        part_imgw, part_imgh: width and height of one grid cell.
        transforms: operation chain applied to every tile (see transform_img).
    """
    for n in range(count):
        # Derive both names from one stem; splitting the image name at the
        # first '.' would mangle the XML name when name_suffix contains a dot.
        stem = name_suffix + '_' + str(n)
        img_save_name = stem + '.jpg'
        img, createdxml = generate_img_xml(img_save_name, imgw, imgh, part_imgw, part_imgh, transforms,
                                           imgs_dir, xmls_dir)
        cv2.imwrite(os.path.join(imgs_save_dir, img_save_name), img)
        createdxml.save_xml(xmls_save_dir, stem + '.xml')
def main():
    """Run the mosaic job with the sample configuration below."""
    imgs_dir = 'C:/Users/pc/Desktop/test/JPEGImages/'
    bk_imgs_dir = 'C:/Users/pc/Desktop/test/back/'
    xmls_dir = 'C:/Users/pc/Desktop/test/Annotations/'
    imgs_save_dir = 'C:/Users/pc/Desktop/test/mosaic_imgs/'
    xmls_save_dir = 'C:/Users/pc/Desktop/test/mosaic_xmls/'
    for out_dir in (imgs_save_dir, xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    name_suffix = 'mosaic'  # prefix of output file names
    count = 10  # number of mosaic images to generate
    imgw, imgh = 800, 600  # size of every mosaic image
    part_imgw, part_imgh = int(imgw / 4), int(imgh / 3)  # 4 x 3 grid of tiles

    # Per-tile pipeline: random rotation -> random crop -> random flip ->
    # resize to the grid cell size.
    rotate_op = {'opt': 'rotate', 'cterxy': [], 'imgwh': [], 'rot_angle': 0, 'randomRotation': True,
                 'randomAngleRange': [0, 360], 'scale': 1.0, 'correction': True, 'bk_imgs_dir': bk_imgs_dir}
    crop_op = {'opt': 'crop', 'crop_type': 'RANDOM_CROP', 'dsize': (0, 0), 'top_left_x': 0, 'top_left_y': 0,
               'crop_w': 0, 'crop_h': 0, 'fw': 0.6, 'fh': 0.6, 'random_wh': True, 'iou_thr': 0.5, 'imgwh': []}
    flip_op = {'opt': 'flip', 'flip_type': -1, 'random_flip': True, 'imgwh': []}
    resize_op = {'opt': 'resize', 'fx': 0.5, 'fy': 0.5, 'dsize': (part_imgw, part_imgh), 'imgwh': []}
    transforms = [rotate_op, crop_op, flip_op, resize_op]

    generate_img_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, name_suffix,
                              count, imgw, imgh, part_imgw, part_imgh, transforms)


if __name__ == '__main__':
    main()
四、resize
import cv2
import voc_xml
from voc_xml import CreateXML
import os
def resize_xy(x, y, fx, fy):
    """Scale a point.

    Args:
        x, y: point coordinates.
        fx, fy: scale factors along x and y.

    Returns:
        (x, y): scaled coordinates, truncated to int.
    """
    scaled_x = int(x * fx)
    scaled_y = int(y * fy)
    return scaled_x, scaled_y
def resize_box(box, fx, fy):
    """Scale a bounding box.

    Args:
        box: box as [xmin, ymin, xmax, ymax].
        fx, fy: scale factors along x and y.

    Returns:
        The scaled box [xmin, ymin, xmax, ymax].
    """
    top_left = resize_xy(box[0], box[1], fx, fy)
    bottom_right = resize_xy(box[2], box[3], fx, fy)
    return [top_left[0], top_left[1], bottom_right[0], bottom_right[1]]
def resize_img(src, dsize=(0, 0), fx=1.0, fy=1.0):
    """Resize an image with ``cv2.resize``.

    Args:
        src: source image.
        dsize: target size (w, h), passed to cv2.resize as dsize.
        fx, fy: scale factors, passed to cv2.resize as fx / fy.

    Returns:
        The resized image.
    """
    return cv2.resize(src, dsize, fx=fx, fy=fy)
def resize_xml(resized_img_name, xml_tree, dsize=(0, 0), fx=1.0, fy=1.0):
    """Build the annotation of a resized image from the source annotation.

    Args:
        resized_img_name: file name of the resized image.
        xml_tree: parsed source annotation (result of ET.parse()).
        dsize: target size (w, h); (0, 0) means "use fx / fy".
        fx, fy: scale factors, used when dsize is (0, 0).

    Returns:
        A CreateXML object with every non-degenerate scaled box.
    """
    root = xml_tree.getroot()
    size = root.find('size')
    imgw = int(size.find('width').text)
    imgh = int(size.find('height').text)
    depth = int(size.find('depth').text)
    if dsize == (0, 0):
        target_w, target_h = int(imgw * fx), int(imgh * fy)
    else:
        target_w, target_h = dsize
    # Effective factors are derived from the final size so that an explicit
    # dsize overrides fx / fy.
    scale_x, scale_y = target_w / imgw, target_h / imgh
    createdxml = CreateXML(resized_img_name, target_w, target_h, depth)
    for obj in root.iter('object'):
        label = obj.find('name').text
        bndbox = obj.find('bndbox')
        src_box = [int(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        x0, y0, x1, y1 = resize_box(src_box, scale_x, scale_y)
        if x0 < x1 and y0 < y1:
            createdxml.add_object_node(label, x0, y0, x1, y1)
    return createdxml
def generate_resizeImg_xml(img, xml_tree, resized_img_name, dsize=(0, 0), fx=1.0, fy=1.0):
    """Resize an image together with its annotation.

    Args:
        img: source image.
        xml_tree: parsed source annotation (result of ET.parse()).
        resized_img_name: file name of the resized image.
        dsize: target size (w, h).
        fx, fy: scale factors.

    Returns:
        (resized_img, resized_xml): resized image and its CreateXML annotation.
    """
    return (resize_img(img, dsize, fx, fy),
            resize_xml(resized_img_name, xml_tree, dsize, fx, fy))
def resizeImg_xml_from_dir(imgs_dir, xmls_dir, imgs_save_dir, xmls_save_dir, img_suffix, name_suffix, dsize=(0, 0),
                           fx=1.0, fy=1.0):
    """Resize every annotated image found via the XML directory.

    Args:
        imgs_dir, xmls_dir: directories of the source images / source XML files.
        imgs_save_dir, xmls_save_dir: output directories for images / XML files.
        img_suffix: candidate image extensions, e.g. ['.jpg', '.png', '.bmp'].
        name_suffix: tag inserted into the output file names.
        dsize: target size (w, h); (0, 0) means "use fx / fy".
        fx, fy: scale factors, used when dsize is (0, 0).
    """
    for root, dirs, files in os.walk(xmls_dir):
        for xml_name in files:
            # splitext keeps stems that contain dots intact.
            stem, xml_ext = os.path.splitext(xml_name)
            if xml_ext.lower() != '.xml':
                continue
            img_file = None
            for suffix in img_suffix:
                candidate = os.path.join(imgs_dir, stem + suffix)
                if os.path.exists(candidate):
                    img_file = candidate
                    break
            if img_file is None:
                print("there has no JPEGImages for ", xml_name)
                continue
            img = cv2.imread(img_file)
            if img is None:
                print("failed to read image ", img_file)
                continue
            imgh, imgw = img.shape[:2]
            if dsize == (0, 0):
                # int() as in resize_xml: float sizes would put '.' into the
                # file name and give image and XML different stems.
                resize_imgw = int(imgw * fx)
                resize_imgh = int(imgh * fy)
            else:
                resize_imgw, resize_imgh = dsize
            resized_stem = stem + '_' + name_suffix + str(resize_imgw) + 'x' + str(resize_imgh)
            resized_img_name = resized_stem + os.path.splitext(img_file)[1]
            # Join with ``root`` (not xmls_dir) so files in sub-directories resolve.
            xml_tree = voc_xml.get_xml_tree(os.path.join(root, xml_name))
            imgResize, xmlResize = generate_resizeImg_xml(img, xml_tree, resized_img_name, dsize, fx, fy)
            cv2.imwrite(os.path.join(imgs_save_dir, resized_img_name), imgResize)
            xmlResize.save_xml(xmls_save_dir, resized_stem + '.xml')
def main():
    """Resize one hard-coded dataset (images + VOC xml files)."""
    imgs_dir = 'C:/Users/pc/Desktop/JPEGImages/'
    xmls_dir = 'C:/Users/pc/Desktop/Annotations/'
    imgs_save_dir = 'C:/Users/pc/Desktop/image_resize/'
    xmls_save_dir = 'C:/Users/pc/Desktop/label_resize/'
    # Create the output folders (images first, then labels) when missing.
    for out_dir in (imgs_save_dir, xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    resizeImg_xml_from_dir(
        imgs_dir,
        xmls_dir,
        imgs_save_dir,
        xmls_save_dir,
        img_suffix=['.jpg', '.png', '.bmp'],
        name_suffix='rsize',  # tag used in the generated file names
        dsize=(400, 200),  # explicit target size (w, h)
        fx=1.0,  # scale factors
        fy=1.0,
    )


if __name__ == '__main__':
    main()
五、旋转
import cv2
import os
import math
import random
import voc_xml
from voc_xml import CreateXML
import utils
# Rotate one annotation-box coordinate.
def rot_xy(rot_cter_x, rot_cter_y, x, y, seta, scale=1.0):
    """Rotate (and scale) a point around a centre.

    Args:
        rot_cter_x, rot_cter_y: coordinates of the rotation centre.
        x, y: coordinates of the point to rotate.
        seta: rotation angle in degrees. The angle is negated before the
            rotation matrix is applied (the original author notes this makes
            the point follow OpenCV's image rotation).
        scale: scale factor applied to the offset from the centre.

    Returns:
        ``(rotx, roty)``: the rotated coordinates, truncated to ``int``.
    """
    angle = math.radians(-seta)
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    # Offsets from the rotation centre, already scaled.
    dx = (x - rot_cter_x) * scale
    dy = (y - rot_cter_y) * scale
    rotx = rot_cter_x + dx * cos_a - dy * sin_a
    roty = rot_cter_y + dx * sin_a + dy * cos_a
    return int(rotx), int(roty)
def rot_box(box, cterxy, imgwh, rot_angle, scale=1.0, correction=True):
    """Rotate an axis-aligned box around a centre.

    Args:
        box: box coordinates ``[xmin, ymin, xmax, ymax]``.
        cterxy: rotation centre ``[cter_x, cter_y]``.
        imgwh: image size ``[w, h]`` used to confine the corrected box.
        rot_angle: rotation angle, forwarded to ``rot_xy``.
        scale: scale factor, forwarded to ``rot_xy``.
        correction: when True, replace the rotated quadrilateral by its
            axis-aligned bounding box confined to the image.

    Returns:
        Eight coordinates ``[x1, y1, x2, y2, x3, y3, x4, y4]`` in the corner
        order top-left, bottom-left, bottom-right, top-right.
        With ``correction=False`` these are the rotated corners themselves.
        With ``correction=True`` they describe the confined bounding box, or
        an all-zero box when the confined box overlaps the unconfined one with
        an IoU below 0.5 (object mostly outside the image).
    """
    xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
    corners = [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)]
    result_box = []
    for corner_x, corner_y in corners:
        result_box.extend(rot_xy(cterxy[0], cterxy[1], corner_x, corner_y, rot_angle, scale))
    if not correction:
        # Return the rotated corners; the previous code returned the
        # *unrotated* input corners here, making the call a no-op.
        return result_box
    xs, ys = result_box[0::2], result_box[1::2]
    xmin, xmax = min(xs), max(xs)
    ymin, ymax = min(ys), max(ys)
    # utils.confine is presumably a clamp to [low, high] -- confirm in utils.py.
    xmin_v = utils.confine(xmin, 0, imgwh[0] - 1)
    ymin_v = utils.confine(ymin, 0, imgwh[1] - 1)
    xmax_v = utils.confine(xmax, 0, imgwh[0] - 1)
    ymax_v = utils.confine(ymax, 0, imgwh[1] - 1)
    # Drop objects that are heavily truncated by the image border.
    if utils.calc_iou([xmin, ymin, xmax, ymax], [xmin_v, ymin_v, xmax_v, ymax_v]) < 0.5:
        # All four values are reset (ymax_v used to be skipped by a typo).
        xmin_v, ymin_v, xmax_v, ymax_v = 0, 0, 0, 0
    return [xmin_v, ymin_v, xmin_v, ymax_v, xmax_v, ymax_v, xmax_v, ymin_v]
def rot_xml(rot_img_name, xml_tree, cterxy, rot_angle, scale=1.0, correction=True):
    """Build the annotation that matches a rotated image.

    Args:
        rot_img_name: file name of the rotated image, stored in the new xml.
        xml_tree: parsed source annotation (result of ``ET.parse``).
        cterxy: rotation centre ``[cter_x, cter_y]``.
        rot_angle: rotation angle, forwarded to ``rot_box``.
        scale: scale factor, forwarded to ``rot_box``.
        correction: forwarded to ``rot_box`` (axis-aligned correction).

    Returns:
        A ``CreateXML`` object holding every object whose rotated box still
        has a positive width and height after being confined to the image.
    """
    root = xml_tree.getroot()
    size = root.find('size')
    imgw, imgh, depth = (int(size.find(tag).text) for tag in ('width', 'height', 'depth'))
    createdxml = CreateXML(rot_img_name, imgw, imgh, depth)
    for obj in root.iter('object'):
        obj_name = obj.find('name').text
        xml_box = obj.find('bndbox')
        # int(float(...)) also accepts coordinates stored as "12.0".
        xmin, ymin, xmax, ymax = (int(float(xml_box.find(tag).text))
                                  for tag in ('xmin', 'ymin', 'xmax', 'ymax'))
        # box = [x1, y1, ..., x4, y4]; entries 0/1 are the first corner and
        # entries 4/5 the opposite (third) corner.
        box = rot_box([xmin, ymin, xmax, ymax], cterxy, [imgw, imgh], rot_angle, scale, correction)
        rxmin = utils.confine(box[0], 0, imgw - 1)
        rymin = utils.confine(box[1], 0, imgh - 1)
        rxmax = utils.confine(box[4], 0, imgw - 1)
        rymax = utils.confine(box[5], 0, imgh - 1)
        if rxmin >= rxmax or rymin >= rymax:
            continue
        # Store the confined values that were just validated, not the raw ones.
        createdxml.add_object_node(obj_name, rxmin, rymin, rxmax, rymax)
    return createdxml
# Rotate an image and fill the empty border areas from a background image.
def rot_img_and_padding(img, bk_img, cterxy, rot_angle, scale=1.0):
    """Rotate ``img`` around ``cterxy`` and pad the uncovered area.

    The image is warped with ``cv2.warpAffine`` into a canvas of the same size
    with a black border. Each row is then scanned inwards from its left and
    right ends: all-zero pixels are replaced by pixels taken from a randomly
    sized bottom-right patch of ``bk_img`` until the first non-zero pixel is
    met on that side.

    Args:
        img: image to rotate (array with ``shape[:2] == (rows, cols)``).
        bk_img: background image used for padding.
        cterxy: rotation centre ``[x, y]``.
        rot_angle: rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D`` (positive = counter-clockwise).
        scale: scale factor, passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated and padded image.
    """
    img_rows, img_cols = img.shape[:2]
    bk_rows, bk_cols = bk_img.shape[:2]
    # Size of the background patch: 1/1 .. 1/5 of each side, at least one
    # pixel so the modulo below can never divide by zero on tiny backgrounds.
    patch_rows = max(int(bk_rows / random.randint(1, 5)), 1)
    patch_cols = max(int(bk_cols / random.randint(1, 5)), 1)
    r_offset = bk_rows - patch_rows
    c_offset = bk_cols - patch_cols
    matRotation = cv2.getRotationMatrix2D((cterxy[0], cterxy[1]), rot_angle, scale)
    imgRotation = cv2.warpAffine(img, matRotation, (int(img_cols), int(img_rows)), borderValue=(0, 0, 0))
    rot_img_rows, rot_img_cols = imgRotation.shape[:2]
    for r in range(rot_img_rows):
        bk_r = r % patch_rows + r_offset
        left_done = right_done = False
        for c in range(rot_img_cols):
            left_c, right_c = c, rot_img_cols - 1 - c
            if left_c > right_c:
                break
            if not left_done:
                if imgRotation[r, left_c].any():
                    left_done = True
                else:
                    imgRotation[r, left_c] = bk_img[bk_r, left_c % patch_cols + c_offset]
            if not right_done:
                if imgRotation[r, right_c].any():
                    # Stop at the first real pixel, mirroring the left side;
                    # without this the scan kept overwriting genuine black
                    # pixels all the way to the middle of the row.
                    right_done = True
                else:
                    imgRotation[r, right_c] = bk_img[bk_r, right_c % patch_cols + c_offset]
            if left_done and right_done:
                break
    return imgRotation
def generate_rotImg_xml(img, bk_img, xml_tree, cterxy, rot_img_name, rot_angle, scale=1.0, correction=True):
    """Rotate an image and produce the matching annotation.

    Args:
        img: image to rotate, forwarded to ``rot_img_and_padding``.
        bk_img: background image used to pad the rotated image.
        xml_tree: annotation of ``img`` (result of ``ET.parse``).
        cterxy: rotation centre ``[x, y]``.
        rot_img_name: file name of the rotated image, stored in the new xml.
        rot_angle: rotation angle.
        scale: scale factor.
        correction: whether rotated boxes are corrected to axis-aligned boxes.

    Returns:
        ``(imgRotation, xmlRotation)``: the rotated image and the new
        annotation object.
    """
    rotated_image = rot_img_and_padding(img, bk_img, cterxy, rot_angle, scale)
    rotated_annotation = rot_xml(rot_img_name, xml_tree, cterxy, rot_angle, scale, correction)
    return rotated_image, rotated_annotation
def rotImg_xml_centre_from_dirs(imgs_dir, bk_imgs_dir, xmls_dir, rot_img_save_dir, rot_xmls_save_dir, img_suffix,
                                name_suffix, rot_angles, randomAngleRange=(0, 360), random_num=1,
                                randomRotation=False, scale=1.0, correction=True):
    """Rotate every annotated image around its own centre and save the results.

    Output files are named ``<stem>_<name_suffix><angle><ext>`` (plus the same
    stem with ``.xml``); two rotations of one image by the same angle
    therefore overwrite each other.

    Args:
        imgs_dir, bk_imgs_dir, xmls_dir: directories with the source images,
            the background images and the source xml files.
        rot_img_save_dir, rot_xmls_save_dir: output directories (must exist).
        img_suffix: candidate image extensions, e.g. ``['.jpg', '.png', '.bmp']``.
        name_suffix: tag inserted into the generated file names.
        rot_angles: explicit angles ``[ang1, ang2, ...]``; used when
            ``randomRotation`` is False.
        randomAngleRange: ``[low, high]`` bounds (inclusive) for random angles.
        random_num: number of random rotations per image; used when
            ``randomRotation`` is True.
        randomRotation: enable random angles.
        scale: scale factor applied while rotating.
        correction: whether rotated boxes are corrected to axis-aligned boxes.
    """
    for root, _dirs, files in os.walk(xmls_dir):
        for xml_name in files:
            # Join with ``root`` so xml files in sub-directories are found.
            xml_file = os.path.join(root, xml_name)
            stem = os.path.splitext(xml_name)[0]
            img_file = None
            for suffix in img_suffix:
                candidate = os.path.join(imgs_dir, stem + suffix)
                if os.path.exists(candidate):
                    img_file = candidate
                    break
            if img_file is None:
                print("there has no JPEGImages for ", xml_name)
                continue
            img = cv2.imread(img_file)
            if img is None:
                # cv2.imread returns None for unreadable files.
                print("failed to read image ", img_file)
                continue
            imgh, imgw = img.shape[:2]
            centre = [int(imgw / 2), int(imgh / 2)]
            img_ext = os.path.splitext(img_file)[1]
            # The annotation is only read by rot_xml, so parse it once.
            xml_tree = voc_xml.get_xml_tree(xml_file)
            rot_num = random_num if randomRotation else len(rot_angles)
            for i in range(rot_num):
                if randomRotation:
                    r_angle = random.randint(randomAngleRange[0], randomAngleRange[1])
                else:
                    r_angle = rot_angles[i]
                bk_file = os.path.join(bk_imgs_dir, utils.randomChoiceIn(bk_imgs_dir))
                bk_img = cv2.imread(bk_file)
                if bk_img is None:
                    print("failed to read image ", bk_file)
                    continue
                rot_img_name = stem + '_' + name_suffix + str(r_angle) + img_ext
                imgRotation, xmlRotation = generate_rotImg_xml(img, bk_img, xml_tree, centre, rot_img_name,
                                                               r_angle, scale, correction)
                cv2.imwrite(os.path.join(rot_img_save_dir, rot_img_name), imgRotation)
                # splitext keeps the image and xml stems identical even when
                # the angle is a float such as 22.5.
                xmlRotation.save_xml(rot_xmls_save_dir, os.path.splitext(rot_img_name)[0] + '.xml')
def main():
    """Rotate one hard-coded dataset with random angles."""
    imgs_dir = 'C:/Users/pc/Desktop/test/JPEGImages/'
    bk_imgs_dir = 'C:/Users/pc/Desktop/test/back/'
    xmls_dir = 'C:/Users/pc/Desktop/test/Annotations/'
    rot_imgs_save_dir = 'C:/Users/pc/Desktop/test/rot_imgs/'
    rot_xmls_save_dir = 'C:/Users/pc/Desktop/test/rot_xmls/'
    # Create the output folders (images first, then labels) when missing.
    for out_dir in (rot_imgs_save_dir, rot_xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    img_suffix = ['.jpg', '.png', '.bmp']
    name_suffix = 'rot'  # tag used in the generated file names
    rot_angles = []  # explicit angles, only used when randomRotation=False
    rotImg_xml_centre_from_dirs(
        imgs_dir, bk_imgs_dir, xmls_dir, rot_imgs_save_dir, rot_xmls_save_dir, img_suffix,
        name_suffix, rot_angles,
        random_num=3,  # number of random rotations per image
        randomRotation=True,  # use random angles
        scale=0.8,
    )


if __name__ == '__main__':
    main()
六、显示
import cv2
import os
import utils
import math
import xml.etree.ElementTree as ET
def get_color_channel(c, offset, maxclass):
    """Interpolate one colour channel for a class offset.

    The offset is mapped onto a six-entry palette (position
    ``offset / maxclass * 5``) and the channel value is linearly interpolated
    between the two neighbouring palette entries.

    Args:
        c: channel index (0, 1 or 2) into a palette entry.
        offset: class offset.
        maxclass: maximum number of classes.

    Returns:
        The interpolated channel value.
    """
    palette = (
        (1, 0, 1),
        (0, 0, 1),
        (0, 1, 1),
        (0, 1, 0),
        (1, 1, 0),
        (1, 0, 0),
    )
    position = (offset / maxclass) * 5
    lower = math.floor(position)
    upper = math.ceil(position)
    weight = position - lower
    return (1 - weight) * palette[lower][c] + weight * palette[upper][c]
def get_color(cls, maxcls=20):
    """Generate a display colour for a class id.

    Args:
        cls: class id (starting at 0).
        maxcls: maximum number of classes; enlarged to the next multiple of
            itself above ``cls`` when ``cls`` exceeds it.

    Returns:
        ``(B, G, R)`` colour tuple of ints in the range 0..255.
    """
    if cls > maxcls:
        maxcls = maxcls * (int(cls / maxcls) + 1)
    # Multiplying by a large constant spreads consecutive ids over the palette.
    offset = cls * 123457 % maxcls
    # Each channel is scaled to 0..255 exactly once (blue used to be
    # multiplied by 255 twice, giving values up to 65025).
    b, g, r = (get_color_channel(channel, offset, maxcls) for channel in range(3))
    return (int(b * 255), int(g * 255), int(r * 255))
def show_data(img_file, xml_file, windowname='ORG', class_color={}, showname=True, maxcls=20, wait_sec=0):
    """Display one image with the boxes from its VOC annotation.

    Args:
        img_file: image file path.
        xml_file: xml annotation file path.
        windowname: name of the display window.
        class_color: mapping ``class name -> colour``; colours for unseen
            classes are generated and stored in it. NOTE: the default dict is
            created once and shared by every call that omits this argument, so
            colour assignments persist between such calls.
        showname: whether to draw the class name above each box.
        maxcls: maximum number of classes, forwarded to ``get_color``.
        wait_sec: delay passed to ``cv2.waitKeyEx``.

    Returns:
        The key code returned by ``cv2.waitKeyEx``.
    """
    xml_root = ET.parse(xml_file).getroot()
    cv2.namedWindow(windowname, cv2.WINDOW_AUTOSIZE)
    img = cv2.imread(img_file)
    rows, cols, _ = img.shape
    # Line widths and font size scale with the image size.
    short_side = min([rows, cols])
    box_thickness = max([int(short_side * 0.003), 1])
    text_thickness = max([int(short_side * 0.001), 1])
    font_scale = 0.1 * rows / 90
    for obj in xml_root.iter('object'):
        cls_name = obj.find('name').text
        if cls_name not in class_color:
            # New class: its id is the number of classes seen so far.
            class_color[cls_name] = get_color(len(class_color), maxcls)
        color = class_color[cls_name]
        xmlbox = obj.find('bndbox')
        box = [int(float(xmlbox.find(tag).text)) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), color, box_thickness)
        if showname:
            retval, baseline = cv2.getTextSize(cls_name, cv2.FONT_HERSHEY_PLAIN, font_scale, text_thickness)
            # Filled label background directly above the box, then the text.
            cv2.rectangle(img, (box[0], box[1] - retval[1]), (box[0] + retval[0], box[1]), color, -1, 8, 0)
            cv2.putText(img, cls_name, (box[0], box[1]), cv2.FONT_HERSHEY_PLAIN, font_scale,
                        (0, 0, 0), text_thickness)
    cv2.imshow(windowname, img)
    # waitKey returns 0 for all arrow keys; waitKeyEx returns distinct codes.
    return cv2.waitKeyEx(wait_sec)
def show_data_in_dir(imgs_dir, xmls_dir, windowname='ORG', class_color={}, showname=True, maxcls=20, delete=False):
    """Browse the images of a directory together with their annotation boxes.

    Keys: space toggles continuous playback, ``q``/``Q`` quits, key codes
    2424832 / 2490368 (left / up arrow according to the original author) or
    ``p`` step back, any other key steps forward.

    Args:
        imgs_dir: image directory.
        xmls_dir: directory with the VOC xml annotation files.
        windowname: name of the display window.
        class_color: mapping ``class name -> BGR colour``; filled by
            ``show_data``. NOTE: the default dict is shared between calls
            that omit this argument.
        showname: whether to draw class names.
        maxcls: maximum number of classes.
        delete: delete xml files whose image does not exist.

    Returns:
        0.
    """
    xml_count, img_count = utils.fileCountIn(xmls_dir), utils.fileCountIn(imgs_dir)
    print('------show object boxes based on xml files (xml:%d,JPEGImages:%d)------' % (xml_count, img_count))
    count = 0
    cv2.namedWindow(windowname, cv2.WINDOW_AUTOSIZE)
    wait_sec = 0
    for root, _dirs, files in os.walk(xmls_dir):
        idx = 0
        step = 1  # direction of the last navigation: 1 forward, -1 backward
        while idx < len(files):
            xml_name = files[idx]
            count += 1
            if count % 100 == 0:
                print('[%d | %d]%d%%' % (xml_count, count, count * 100 / xml_count))
            # Join with ``root`` so xml files in sub-directories are found.
            xml_file = os.path.join(root, xml_name)
            xml_root = ET.parse(xml_file).getroot()
            img_name = xml_root.find('filename').text
            img_file = os.path.join(imgs_dir, img_name)
            if not os.path.exists(img_file):
                print('%s not exist!' % img_file)
                if delete:
                    os.remove(xml_file)
                    print(xml_file, 'has been removed!')
                    # Forget the entry so stepping back never re-opens the
                    # deleted file; the next entry slides into ``idx``.
                    del files[idx]
                    if step < 0:
                        idx -= 1
                else:
                    # Keep moving in the direction the user was going.
                    idx += step
                if idx < 0:
                    idx, step = 0, 1
                continue
            print(img_name)
            key = show_data(img_file, xml_file, windowname, class_color, showname, maxcls, wait_sec)
            if key == 32:
                wait_sec = 1 - wait_sec
            elif key == ord('q') or key == ord('Q'):
                return 0
            elif key == 2424832 or key == 2490368 or key == ord('p'):
                # Previous image; never move before the first entry (a
                # negative index used to wrap around and finally overflow).
                step = -1
                idx = max(idx - 1, 0)
            else:
                step = 1
                idx += 1
    cv2.destroyAllWindows()
    return 0
def show_data_in_pathfile(pathfile, windowname='ORG', class_color={}, showname=True, maxcls=20):
    """Browse the images listed in a path file together with their boxes.

    Each line of ``pathfile`` is an image path. The annotation is looked up in
    the sibling directory obtained by replacing ``JPEGImages`` with
    ``Annotations`` in the image's directory (standard VOC layout).

    Keys: space toggles continuous playback, ``q``/``Q`` quits, key codes
    2424832 / 2490368 (left / up arrow according to the original author) or
    ``p`` step back, any other key steps forward.

    Args:
        pathfile: text file with one image path per line.
        windowname: name of the display window.
        class_color: mapping ``class name -> BGR colour``; filled by
            ``show_data``. NOTE: the default dict is shared between calls
            that omit this argument.
        showname: whether to draw class names.
        maxcls: maximum number of classes.

    Returns:
        0 when the user quits with ``q``; otherwise None.
    """
    # Close the list file as soon as it has been read.
    with open(pathfile) as imgpathfiles:
        imgfilelines = imgpathfiles.readlines()
    fileCount = len(imgfilelines)
    print("----------- %d images------------" % fileCount)
    count = 0
    cv2.namedWindow(windowname, cv2.WINDOW_AUTOSIZE)
    wait_sec = 0
    idx = 0
    step = 1  # direction of the last navigation: 1 forward, -1 backward
    while idx < fileCount:
        imgfile = imgfilelines[idx].strip()
        dirname = os.path.dirname(imgfile).replace('JPEGImages', 'Annotations')
        # splitext keeps dots that belong to the base name.
        xmlfile = os.path.join(dirname, os.path.splitext(os.path.basename(imgfile))[0] + '.xml')
        count += 1
        if count % 100 == 0:
            print('[%d | %d]%d%%' % (fileCount, count, count * 100 / fileCount))
        missing = False
        if not os.path.exists(xmlfile):
            print(xmlfile, ' not exist!')
            missing = True
        elif not os.path.exists(imgfile):
            print(imgfile, ' not exist')
            missing = True
        if missing:
            # Keep moving in the direction the user was going.
            idx += step
            if idx < 0:
                idx, step = 0, 1
            continue
        print(os.path.basename(imgfile))
        key = show_data(imgfile, xmlfile, windowname, class_color, showname, maxcls, wait_sec)
        if key == 32:
            wait_sec = 1 - wait_sec
        elif key == ord('q') or key == ord('Q'):
            return 0
        elif key == 2424832 or key == 2490368 or key == ord('p'):
            # Previous image; never move before the first entry (a negative
            # index used to wrap around and finally overflow).
            step = -1
            idx = max(idx - 1, 0)
        else:
            step = 1
            idx += 1
    cv2.destroyAllWindows()
def main():
    """Browse one hard-coded dataset with its annotation boxes."""
    # Single image / annotation pair, for the show_data example below.
    img_file = 'C:/Users/pc/Desktop/test/JPEGImages/036.jpg'
    xml_file = 'C:/Users/pc/Desktop/test/Annotations/036.xml'
    # imgs_dir = 'C:/Users/pc/Desktop/test/JPEGImages/'
    # xmls_dir = 'C:/Users/pc/Desktop/test/Annotations/'
    imgs_dir = 'C:/Users/pc/Desktop/test/trans_imgs/'
    xmls_dir = 'C:/Users/pc/Desktop/test/trans_xmls/'
    # imgpathsfile = 'E:/myjob/DataSet/DETRAC_VOC_v2/detrac_train_v2.txt'

    # Show the boxes of a single image:
    # show_data(img_file,xml_file)

    # Show the images and annotation files of a folder.
    # Space: continuous playback; left/up: previous image;
    # right/down: next image; q: quit.
    show_data_in_dir(imgs_dir, xmls_dir, showname=True, maxcls=20)

    # Show the images and annotations listed in a path file (VOC layout).
    # Space: continuous playback; left/up or p: previous image;
    # right/down: next image; q: quit.
    # show_data_in_pathfile(imgpathsfile)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
七、随机组合变换
import cv2
import os
import numpy as np
import utils
import mosaic
import voc_xml
from voc_xml import CreateXML
def transform_img_xml(src_imgpath, src_xmlpath, transforms, img_save_name):
    """Apply ``transforms`` to an image and to its annotation.

    Args:
        src_imgpath: path of the image to transform.
        src_xmlpath: path of its xml annotation file.
        transforms: transform operations, forwarded to ``mosaic.transform_img``.
        img_save_name: file name recorded in the new annotation.

    Returns:
        ``(transformed_img, createdxml)``: the transformed image and the
        annotation produced by ``mosaic.transform_xml``.
    """
    source_image = cv2.imread(src_imgpath)
    source_tree = voc_xml.get_xml_tree(src_xmlpath)
    # transform_img also returns the transforms it actually applied, which
    # are then replayed on the annotation.
    transformed_img, applied_transforms = mosaic.transform_img(source_image, transforms)
    height, width, channels = transformed_img.shape
    new_xml = CreateXML(img_save_name, width, height, channels)
    return transformed_img, mosaic.transform_xml(source_tree, new_xml, applied_transforms, 0, 0)
def transform_onefile(src_imgpath, src_xmlpath, imgs_save_dir, xmls_save_dir, transforms, N=1):
    """Generate ``N`` transformed copies of one image and its annotation.

    Copies are saved as ``<name>_trans001.jpg`` / ``.xml``,
    ``<name>_trans002.jpg`` / ``.xml`` and so on, where ``<name>`` is the
    image file name up to its first dot.

    Args:
        src_imgpath: path of the image to transform.
        src_xmlpath: path of its xml annotation file.
        imgs_save_dir: directory for the generated images.
        xmls_save_dir: directory for the generated xml files.
        transforms: transform operations.
        N: number of transformed copies per source image.
    """
    base_name = os.path.basename(src_imgpath).split('.')[0]
    for n in range(1, N + 1):
        # ``base_name`` contains no dot, so this stem is also the xml stem.
        new_stem = base_name + '_trans' + str(n).zfill(3)
        img_save_name = new_stem + '.jpg'
        transformed_img, createdxml = transform_img_xml(src_imgpath, src_xmlpath, transforms, img_save_name)
        cv2.imwrite(os.path.join(imgs_save_dir, img_save_name), transformed_img)
        createdxml.save_xml(xmls_save_dir, new_stem + '.xml')
def transform_file_from_dirs(imgs_xmls_dirs, imgs_save_dir, xmls_save_dir, transforms, N=1):
    """Transform every image of several datasets and save the results.

    Args:
        imgs_xmls_dirs: datasets to process, indexable by ``0 .. len-1``
            (a list, or a dict keyed by those integers). Each entry is a dict
            with the keys ``'imgs_dir'``, ``'xmls_dir'`` and ``'bk_imgs_dir'``.
        imgs_save_dir: directory for the generated images.
        xmls_save_dir: directory for the generated xml files.
        transforms: transform operations; the ``'bk_imgs_dir'`` entry of every
            ``'rotate'`` operation is overwritten with the dataset's
            background directory.
        N: number of transformed copies per source image.
    """
    # Index access (rather than plain iteration) keeps dicts keyed 0..n-1 working.
    for i in range(len(imgs_xmls_dirs)):
        entry = imgs_xmls_dirs[i]
        imgs_dir = entry['imgs_dir']
        xmls_dir = entry['xmls_dir']
        bk_imgs_dir = entry['bk_imgs_dir']
        for trans in transforms:
            if trans['opt'] == 'rotate':
                trans['bk_imgs_dir'] = bk_imgs_dir
        fileCount = utils.fileCountIn(imgs_dir)
        count = 0
        for root, _dirs, files in os.walk(imgs_dir):
            for imgname in files:
                # Join with ``root`` so images inside sub-directories resolve
                # to their real path instead of a non-existent top-level one.
                src_imgpath = os.path.join(root, imgname)
                # splitext keeps dots that belong to the base name.
                src_xmlpath = os.path.join(xmls_dir, os.path.splitext(imgname)[0] + '.xml')
                count += 1
                if count % 10 == 0:
                    print('[%d | %d]%d%%' % (fileCount, count, count * 100 / fileCount))
                if not os.path.exists(src_xmlpath):
                    print(src_xmlpath, ' not exist!')
                    continue
                transform_onefile(src_imgpath, src_xmlpath, imgs_save_dir, xmls_save_dir, transforms, N)
def main():
    """Run the random combined transforms on one hard-coded dataset."""
    dataset = {
        'imgs_dir': 'C:/Users/pc/Desktop/dataset/JPEGImages/',
        'bk_imgs_dir': 'C:/Users/pc/Desktop/dataset/back/',
        'xmls_dir': 'C:/Users/pc/Desktop/dataset/Annotations/',
    }
    imgs_xmls_dirs = {0: dataset}
    imgs_save_dir = 'C:/Users/pc/Desktop/dataset/trans_imgs/'
    xmls_save_dir = 'C:/Users/pc/Desktop/dataset/trans_xmls/'
    # Create the output folders (images first, then labels) when missing.
    for out_dir in (imgs_save_dir, xmls_save_dir):
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
    N = 5  # transformed copies generated per source image
    resize_op = {'opt': 'resize', 'fx': 0.5, 'fy': 0.5, 'dsize': (1024, 1024), 'imgwh': []}
    rotate_op = {'opt': 'rotate', 'cterxy': [], 'imgwh': [], 'rot_angle': 0, 'randomRotation': True,
                 'randomAngleRange': [0, 360], 'scale': 0.3, 'correction': True, 'bk_imgs_dir': ''}
    flip_op = {'opt': 'flip', 'flip_type': -1, 'random_flip': True, 'imgwh': []}
    crop_op = {'opt': 'crop', 'crop_type': 'RANDOM_CROP', 'dsize': (500, 500), 'top_left_x': 0, 'top_left_y': 0,
               'crop_w': 0, 'crop_h': 0, 'fw': 0.6, 'fh': 0.6, 'random_wh': False, 'iou_thr': 0.5, 'imgwh': []}
    transforms = [resize_op, rotate_op, flip_op, crop_op]
    transform_file_from_dirs(imgs_xmls_dirs, imgs_save_dir, xmls_save_dir, transforms, N)


if __name__ == '__main__':
    main()
八、明暗变化
import cv2
import numpy as np
import os.path
import shutil
# Brightness
def brightness(image, percetage):
    """Return a copy of ``image`` with its colour channels scaled.

    Every value of the first three channels is multiplied by ``percetage``,
    truncated towards zero and clipped to 0..255. Channels beyond the third
    (e.g. alpha) are copied unchanged; a 2-D (grey-scale) image is scaled as a
    whole. The input array is not modified.

    Args:
        image: image array, ``(h, w)`` or ``(h, w, channels)``.
        percetage: brightness factor (< 1 darkens, > 1 brightens).

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    image_copy = image.copy()
    if image.ndim == 2:
        region = (Ellipsis,)
    else:
        region = (Ellipsis, slice(0, 3))
    # Work in float64 so neither the multiplication nor an integer factor can
    # overflow the (usually uint8) pixel type; one vectorised pass replaces
    # the per-pixel Python loop.
    scaled = np.trunc(image[region].astype(np.float64) * percetage)
    image_copy[region] = np.clip(scaled, 0, 255).astype(image_copy.dtype)
    return image_copy
if __name__ == '__main__':
    # Source image / annotation folders and the output folders.
    input_jpg = r'C:/Users/pc/Desktop/dataset/JPEGImages'
    input_xml = r'C:/Users/pc/Desktop/dataset/Annotations'
    imgs_save_dir = r'C:/Users/pc/Desktop/dataset/image_dark'
    if not os.path.exists(imgs_save_dir):
        os.makedirs(imgs_save_dir)
    xmls_save_dir = 'C:/Users/pc/Desktop/dataset/label_dark'
    if not os.path.exists(xmls_save_dir):
        os.makedirs(xmls_save_dir)
    for img_name in os.listdir(input_jpg):
        # splitext keeps dots that belong to the base name.
        name = os.path.splitext(img_name)[0]
        print(name)
        print(img_name)
        img_path = os.path.join(input_jpg, img_name)
        xml_src_path = os.path.join(input_xml, name + '.xml')
        xml_dst_path = os.path.join(xmls_save_dir, name)
        # Check both inputs before writing anything, so a missing annotation
        # cannot leave an image without its label file.
        if not os.path.exists(xml_src_path):
            print(xml_src_path, ' not exist!')
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("failed to read image ", img_path)
            continue
        # Darken; boxes do not move, so the annotation is copied unchanged.
        img_darker = brightness(img, 0.7)
        cv2.imwrite(os.path.join(imgs_save_dir, name + '_darker.jpg'), img_darker)
        shutil.copyfile(xml_src_path, xml_dst_path + '_darker.xml')
        print("Save " + os.path.join(imgs_save_dir, name + '_darker.jpg') + " Successfully!")
# # 变亮
# img_brighter = brightness(img, 1.5)
# cv2.imwrite(os.path.join(imgs_save_dir, name + '_brighter.jpg'), img_brighter)
# shutil.copyfile(xml_src_path, xml_dst_path + '_brighter.xml')
# print("Save " + os.path.join(imgs_save_dir, name + '_brighter.jpg') + " Successfully!")