在进行语义分割、目标检测等深度学习任务时,需要对原始数据进行一定的处理,增广数据集,再输入到网络当中。本文实现对原始图像的缩小,但是不改变原始图像的长宽:对缩小后的图像进行边缘填充,使其和原始图像大小一致。
import numpy as np
import cv2
import math
import random
import os
import xml.etree.ElementTree as ET
from PIL import Image
name_classes = ['hqc'] # Class names whose boxes get rescaled; replace with your own VOC class names.
def resize_xml(xml_file_name, new_xml_name, original_jpg_name, original_png_name,
               new_jpg_name, new_png_name, new_size=(224, 224), original_size=(256, 256)):
    """Shrink an image/label pair, pad both back to full size, and remap the boxes.

    The image and its segmentation label are resized to ``new_size`` and then
    surrounded by a black border so the result is ``original_size`` again.
    Bounding boxes of the classes listed in ``name_classes`` are scaled and
    shifted by the same amount and written to a new VOC-style XML file. The
    ``<size>`` element of the annotation is left untouched because the padded
    canvas has the same size as before.

    Args:
        xml_file_name: Path of the source annotation XML.
        new_xml_name: Path the updated annotation is written to.
        original_jpg_name: Path of the source image.
        original_png_name: Path of the source segmentation label.
        new_jpg_name: Path the shrunk-and-padded image is written to.
        new_png_name: Path the shrunk-and-padded label is written to.
        new_size: ``(width, height)`` the content is shrunk to.
        original_size: ``(width, height)`` of the padded output. The box
            scaling assumes the source image and its annotation use this
            size as well.

    Raises:
        ValueError: If ``new_size`` is not positive or exceeds
            ``original_size`` in either dimension.
    """
    new_w, new_h = new_size
    orig_w, orig_h = original_size
    pad_x = orig_w - new_w
    pad_y = orig_h - new_h
    if new_w <= 0 or new_h <= 0 or pad_x < 0 or pad_y < 0:
        raise ValueError(
            'new_size must be positive and no larger than original_size, '
            'got new_size=%r, original_size=%r' % (new_size, original_size)
        )

    # Split the padding between both sides; with the defaults (256 -> 224)
    # this is 16 pixels on every edge. An odd remainder goes to the
    # right/bottom edge.
    left = pad_x // 2
    right = pad_x - left
    top = pad_y // 2
    bottom = pad_y - top

    def _shrink_and_pad(src_name, dst_name, ext, interpolation):
        # Read via PIL and write via imencode(...).tofile(...), as the file
        # paths may contain non-ASCII characters.
        with Image.open(src_name) as pil_img:
            bgr = cv2.cvtColor(np.asarray(pil_img), cv2.COLOR_RGB2BGR)
        shrunk = cv2.resize(bgr, (new_w, new_h), interpolation=interpolation)
        # Constant black border restores the original canvas size.
        padded = cv2.copyMakeBorder(shrunk, top, bottom, left, right,
                                    cv2.BORDER_CONSTANT, value=(0, 0, 0))
        cv2.imencode(ext, padded)[1].tofile(dst_name)

    _shrink_and_pad(original_jpg_name, new_jpg_name, '.jpg', cv2.INTER_LINEAR)
    # Nearest-neighbour keeps label values intact; interpolating a mask would
    # blend neighbouring classes into values that belong to neither.
    _shrink_and_pad(original_png_name, new_png_name, '.png', cv2.INTER_NEAREST)

    scale_x = new_w / orig_w
    scale_y = new_h / orig_h
    tree = ET.parse(xml_file_name)
    for obj in tree.findall('object'):
        # Only boxes whose class is listed in name_classes are remapped.
        if obj.find('name').text not in name_classes:
            continue
        box = obj.find('bndbox')
        for tag, scale, offset in (('xmin', scale_x, left), ('xmax', scale_x, left),
                                   ('ymin', scale_y, top), ('ymax', scale_y, top)):
            node = box.find(tag)
            # Scale into the shrunk image, then shift by the border width to
            # get coordinates on the padded canvas. XML text must be a string.
            node.text = str(int(float(node.text) * scale) + offset)
    tree.write(new_xml_name)
if __name__ == '__main__':
    # Batch driver: for every image in the image folder, process the label and
    # annotation that share its base name, and save the results under a
    # running number (1.jpg / 1.png / 1.xml, 2.jpg / ...).
    resize_jpg_path = r'F:\数据集\image'
    resize_png_path = r'F:\数据集\label'
    resize_xml_path = r'F:\数据集\annotation'
    # NOTE(review): the output folders below are the same folders as the
    # inputs above, so a numbered output such as "2.jpg" can overwrite an
    # original with that name before it has been processed. Point these at
    # separate directories if the sources must be preserved.
    new_jpg_path = 'F:/数据集/image'
    new_png_path = 'F:/数据集/label'
    new_xml_path = 'F:/数据集/annotation'
    # resize_xml is called with its default new_size=(224, 224) and
    # original_size=(256, 256).
    # Sorting makes the numbering reproducible; os.listdir order is arbitrary.
    for num, image_file in enumerate(sorted(os.listdir(resize_jpg_path)), start=1):
        # splitext keeps base names that themselves contain dots intact.
        stem = os.path.splitext(image_file)[0]
        resize_xml(
            xml_file_name=os.path.join(resize_xml_path, stem + '.xml'),
            new_xml_name=os.path.join(new_xml_path, str(num) + '.xml'),
            original_jpg_name=os.path.join(resize_jpg_path, image_file),
            new_jpg_name=os.path.join(new_jpg_path, str(num) + '.jpg'),
            original_png_name=os.path.join(resize_png_path, stem + '.png'),
            new_png_name=os.path.join(new_png_path, str(num) + '.png'),
        )
图像进行边缘填充时,利用的函数是cv2.copyMakeBorder()
本文中:
cv2.copyMakeBorder(cv_original_jpg, 16, 16, 16, 16, cv2.BORDER_CONSTANT, value=(0, 0, 0))
参数的说明:
- src:要处理的原图 cv_original_jpg
- top, bottom, left, right:上下左右要扩展的像素数,本文的原始图像256*256,缩小为224*224,然后要保持原始图像一样的大小,故需要对边缘各填充16个像素值
- borderType:边框类型,这个就是需要关注的填充方式。 cv2.BORDER_CONSTANT是一个常数值的填充,value=(0,0,0)全黑的填充。
本人做的是多任务的学习,故分割的标签做同样的处理。
标签处理的代码说明:
for ix, obj in enumerate(objs):
name = obj.find('name').text
if name in name_classes: # name_classes的里面放的是你含有多少种类的框的名字,列表
# print(xml_file)
obj_new = obj.find('bndbox')
xmin= str(int(float(obj_new.find('xmin').text) * scale_x)+16)
xmax = str(int(float(obj_new.find('xmax').text) * scale_x)+16)
ymin = str(int(float(obj_new.find('ymin').text) * scale_y)+16)
ymax = str(int(float(obj_new.find('ymax').text) * scale_y)+16)
obj_new.find('xmin').text = xmin
obj_new.find('xmax').text = xmax
obj_new.find('ymin').text = ymin
obj_new.find('ymax').text = ymax
tree.write(new_xml_name)
坐标变换的公式:
xmin= str(int(float(obj_new.find('xmin').text) * scale_x)+16)
注意存入xml的值是字符串,故需要用str进行强制类型转换。scale是缩放尺度,加16是因为四边都填充了16个像素。缩放后的坐标值是相对于224*224图像的,因此加上边缘填充的16之后,才是填充后256*256图像上的坐标值。
总体的效果展示: