本文把小物体(在原图中占比较小,例如长宽都小于320的物体)单独裁剪出来保存:以物体左上角为起点生成大小为320×320的图片,并生成一个只含有一个object的xml。如果物体的长或宽有一项不小于320,或者320×320的裁剪区域放不进原图,则暂时舍弃该物体。其中的"000000.xml"是任意一个只含有一个object的xml,用作生成新xml的模板。本文最大的用处是记录改写xml中object的name和bndbox标签值的方法。
img_path、xml_path 分别为原始图片和原始xml所在的路径,split_img_path_out、split_xml_path_out 分别为分割后图片和xml的输出路径,都需要根据自己的路径更改。
例如000000.xml内容如下,我们会根据提取小物体的位置改写object的name和bndbox的值。
<annotation verified="yes">
<filename>test.jpg</filename>
<path>C:\Users\yasin\Desktop\test</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>400</width>
<height>300</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>cat</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>80</xmin>
<ymin>69</ymin>
<xmax>144</xmax>
<ymax>89</ymax>
</bndbox>
</object>
</annotation>
代码如下:
import xml.etree.ElementTree as ET
from os import path
import os
import cv2
def get_path_prex(img_dir):
    """Collect the ``.xml`` files located directly inside ``img_dir``.

    Args:
        img_dir: Directory to scan (not recursive).

    Returns:
        A ``(paths, prefix)`` tuple of two index-aligned lists: ``paths``
        holds ``img_dir`` joined with each matching file name, ``prefix``
        holds the same file names without their extension.
    """
    paths = []
    prefix = []
    # os.listdir returns entries in arbitrary order; sort so the numbering of
    # the generated crops is reproducible between runs and machines.
    for file_name in sorted(os.listdir(img_dir)):
        stem, ending = path.splitext(file_name)
        if ending != ".xml":
            continue
        paths.append(os.path.join(img_dir, file_name))
        prefix.append(stem)
    return paths, prefix
def change_xml_nbox(target, cnt, new_xmlbox, split_xml_path_out,
                    template_path="000000.xml"):
    """Write a single-object annotation XML derived from a template file.

    The template is parsed, the ``<name>`` and ``<bndbox>`` values of every
    ``<object>`` directly under the root are overwritten, and the result is
    saved as ``<split_xml_path_out>/<cnt as 6 digits>.xml``.

    Args:
        target: Class name written into ``<name>``.
        cnt: Integer index used to build the output file name.
        new_xmlbox: Box values in the order ``[xmin, ymin, xmax, ymax]``.
        split_xml_path_out: Existing directory that receives the XML file.
        template_path: XML file used as the template; defaults to
            ``"000000.xml"`` in the current working directory.

    Raises:
        ValueError: If the template has no ``<object>`` or an object lacks
            ``<name>``, ``<bndbox>`` or one of the four coordinate tags.
    """
    tree = ET.parse(template_path)
    objects = tree.getroot().findall("object")
    if not objects:
        raise ValueError("template %r contains no <object>" % template_path)

    box_tags = ("xmin", "ymin", "xmax", "ymax")
    for obj in objects:
        # Look nodes up by tag instead of by position: Element.getchildren()
        # no longer exists on Python 3.9+, and positional access only works
        # for one exact child layout.
        name_node = obj.find("name")
        bndbox = obj.find("bndbox")
        if name_node is None or bndbox is None:
            raise ValueError(
                "template %r has an <object> without <name>/<bndbox>"
                % template_path)
        name_node.text = target
        for tag, value in zip(box_tags, new_xmlbox):
            node = bndbox.find(tag)
            if node is None:
                raise ValueError(
                    "template %r has a <bndbox> without <%s>"
                    % (template_path, tag))
            # Write whole-number floats as "64" rather than "64.0" so that
            # readers which parse coordinates with int() keep working.
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            node.text = str(value)

    out_file = os.path.join(split_xml_path_out, "%06d.xml" % cnt)
    tree.write(out_file, encoding="utf-8", xml_declaration=True)
def crop_fits(xmin, ymin, xmax, ymax, img_w, img_h, crop_size):
    """Tell whether a crop_size square anchored at (xmin, ymin) is usable.

    The crop is usable when the box is strictly smaller than ``crop_size`` in
    both dimensions and the square ``[xmin, xmin + crop_size) x
    [ymin, ymin + crop_size)`` lies entirely inside an ``img_w`` x ``img_h``
    image. All arguments are integers in pixels.
    """
    if xmax - xmin >= crop_size or ymax - ymin >= crop_size:
        return False
    # A negative origin would be read as "count from the end" when slicing.
    if xmin < 0 or ymin < 0:
        return False
    # "<=": a crop that ends exactly on the image border still fits.
    return xmin + crop_size <= img_w and ymin + crop_size <= img_h


if __name__ == "__main__":
    # original image and label path
    img_path = './imgs/'
    xml_path = './xmls'
    # split image and label path
    split_img_path_out = './split_img_out'
    split_xml_path_out = './split_xml_out'
    # side length of the square crop; boxes that are not smaller are discarded
    crop_size = 320

    # imwrite / tree.write do not create missing directories
    os.makedirs(split_img_path_out, exist_ok=True)
    os.makedirs(split_xml_path_out, exist_ok=True)

    paths_xml, prefixs = get_path_prex(xml_path)
    cnt = 0
    for xml_file, prefix in zip(paths_xml, prefixs):
        # Passing the file name lets ElementTree open and close the file.
        root = ET.parse(xml_file).getroot()

        # Build the image path in its own variable. Reassigning img_path here
        # would make every later iteration join onto the previous image file
        # and silently skip it.
        img_file = path.join(img_path, prefix + ".jpg")
        img = cv2.imread(img_file)  # read once per annotation file
        if img is None:
            print("skip {}: cannot read image {}".format(xml_file, img_file))
            continue
        # Check the crop against the real image size rather than the <size>
        # recorded in the XML, because the crop is taken from the image.
        img_h, img_w = img.shape[:2]

        for obj in root.iter('object'):
            cls = obj.find('name').text
            xmlbox = obj.find('bndbox')
            xmin, ymin, xmax, ymax = (
                int(float(xmlbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            # Alternative without enlargement: crop img[ymin:ymax, xmin:xmax]
            # and keep the same box [0, 0, xmax - xmin, ymax - ymin].
            if not crop_fits(xmin, ymin, xmax, ymax, img_w, img_h, crop_size):
                continue
            new_img = img[ymin:ymin + crop_size, xmin:xmin + crop_size]
            cnt = cnt + 1
            cv2.imwrite(split_img_path_out + "/{}.jpg".format("%06d" % cnt), new_img)
            # The object sits in the top-left corner of the crop.
            new_xmlbox = [0, 0, xmax - xmin, ymax - ymin]
            change_xml_nbox(cls, cnt, new_xmlbox, split_xml_path_out)