裁剪策略:
按从左至右、从上至下的顺序滑动裁剪 320x320 大小的子图,滑动步长为 160。当子图的右边缘超出原图边界时,以原图右边界作为子图的右边缘,向左取 320 像素作为该子图;当子图的下边缘超出原图边界时同样处理,即以原图下边界作为子图的下边缘,向上取 320 像素。
xml文件格式是Pascal VOC格式。
代码如下:
import os
import xml.etree.ElementTree as ET
from PIL import Image
def crop_one_image_and_xml(image_path, annotation_path, output_path, filename):
# -*- coding: encoding -*-
# 原图文件和标注文件的路径
# image_path = 'path/to/original/image'
# annotation_path = 'path/to/original/annotation'
# 小块图片和标注文件保存的路径
# output_path = 'path/to/output/folder'
# 小块图片的尺寸和裁剪策略
block_size = (320, 320)
stride = (160, 160)
# 解析原图的标注文件
tree = ET.parse(annotation_path)
root = tree.getroot() # 获取ElementTree对象的根节点。根据XML文件的结构,每个XML文件都有一个根节点。getroot()方法返回这个根节点,并将其保存在变量root中。这个方法返回的是一个 Element 类型的对象,它代表了整个XML文件的根节点。
# 获取原图的大小
size = (
int(root.find('size/width').text), int(root.find('size/height').text)) # 获取XML文件中 size/width 这个节点的值,并将其转换为整数类型。
# 计算小块图片的行数和列数
rows = (size[1] - block_size[1]) // stride[1] + 2
cols = (size[0] - block_size[0]) // stride[0] + 2
# 遍历每个小块图片的位置
for row in range(rows):
for col in range(cols):
# 计算小块图片的左上角在原图中的位置坐标
x0 = col * stride[0]
y0 = row * stride[1]
x1 = x0 + block_size[0]
y1 = y0 + block_size[1]
# 如果超出原图边界,则调整小块图片的大小和位置
if x1 > size[0]:
x1 = size[0]
x0 = x1 - block_size[0]
if y1 > size[1]:
y1 = size[1]
y0 = y1 - block_size[1]
# 创建小块图片并保存
block_image = Image.open(image_path).crop((x0, y0, x1, y1))
block_filename = f'{filename}_{x0, y0}-{x1, y1}.tif'
block_image.save(os.path.join(output_path, block_filename))
# 检查是否含有标注框,True代表没有标注框,False就代表有标注框
check_bndbox_is_none = True
# 创建小块图片的标注文件并保存
'''
ET 是 ElementTree 库的别名或者是导入的模块,使用 ET.Element('annotation') 创建了一个名为 annotation 的元素,它代表了这个XML文档的根节点。
Element() 方法会返回一个 Element 类型的对象,该对象代表了一个XML元素。
这个XML文档的根节点是 annotation,它是这个XML文档的最上层元素,它可以包含其它元素或者属性,而其它元素或者属性都是它的子元素或者子属性。
你可以通过在这个根元素上调用方法,如 .append() 或 .attrib,来添加子元素或者属性。
'''
block_root = ET.Element('annotation')
folder_elem = ET.Element('folder')
folder_elem.text = str('Images')
block_root.append(folder_elem)
filename_elem = ET.Element('filename')
filename_elem.text = str(os.path.join(filename + ".tif"))
block_root.append(filename_elem)
path_elem = ET.Element('path')
path_elem.text = str(os.path.join(annotation_path, filename + ".tif"))
block_root.append(path_elem)
source_elem = ET.Element('source')
database_elem = ET.Element('database')
database_elem.text = str('Unknown')
source_elem.append(database_elem)
block_root.append(source_elem)
# 添加图像大小信息
'''
使用 ElementTree 库创建一个 XML 文档中的 size 元素,并在其中添加 width 和 height 子元素,
然后将它们的文本内容设置为图像 block_image 的宽度和高度,最终生成以下 XML 结构:
<size>
<width>图像宽度</width>
<height>图像高度</height>
</size>
最后将size 元素 添加到根节点 block_root = ET.Element('annotation') 中
'''
size_elem = ET.Element('size')
width_elem = ET.Element('width')
width_elem.text = str(block_image.width)
height_elem = ET.Element('height')
height_elem.text = str(block_image.height)
depth_elem = ET.Element('depth')
depth_elem.text = str('1')
size_elem.append(width_elem)
size_elem.append(height_elem)
size_elem.append(depth_elem)
block_root.append(size_elem)
segmented_elem = ET.Element('segmented')
segmented_elem.text = str('0')
block_root.append(segmented_elem)
# 遍历原图的目标,并添加到小块图片的标注文件中
'''
使用了 ElementTree 库中的 findall() 方法和 find() 方法来查找 XML 元素中的特定子元素,并将其文本内容转换为 Python 中的数据类型。
for obj in root.findall('object'): 找到该 XML 元素中所有名为 object 的子元素,并对每一个 object 子元素执行下面的代码。
在循环内部,obj.find('name').text 获取了当前 object 子元素下的名为 name 的子元素,并返回该子元素的文本内容,这里是一个字符串类型,赋值给了 name 变量。
'''
for obj in root.findall('object'): # root = ET.parse(annotation_path).getroot() 原xml文件的根节点
name = obj.find('name').text # 原图标注框的类别名
pose = obj.find('pose').text
truncated = obj.find('truncated').text
difficult = obj.find('difficult').text
xmin = int(obj.find('bndbox/xmin').text) # 原图的标注框的左上点的坐标
ymin = int(obj.find('bndbox/ymin').text) # 原图的标注框的左上点的坐标
xmax = int(obj.find('bndbox/xmax').text) # 原图的标注框的右下点的坐标
ymax = int(obj.find('bndbox/ymax').text) # 原图的标注框的右下点的坐标
# 将目标框的坐标转换为小块图片的相对坐标 x0y0小块图片的左上角右下角在原图中的位置坐标
xmin -= x0
ymin -= y0
xmax -= x0
ymax -= y0
# 如果目标框在小块图片内,则添加到标注文件中
if 0 < xmin < block_image.width and 0 < ymin < block_image.height and \
0 < xmax < block_image.width and 0 < ymax < block_image.height:
obj_elem = ET.Element('object') # 创建 object 属性
name_elem = ET.Element('name') # 创建 name 属性
name_elem.text = name # name = obj.find('name').text name的值没有改变
pose_elem = ET.Element('pose')
pose_elem.text = pose
truncated_elem = ET.Element('truncated')
truncated_elem.text = truncated
difficult_elem = ET.Element('difficult')
difficult_elem.text = difficult
obj_elem.append(name_elem) # 将name加入到 object 节点
obj_elem.append(pose_elem)
obj_elem.append(truncated_elem)
obj_elem.append(difficult_elem)
bndbox_elem = ET.Element('bndbox') # 创建 bndbox 属性
xmin_elem = ET.Element('xmin') # 创建 xmin 属性
xmin_elem.text = str(xmin)
ymin_elem = ET.Element('ymin') # 创建 ymin 属性
ymin_elem.text = str(ymin)
xmax_elem = ET.Element('xmax') # 创建 xmax 属性
xmax_elem.text = str(xmax)
ymax_elem = ET.Element('ymax') # 创建 ymax 属性
ymax_elem.text = str(ymax)
bndbox_elem.append(xmin_elem)
bndbox_elem.append(ymin_elem)
bndbox_elem.append(xmax_elem)
bndbox_elem.append(ymax_elem)
obj_elem.append(bndbox_elem)
block_root.append(obj_elem)
check_bndbox_is_none = False
elif 0 < xmin < block_image.width and 0 < ymin < block_image.height and xmax > block_image.width and 0 < ymax < block_image.height:
obj_elem = ET.Element('object') # 创建 object 属性
name_elem = ET.Element('name') # 创建 name 属性
name_elem.text = name # name = obj.find('name').text name的值没有改变
pose_elem = ET.Element('pose')
pose_elem.text = pose
truncated_elem = ET.Element('truncated')
truncated_elem.text = truncated
difficult_elem = ET.Element('difficult')
difficult_elem.text = difficult
obj_elem.append(name_elem) # 将name加入到 object 节点
obj_elem.append(pose_elem)
obj_elem.append(truncated_elem)
obj_elem.append(difficult_elem)
bndbox_elem = ET.Element('bndbox') # 创建 bndbox 属性
xmin_elem = ET.Element('xmin') # 创建 xmin 属性
xmin_elem.text = str(xmin)
ymin_elem = ET.Element('ymin') # 创建 ymin 属性
ymin_elem.text = str(ymin)
xmax_elem = ET.Element('xmax') # 创建 xmax 属性
xmax_elem.text = str(block_image.width)
ymax_elem = ET.Element('ymax') # 创建 ymax 属性
ymax_elem.text = str(ymax)
bndbox_elem.append(xmin_elem)
bndbox_elem.append(ymin_elem)
bndbox_elem.append(xmax_elem)
bndbox_elem.append(ymax_elem)
obj_elem.append(bndbox_elem)
block_root.append(obj_elem)
check_bndbox_is_none = False
elif 0 < xmin < block_image.width and 0 < ymin < block_image.height and 0 < xmax < block_image.width and ymax > block_image.height:
obj_elem = ET.Element('object') # 创建 object 属性
name_elem = ET.Element('name') # 创建 name 属性
name_elem.text = name # name = obj.find('name').text name的值没有改变
pose_elem = ET.Element('pose')
pose_elem.text = pose
truncated_elem = ET.Element('truncated')
truncated_elem.text = truncated
difficult_elem = ET.Element('difficult')
difficult_elem.text = difficult
obj_elem.append(name_elem) # 将name加入到 object 节点
obj_elem.append(pose_elem)
obj_elem.append(truncated_elem)
obj_elem.append(difficult_elem)
bndbox_elem = ET.Element('bndbox') # 创建 bndbox 属性
xmin_elem = ET.Element('xmin') # 创建 xmin 属性
xmin_elem.text = str(xmin)
ymin_elem = ET.Element('ymin') # 创建 ymin 属性
ymin_elem.text = str(ymin)
xmax_elem = ET.Element('xmax') # 创建 xmax 属性
xmax_elem.text = str(xmax)
ymax_elem = ET.Element('ymax') # 创建 ymax 属性
ymax_elem.text = str(block_image.height)
bndbox_elem.append(xmin_elem)
bndbox_elem.append(ymin_elem)
bndbox_elem.append(xmax_elem)
bndbox_elem.append(ymax_elem)
obj_elem.append(bndbox_elem)
block_root.append(obj_elem)
check_bndbox_is_none = False
elif 0 < xmin < block_image.width and ymin < 0 and 0 < xmax < block_image.width and 0 < ymax < block_image.height:
obj_elem = ET.Element('object') # 创建 object 属性
name_elem = ET.Element('name') # 创建 name 属性
name_elem.text = name # name = obj.find('name').text name的值没有改变
pose_elem = ET.Element('pose')
pose_elem.text = pose
truncated_elem = ET.Element('truncated')
truncated_elem.text = truncated
difficult_elem = ET.Element('difficult')
difficult_elem.text = difficult
obj_elem.append(name_elem) # 将name加入到 object 节点
obj_elem.append(pose_elem)
obj_elem.append(truncated_elem)
obj_elem.append(difficult_elem)
bndbox_elem = ET.Element('bndbox') # 创建 bndbox 属性
xmin_elem = ET.Element('xmin') # 创建 xmin 属性
xmin_elem.text = str(xmin)
ymin_elem = ET.Element('ymin') # 创建 ymin 属性
ymin_elem.text = str(0)
xmax_elem = ET.Element('xmax') # 创建 xmax 属性
xmax_elem.text = str(xmax)
ymax_elem = ET.Element('ymax') # 创建 ymax 属性
ymax_elem.text = str(ymax)
bndbox_elem.append(xmin_elem)
bndbox_elem.append(ymin_elem)
bndbox_elem.append(xmax_elem)
bndbox_elem.append(ymax_elem)
obj_elem.append(bndbox_elem)
block_root.append(obj_elem)
check_bndbox_is_none = False
elif xmin < 0 and 0 < ymin < block_image.height and 0 < xmax < block_image.width and 0 < ymax < block_image.height:
obj_elem = ET.Element('object') # 创建 object 属性
name_elem = ET.Element('name') # 创建 name 属性
name_elem.text = name # name = obj.find('name').text name的值没有改变
pose_elem = ET.Element('pose')
pose_elem.text = pose
truncated_elem = ET.Element('truncated')
truncated_elem.text = truncated