根据xml标注将多个目标同时截取成一张图片，并修改xml文件

weixin_47924038

已于 2022-03-30 15:52:27 修改

阅读量1.3k

点赞数 1

分类专栏：opencv学习　文章标签：pytorch、深度学习、神经网络

于 2022-03-30 11:16:28 首次发布

本文链接：https://blog.csdn.net/weixin_47924038/article/details/123841701

版权

opencv学习专栏收录该内容

8 篇文章 0 订阅

订阅专栏

因为 TT100K 数据集的图片太大，所以需要从每张图片中截取出包含全部标注目标的一块区域，并相应地修改 xml 标注文件中的坐标，因此写了下面的截取脚本。

原图为：
在这里插入图片描述

截取后为：
在这里插入图片描述

from __future__ import division
import os

import cv2
from PIL import Image
import xml.dom.minidom as tree
import numpy as np

# Directory that is scanned for the source images.
ImgPath = 'D:/yolov5/CFyolov5-tt100k/resize/image/'
# Directory holding one "<image name>.xml" annotation per image.
# NOTE: the script also writes the updated annotation back into this directory.
AnnoPath = 'D:/yolov5/CFyolov5-tt100k/resize/label_resize/'
# Directory that receives the cropped ".jpg" images.
ProcessedPath = 'D:/yolov5/CFyolov5-tt100k/resize/out/'

# Header of the generated annotation file (Pascal VOC-style tag layout).
# The single "{}" placeholder is filled with the image name by the loop below;
# everything else (folder, database, size) is fixed text.
# NOTE(review): <width>/<height> are hard-coded to 1171x647 and are not updated
# to the size of the crop - confirm whether downstream tools read them.
prefix_str = '''<annotation>
	<folder>HollywoodHeads</folder>
	<filename>{}.jpeg</filename>
	<source>
		<database>HollywoodHeads 2015 Database</database>
		<annotation>HollywoodHeads 2015</annotation>
		<image>WILLOW</image>
	</source>
	<size>
		<width>1171</width>
		<height>647</height>
		<depth>3</depth>
	</size>
	<segmented>0</segmented>'''

# Closing tag appended after all <object> entries.
suffix = '</annotation>'

# Template for one <object> entry; placeholders are, in order:
# class name, xmin, ymin, xmax, ymax.
new_head = '''	<object>
		<name>{}</name>
		<bndbox>
			<xmin>{}</xmin>
			<ymin>{}</ymin>
			<xmax>{}</xmax>
			<ymax>{}</ymax>
		</bndbox>
		<difficult>0</difficult>
	</object>'''
# Side length (pixels) of the square window a small group of targets is padded to.
CROP_SIZE = 640


def _box_coords(node):
    """Return (xmin, ymin, xmax, ymax) as ints read from one <bndbox> element."""
    return tuple(
        int(node.getElementsByTagName(tag)[0].childNodes[0].data)
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )


def _expand_axis(lo, hi, target, limit):
    """Grow the interval [lo, hi] to `target` pixels along one image axis.

    The interval is padded symmetrically when that fits inside [0, limit].
    Otherwise it stays anchored at the edge that would overflow and all the
    padding goes to the other side.

    Args:
        lo, hi: current interval bounds (hi - lo must be <= target).
        target: desired interval length.
        limit: image size along this axis.

    Returns:
        (new_lo, new_hi) as ints.
    """
    pad = target - (hi - lo)
    half = pad / 2
    if lo - half >= 0 and hi + half <= limit:
        return int(lo - half), int(hi + half)
    if lo - half < 0:
        # Too close to the top/left edge: keep the low bound, pad the far side.
        # (A window running past the image is simply truncated by the slice.)
        return lo, hi + pad
    # Too close to the bottom/right edge: keep the high bound, pad backwards.
    return max(lo - pad, 0), hi


def select(m, n, boxes=None):
    """Choose the crop window for one image and shift its boxes into it.

    The window is the union of all boxes. When that union is at most
    CROP_SIZE in both directions it is padded to CROP_SIZE x CROP_SIZE;
    a larger union is used as-is.

    Args:
        m: image width in pixels (upper bound for x coordinates).
        n: image height in pixels (upper bound for y coordinates).
        boxes: sequence of <bndbox> DOM elements. Defaults to the
            module-level `bndbox` of the image currently being processed.

    Returns:
        (xmin, ymin, xmax, ymax, bbox) where the first four values are the
        crop window and `bbox` is a list of [x1, y1, x2, y2] lists, one per
        input box, expressed relative to the window's top-left corner.
    """
    if boxes is None:
        boxes = bndbox
    coords = [_box_coords(node) for node in boxes]
    for index, (x1, y1, x2, y2) in enumerate(coords):
        print("the number of the box is", index)
        print("the xy", x1, y1, x2, y2)

    if coords:
        # Real min/max: a box that starts at coordinate 0 must stay the minimum.
        xmin = min(c[0] for c in coords)
        ymin = min(c[1] for c in coords)
        xmax = max(c[2] for c in coords)
        ymax = max(c[3] for c in coords)
    else:
        # No annotated object: fall back to the top-left corner.
        xmin = ymin = xmax = ymax = 0

    if xmax - xmin <= CROP_SIZE and ymax - ymin <= CROP_SIZE:
        # Each axis is padded with its own extent (width for x, height for y).
        xmin, xmax = _expand_axis(xmin, xmax, CROP_SIZE, m)
        ymin, ymax = _expand_axis(ymin, ymax, CROP_SIZE, n)

    bbox = [[x1 - xmin, y1 - ymin, x2 - xmin, y2 - ymin]
            for x1, y1, x2, y2 in coords]
    return xmin, ymin, xmax, ymax, bbox


# cv2.imwrite reports a missing target directory only through its return
# value, so make sure the directory exists before any annotation is rewritten.
if not os.path.isdir(ProcessedPath):
    os.makedirs(ProcessedPath)

imagelist = os.listdir(ImgPath)
for image in imagelist:
    image_pre, ext = os.path.splitext(image)  # image_pre: name without extension
    imgfile = ImgPath + image
    xmlfile = AnnoPath + image_pre + '.xml'  # annotation that belongs to this image

    img_cv = cv2.imread(imgfile)
    if img_cv is None:
        # Not an image (or unreadable): nothing to crop.
        print("skip, cannot read image:", imgfile)
        continue
    if not os.path.isfile(xmlfile):
        print("skip, annotation not found:", xmlfile)
        continue

    annotation = tree.parse(xmlfile).documentElement
    bndbox = annotation.getElementsByTagName('bndbox')
    namelist = annotation.getElementsByTagName('name')
    # The i-th <name> is taken as the label of the i-th <bndbox>; indexing
    # (rather than zip) keeps a missing <name> a loud IndexError.
    objectname = [namelist[i].childNodes[0].data for i in range(len(bndbox))]

    img_h, img_w = img_cv.shape[:2]
    xmin, ymin, xmax, ymax, Bboxes = select(img_w, img_h, bndbox)

    head_str = ''.join(
        new_head.format(name, *box) for name, box in zip(objectname, Bboxes)
    )
    obj_img = img_cv[ymin:ymax, xmin:xmax]  # crop the selected window
    # prefix_str appends the extension itself, so pass the bare name
    # (passing `image` produced e.g. "13.jpg.jpeg").
    xml = prefix_str.format(image_pre) + head_str + suffix

    cv2.imwrite(ProcessedPath + image_pre + '.jpg', obj_img)
    # NOTE: this overwrites the source annotation in AnnoPath, so running the
    # script a second time on the same folder shifts the boxes again.
    with open(AnnoPath + '{}.xml'.format(image_pre), 'w') as xml_out:
        xml_out.write(xml)
```python