因为 TT100K 数据集的图片太大,需要从每张图片中截取包含目标的一部分,并保存对应的 xml 标注文件,因此写了下面的截取脚本。
原图为:
截取后为:
from __future__ import division
import os
import cv2
from PIL import Image
import xml.dom.minidom as tree
import numpy as np
# Folder with the source images, folder with their Pascal-VOC style XML
# annotations, and folder that receives the cropped images.
ImgPath = 'D:/yolov5/CFyolov5-tt100k/resize/image/'
AnnoPath = 'D:/yolov5/CFyolov5-tt100k/resize/label_resize/'
ProcessedPath = 'D:/yolov5/CFyolov5-tt100k/resize/out/'

# Side length, in pixels, of the square window cut out around the objects.
CROP_SIZE = 640

# Header of the generated annotation. Placeholders, in order: file name of the
# saved crop, crop width, crop height. The size used to be hard-coded, which
# made it wrong for every crop.
prefix_str = '''<annotation>
<folder>HollywoodHeads</folder>
<filename>{}</filename>
<source>
<database>HollywoodHeads 2015 Database</database>
<annotation>HollywoodHeads 2015</annotation>
<image>WILLOW</image>
</source>
<size>
<width>{}</width>
<height>{}</height>
<depth>3</depth>
</size>
<segmented>0</segmented>'''
suffix = '</annotation>'
# One annotated object. Placeholders, in order: class name, xmin, ymin, xmax,
# ymax.
new_head = ''' <object>
<name>{}</name>
<bndbox>
<xmin>{}</xmin>
<ymin>{}</ymin>
<xmax>{}</xmax>
<ymax>{}</ymax>
</bndbox>
<difficult>0</difficult>
</object>'''


def _coord(node, tag):
    """Return the integer text of the first ``tag`` element under ``node``."""
    return int(node.getElementsByTagName(tag)[0].childNodes[0].data)


def _window(lo, hi, size, limit):
    """Return (start, end) of a ``size``-long span around ``[lo, hi]``.

    The span is centred on ``[lo, hi]`` and then shifted so that it stays
    inside ``[0, limit]``. When ``limit`` is smaller than ``size`` the whole
    ``[0, limit]`` range is returned.
    """
    # Round the leading padding up so that the centred case gives the same
    # pixels as truncating ``lo - (size - extent) / 2``.
    start = lo - (size - (hi - lo) + 1) // 2
    start = max(0, min(start, limit - size))
    return start, min(start + size, limit)


def select(m, n, bndbox, crop_size=CROP_SIZE):
    """Choose the crop rectangle and shift every box into its coordinates.

    Args:
        m: width of the source image in pixels.
        n: height of the source image in pixels.
        bndbox: sequence of ``<bndbox>`` DOM elements, each holding integer
            ``xmin``/``ymin``/``xmax``/``ymax`` children.
        crop_size: side length of the square window to cut.

    Returns:
        ``(xmin, ymin, xmax, ymax, bbox)`` where the first four values are the
        crop rectangle in source-image pixels and ``bbox`` is a list of
        ``[x1, y1, x2, y2]`` boxes relative to the crop's top-left corner.
        If the union of all boxes is larger than ``crop_size`` in either
        direction, the crop is exactly that union. With no boxes at all the
        window is anchored at the image origin.
    """
    boxes = []
    for index, node in enumerate(bndbox):
        box = [_coord(node, tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        print("the number of the box is", index)
        print("the xy", box[0], box[1], box[2], box[3])
        boxes.append(box)

    if boxes:
        # min()/max() instead of a 0 sentinel: a box that really starts at
        # coordinate 0 must not be overwritten by a later box.
        xmin = min(box[0] for box in boxes)
        ymin = min(box[1] for box in boxes)
        xmax = max(box[2] for box in boxes)
        ymax = max(box[3] for box in boxes)
    else:
        xmin = ymin = xmax = ymax = 0

    if xmax - xmin <= crop_size and ymax - ymin <= crop_size:
        # Each axis is padded from its own extent (the vertical padding used
        # to be derived from the width) and clamped to the image, which also
        # covers boxes close to the right and bottom edges.
        xmin, xmax = _window(xmin, xmax, crop_size, m)
        ymin, ymax = _window(ymin, ymax, crop_size, n)

    bbox = [[x1 - xmin, y1 - ymin, x2 - xmin, y2 - ymin]
            for x1, y1, x2, y2 in boxes]
    return xmin, ymin, xmax, ymax, bbox


def build_xml(filename, width, height, names, boxes):
    """Return the annotation text for one crop.

    Args:
        filename: file name of the saved crop, written to ``<filename>``.
        width: crop width in pixels.
        height: crop height in pixels.
        names: class names, in the same order as ``boxes``.
        boxes: ``[x1, y1, x2, y2]`` lists in crop coordinates.
    """
    objects = ''.join(
        new_head.format(name, box[0], box[1], box[2], box[3])
        for name, box in zip(names, boxes)
    )
    return prefix_str.format(filename, width, height) + objects + suffix


def main():
    """Crop every image in ImgPath around its objects and rewrite its XML."""
    if not os.path.isdir(ProcessedPath):
        os.makedirs(ProcessedPath)

    for image in os.listdir(ImgPath):
        image_pre, ext = os.path.splitext(image)
        imgfile = ImgPath + image
        xmlfile = AnnoPath + image_pre + '.xml'

        img_cv = cv2.imread(imgfile)
        if img_cv is None:
            # cv2.imread returns None for anything it cannot decode.
            print('skip {}: not a readable image'.format(image))
            continue
        if not os.path.isfile(xmlfile):
            print('skip {}: no annotation file'.format(image))
            continue

        annotation = tree.parse(xmlfile).documentElement
        # Names and boxes are paired by their order in the document.
        namelist = annotation.getElementsByTagName('name')
        bndbox = annotation.getElementsByTagName('bndbox')
        if len(namelist) < len(bndbox):
            print('skip {}: fewer names than boxes'.format(image))
            continue
        objectname = [node.childNodes[0].data for node in namelist]

        img_h, img_w = img_cv.shape[:2]
        xmin, ymin, xmax, ymax, Bboxes = select(img_w, img_h, bndbox)

        obj_img = img_cv[ymin:ymax, xmin:xmax]
        if obj_img.size == 0:
            print('skip {}: empty crop'.format(image))
            continue

        out_name = image_pre + '.jpg'
        if not cv2.imwrite(ProcessedPath + out_name, obj_img):
            print('skip {}: could not save the crop'.format(image))
            continue

        # Report the real size of what was saved, not a fixed value.
        crop_h, crop_w = obj_img.shape[:2]
        xml = build_xml(out_name, crop_w, crop_h, objectname, Bboxes)

        # NOTE(review): the new annotation replaces the XML that was just read
        # from AnnoPath, so running the script twice on the same folder shifts
        # boxes that were already shifted. Kept as in the original; confirm
        # whether the XML should be written next to the crops instead.
        with open(AnnoPath + '{}.xml'.format(image_pre), 'w') as xml_out:
            xml_out.write(xml)


if __name__ == '__main__':
    main()
```python