裁剪固定尺寸图片
主要是将大图裁剪成固定尺寸的小图,并将标注转化为VOC的xml格式,要点如下:
- 行数和列数按"图像尺寸 / 640"四舍五入确定,不足或超出整块的部分并入最后一行、最后一列。(裁剪结果可分为4部分:左上部分的固定尺寸图块、最后一列、最后一行,以及剩余的右下角图块)
- 若目标在边缘,自适应扩大图像尺寸
代码如下:
import cv2
import os
import json
import numpy as np
from xml.dom.minidom import Document
# Nominal side length, in pixels, of one crop tile.
IMG_SIZE = 640
# Side length, in pixels, of the square box generated around each labelled
# centre point (half of it is used as the box "radius").
LABEL_WIDTH = 30
def _tile_boxes(height, width, rows, cols):
    """Return the nominal crop boxes ``(x0, y0, x1, y1)`` for one image.

    The order matches the crop numbering: full-size tiles row by row, then
    the last column, then the last row, then the bottom-right corner. The
    last row/column stretch to the image border instead of being IMG_SIZE.
    """
    last_x = (cols - 1) * IMG_SIZE
    last_y = (rows - 1) * IMG_SIZE
    boxes = [
        (j * IMG_SIZE, i * IMG_SIZE, (j + 1) * IMG_SIZE, (i + 1) * IMG_SIZE)
        for i in range(rows - 1)
        for j in range(cols - 1)
    ]
    boxes += [(last_x, i * IMG_SIZE, width, (i + 1) * IMG_SIZE)
              for i in range(rows - 1)]
    boxes += [(j * IMG_SIZE, last_y, (j + 1) * IMG_SIZE, height)
              for j in range(cols - 1)]
    boxes.append((last_x, last_y, width, height))
    return boxes


def crop_image(data_type = "val"):
    """Cut every PNG of a data split into tiles and write VOC-style outputs.

    For each ``*.png`` in the split's image folder, the matching ``.json``
    label file is read, the image is divided into roughly IMG_SIZE-sized
    tiles, and for every tile three things are written: the cropped PNG, an
    annotation XML, and one line in ``<data_type>.txt``.

    The directory strings below are placeholders ("xxx") and must be adapted
    before running.

    Args:
        data_type: Name of the split; used as a sub-folder name for the
            inputs and as the file name of the generated ``.txt`` list.
    """
    # Backslashes are escaped explicitly; the resulting strings are the same
    # as before, but no longer rely on invalid escape sequences.
    img_path = "xxx\\" + data_type + "\\images\\"
    json_path = "xxx\\" + data_type + "\\labels\\"
    txt_path = "xxx\\VOCdevkit\\VOC2007\\ImageSets\\Main\\"
    img_save_path = "xxx\\VOCdevkit\\VOC2007\\JPEGImages\\"
    anno_path = "xxxx\\VOCdevkit\\VOC2007\\Annotations\\"

    # The context manager guarantees the list file is flushed and closed.
    with open(txt_path + data_type + ".txt", "w") as txt_file:
        for file in os.listdir(img_path):
            # file[:-4] below assumes a 4-character ".png" suffix.
            if not file.endswith(".png"):
                continue
            stem = file[:-4]
            all_points = get_json_points(json_path + stem + ".json")
            img = cv2.imread(img_path + file)
            if img is None:
                # cv2.imread signals failure by returning None.
                print("skip unreadable image:", img_path + file)
                continue
            h, w = img.shape[:2]
            # round() gives 0 for images smaller than half a tile, which
            # would produce a negative tile origin; keep at least one tile.
            row = max(1, round(h / IMG_SIZE))
            col = max(1, round(w / IMG_SIZE))
            print(img.shape, row, col)

            for index, box in enumerate(_tile_boxes(h, w, row, col), start=1):
                cur_img_points, rect = get_crop_img_points(*box, all_points)
                patch_image = img[rect[1]:rect[3], rect[0]:rect[2], :]
                # Report the size of the pixels actually written: the rect
                # may extend past the image border, where slicing clips it.
                patch_h, patch_w = patch_image.shape[:2]
                file_name = stem + "_crop_" + str(index)
                json_points_to_xml(cur_img_points, patch_w, patch_h, anno_path, file_name)
                cv2.imwrite(img_save_path + file_name + ".png", patch_image)
                txt_file.write(file_name + "\n")
def get_json_points(file_name):
    """Read the centre point of every labelled target from a JSON file.

    The file must contain a top-level ``"shapes"`` list; the first entry of
    each shape's ``"points"`` list is taken as that target's centre.

    Args:
        file_name: Path of the JSON label file.

    Returns:
        A list with one point (as stored in the JSON) per shape.
    """
    # Use a context manager so the handle is closed even if parsing fails.
    with open(file_name) as file:
        json_file = json.load(file)
    all_points = [shape["points"][0] for shape in json_file["shapes"]]
    print(len(all_points),all_points)
    return all_points
def get_crop_img_points(left_top_x,left_top_y,right_bottom_x,right_bottom_y,points):
    """Select the points inside a crop box and grow the box to fit their labels.

    A point belongs to the box when ``left <= x < right`` and
    ``top <= y < bottom``. If the LABEL_WIDTH-sized square around any
    selected point would cross a box edge, that edge is moved outwards to
    the square's border plus a 2-pixel margin.

    Args:
        left_top_x, left_top_y: Top-left corner of the nominal crop box.
        right_bottom_x, right_bottom_y: Bottom-right corner (exclusive).
        points: Iterable of ``[x, y]`` centre points in full-image coordinates.

    Returns:
        ``(cur_img_points, rect)`` where ``cur_img_points`` is a NumPy array
        of the selected points relative to the returned top-left corner
        (an empty array when nothing was selected), and ``rect`` is
        ``[x0, y0, x1, y1]`` as ints. The top-left corner is never negative;
        the bottom-right corner is not clamped because the image size is
        unknown here.
    """
    inside = [
        [p[0], p[1]]
        for p in points
        if left_top_x <= p[0] < right_bottom_x and left_top_y <= p[1] < right_bottom_y
    ]
    cur_img_points = np.asarray(inside)
    if cur_img_points.shape[0] == 0:
        # Nothing to fit: return the nominal box unchanged.
        return cur_img_points, [int(left_top_x),int(left_top_y),int(right_bottom_x),int(right_bottom_y)]

    min_x, min_y = np.amin(cur_img_points, axis=0)
    max_x, max_y = np.amax(cur_img_points, axis=0)
    half = LABEL_WIDTH / 2
    offset = 2  # extra margin, in pixels, beyond the label square
    if min_x - half < left_top_x:
        left_top_x = min_x - half - offset
    if min_y - half < left_top_y:
        left_top_y = min_y - half - offset
    if max_x + half > right_bottom_x:
        right_bottom_x = max_x + half + offset
    if max_y + half > right_bottom_y:
        right_bottom_y = max_y + half + offset

    # The caller slices pixels at integer coordinates, so shift the points by
    # the same integer origin. Clamp at 0: a negative start would wrap around
    # in a NumPy slice and yield the wrong (usually empty) patch.
    left_top_x = max(int(left_top_x), 0)
    left_top_y = max(int(left_top_y), 0)
    cur_img_points = cur_img_points - [left_top_x, left_top_y]
    return cur_img_points, [left_top_x, left_top_y, int(right_bottom_x), int(right_bottom_y)]
def json_points_to_xml(points,width,height,annotations_path, file):
    """Write a VOC-style annotation XML for one cropped image.

    Every point becomes one ``<object>`` named "lgd" whose bounding box is
    the LABEL_WIDTH-sized square centred on the point. Box coordinates are
    written as ``str()`` of the computed values, so they may contain a
    decimal part.

    Args:
        points: Iterable of ``[x, y]`` centres in crop coordinates.
        width: Value written to ``<size><width>``.
        height: Value written to ``<size><height>``.
        annotations_path: Output directory prefix (joined by plain string
            concatenation, so it must end with a path separator).
        file: Base name without extension; ``file + ".xml"`` is written and
            ``file + ".png"`` is recorded as ``<filename>``.
    """
    doc = Document()

    def add_child(parent, tag, text=None):
        # Create <tag>, optionally with a text node, and attach it to parent.
        node = doc.createElement(tag)
        if text is not None:
            node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)
        return node

    annotation = add_child(doc, "annotation")
    add_child(annotation, "folder", "VOC2007")
    add_child(annotation, "filename", file + ".png")
    size = add_child(annotation, "size")
    add_child(size, "width", str(width))
    add_child(size, "height", str(height))
    # NOTE(review): depth is always written as "1"; confirm this matches the
    # channel count of the saved crops.
    add_child(size, "depth", "1")

    radius = LABEL_WIDTH / 2
    for center in points:
        one_object = add_child(annotation, "object")
        add_child(one_object, "name", "lgd")
        add_child(one_object, "pose", "center")
        add_child(one_object, "truncated", "0")
        add_child(one_object, "difficult", "0")
        bndbox = add_child(one_object, "bndbox")
        add_child(bndbox, "xmin", str(center[0] - radius))
        add_child(bndbox, "ymin", str(center[1] - radius))
        add_child(bndbox, "xmax", str(center[0] + radius))
        add_child(bndbox, "ymax", str(center[1] + radius))

    # The XML declaration carries no encoding, which parsers read as UTF-8,
    # so write UTF-8 explicitly instead of the platform default. The context
    # manager closes the file even if serialisation fails.
    with open(annotations_path + file + ".xml", 'w', encoding="utf-8") as f:
        f.write(doc.toprettyxml(indent='\t'))
# Run the cropping with the default data_type ("val").
crop_image()
验证显示
代码如下:
import json
import os
from xml.dom.minidom import Document

try:
    import xml.etree.cElementTree as ET
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as ET

import cv2
def draw_img_by_xml():
    """Show every cropped image with its annotated boxes drawn on top.

    For each file in the JPEGImages folder, the XML with the same base name
    is parsed from the Annotations folder, each ``<object>/<bndbox>`` is
    drawn as a rectangle, and the result is displayed until a key is pressed.

    The directory strings below are placeholders ("xxx") and must be adapted
    before running.
    """
    # Backslashes are escaped explicitly; the resulting strings are the same
    # as before, but no longer rely on invalid escape sequences.
    img_path = "xxx\\VOCdevkit\\VOC2007\\JPEGImages\\"
    xml_path = "xxx\\VOCdevkit\\VOC2007\\Annotations\\"
    for img_name in os.listdir(img_path):
        print(img_name)
        img = cv2.imread(img_path + img_name)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print("skip unreadable image:", img_name)
            continue
        tree = ET.parse(xml_path + img_name[0:-4] + ".xml")
        for obj in tree.getroot().findall('object'):
            bndbox = obj.find('bndbox')
            # Coordinates may be stored with a decimal part, hence float() first.
            xmin, ymin, xmax, ymax = (
                int(float(bndbox.find(tag).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            print(xmin,ymin,xmax,ymax)
            cv2.rectangle(img, (xmin,ymin), (xmax,ymax), (255, 0, 0), 2)
        cv2.imshow(img_name, img)
        cv2.waitKey(0)  # block until a key is pressed