将目标检测大尺寸图片裁剪成固定尺寸图片

最新推荐文章于 2023-08-17 09:24:27 发布

AICVer

最新推荐文章于 2023-08-17 09:24:27 发布

阅读量1.5k

点赞数

分类专栏：目标检测 Python 文章标签：目标检测计算机视觉 python

本文链接：https://blog.csdn.net/u011489887/article/details/125952104

版权

Python 同时被 2 个专栏收录

41 篇文章 0 订阅

订阅专栏

目标检测

8 篇文章 0 订阅

订阅专栏

裁剪固定尺寸图片

主要是将大图裁剪成固定尺寸的小图，并将标注转换为 VOC 的 XML 格式，要点如下：

图像尺寸不是固定尺寸的整数倍时，行数和列数按四舍五入确定：余量不足半块时并入最后一行/列的图块，超过半块时单独成为一块。（裁剪分为 4 部分：左上区域的固定尺寸图块、最后一列、最后一行，以及右下角的最后一块）
若目标位于图块边缘，则自适应扩大该图块的裁剪范围，使目标框完整落在图块内。
代码如下：

import cv2
import os
import json
import numpy as np
from xml.dom.minidom import Document
# Nominal side length, in pixels, of one crop tile; crop_image derives the
# tile grid (row/column counts) from it.
IMG_SIZE = 640
# Side length, in pixels, of the square box built around every labelled point
# (half of it is used as the box "radius").
LABEL_WIDTH = 30


# Main cropping logic
def crop_image(data_type="val"):
    """Cut every PNG of a data split into tiles and write VOC-style outputs.

    For each ``*.png`` in the split's image folder the matching ``.json``
    label file is read, the image is divided into a grid of roughly
    ``IMG_SIZE`` x ``IMG_SIZE`` tiles, and for every tile this function
    writes the cropped PNG, an XML annotation, and one line with the tile
    name in ``<data_type>.txt``.

    Tiles are numbered from 1 in this order: full-size interior tiles
    (row by row), the last column, the last row, then the bottom-right
    corner.

    Args:
        data_type: Name of the split; selects the input folders and the
            name of the list file that is written.
    """
    # Backslashes are doubled so the literals contain no invalid escape
    # sequences; the resulting path strings are unchanged.
    img_path = "xxx\\" + data_type + "\\images\\"
    json_path = "xxx\\" + data_type + "\\labels\\"
    txt_path = "xxx\\VOCdevkit\\VOC2007\\ImageSets\\Main\\"

    img_save_path = "xxx\\VOCdevkit\\VOC2007\\JPEGImages\\"
    anno_path = "xxxx\\VOCdevkit\\VOC2007\\Annotations\\"

    # The context manager guarantees the list file is flushed and closed,
    # even if processing one of the images raises.
    with open(txt_path + data_type + ".txt", "w") as txt_file:
        for file in os.listdir(img_path):
            # Match on the extension only; a bare substring test would also
            # accept names such as "png_notes.txt".
            if not file.endswith(".png"):
                continue
            stem = file[:-4]
            all_points = get_json_points(json_path + stem + ".json")

            img = cv2.imread(img_path + file)
            if img is None:
                # imread signals a decode/read failure by returning None.
                print("skip unreadable image:", img_path + file)
                continue
            h, w = img.shape[:2]
            # At least one row/column, so images smaller than half a tile
            # still produce a single crop instead of negative windows.
            rows = max(1, round(h / IMG_SIZE))
            cols = max(1, round(w / IMG_SIZE))
            print(img.shape, rows, cols)

            windows = _tile_windows(h, w, rows, cols, IMG_SIZE)
            for index, (x0, y0, x1, y1) in enumerate(windows, start=1):
                cur_img_points, rect = get_crop_img_points(x0, y0, x1, y1, all_points)
                left, top = rect[0], rect[1]
                # The returned rectangle may extend past the image; clamp the
                # far edges so the size written to the XML equals the size of
                # the patch that is actually saved. Left/top are kept as
                # returned because the point coordinates are already relative
                # to that origin.
                right = min(rect[2], w)
                bottom = min(rect[3], h)
                patch_image = img[top:bottom, left:right, :]
                file_name = stem + "_crop_" + str(index)
                json_points_to_xml(cur_img_points, right - left, bottom - top, anno_path, file_name)
                cv2.imwrite(img_save_path + file_name + ".png", patch_image)
                txt_file.write(file_name + "\n")


def _tile_windows(height, width, rows, cols, tile):
    """Return the nominal (x0, y0, x1, y1) crop windows of one image, in numbering order."""
    windows = []
    # Full-size interior tiles, row by row.
    for i in range(rows - 1):
        for j in range(cols - 1):
            windows.append((j * tile, i * tile, (j + 1) * tile, (i + 1) * tile))
    # Last column: each tile stretches to the right image edge.
    for i in range(rows - 1):
        windows.append(((cols - 1) * tile, i * tile, width, (i + 1) * tile))
    # Last row: each tile stretches to the bottom image edge.
    for j in range(cols - 1):
        windows.append((j * tile, (rows - 1) * tile, (j + 1) * tile, height))
    # Bottom-right corner: stretches to both far edges.
    windows.append(((cols - 1) * tile, (rows - 1) * tile, width, height))
    return windows


# Collect one point per labelled shape of the full-size image
def get_json_points(file_name):
    """Read a label JSON file and return the first point of every shape.

    Args:
        file_name: Path of a JSON file holding a top-level ``"shapes"``
            list whose items each contain a ``"points"`` list.

    Returns:
        A list with the first entry of each shape's ``"points"``, in file
        order. The count and the list itself are also printed.
    """
    # The context manager closes the handle; JSON text is read as UTF-8.
    with open(file_name, encoding="utf-8") as file:
        json_file = json.load(file)

    all_points = [shape["points"][0] for shape in json_file["shapes"]]
    print(len(all_points), all_points)
    return all_points


# Select the points inside one crop window and return them together with the
# (possibly enlarged) top-left / bottom-right corners of the crop
def get_crop_img_points(left_top_x,left_top_y,right_bottom_x,right_bottom_y,points, label_width=None):
    """Pick the points that fall inside a window and grow the window around them.

    A point belongs to the window when ``left <= x < right`` and
    ``top <= y < bottom``. If a square of side ``label_width`` centred on a
    selected point would cross a window edge, that edge is moved outwards
    so the square (plus a 2-pixel margin) fits.

    Args:
        left_top_x, left_top_y: Top-left corner of the nominal window.
        right_bottom_x, right_bottom_y: Bottom-right corner (exclusive).
        points: Iterable of ``[x, y]`` points in full-image coordinates.
        label_width: Side of the square around each point; defaults to the
            module-level ``LABEL_WIDTH``.

    Returns:
        ``(cur_img_points, rect)``: a numpy array of the selected points
        shifted to be relative to the final top-left corner (an empty
        array when nothing was selected), and ``rect`` as
        ``[left, top, right, bottom]`` integers. ``right``/``bottom`` may
        exceed the image size; this function does not know the image
        bounds.
    """
    if label_width is None:
        label_width = LABEL_WIDTH

    cur_img_points = np.asarray([
        [point[0], point[1]]
        for point in points
        if left_top_x <= point[0] < right_bottom_x and left_top_y <= point[1] < right_bottom_y
    ])

    # Nothing selected: keep the nominal window untouched.
    if cur_img_points.shape[0] == 0:
        return cur_img_points, [int(left_top_x),int(left_top_y),int(right_bottom_x),int(right_bottom_y)]

    [min_x, min_y] = np.amin(cur_img_points, axis=0)
    [max_x, max_y] = np.amax(cur_img_points, axis=0)

    offset = 2  # extra margin, in pixels, beyond the label square
    half = label_width / 2
    # The left/top edges are clamped at 0: a negative start index would wrap
    # around when the caller slices the image array and break the crop.
    if min_x - half < left_top_x:
        left_top_x = max(0, min_x - half - offset)
    if min_y - half < left_top_y:
        left_top_y = max(0, min_y - half - offset)
    if max_x + half > right_bottom_x:
        right_bottom_x = max_x + half + offset
    if max_y + half > right_bottom_y:
        right_bottom_y = max_y + half + offset

    # Express the points relative to the final crop origin.
    cur_img_points = cur_img_points - [left_top_x,left_top_y]
    return cur_img_points, [int(left_top_x),int(left_top_y),int(right_bottom_x),int(right_bottom_y)]


# Convert the points of one tile into a VOC-style XML annotation file
def json_points_to_xml(points,width,height,annotations_path, file, label_width=None):
    """Write ``<annotations_path><file>.xml`` describing one cropped tile.

    Every point becomes an ``<object>`` named ``"lgd"`` whose bounding box
    is the square of side ``label_width`` centred on the point.

    Args:
        points: Iterable of ``[x, y]`` centres in tile coordinates.
        width: Tile width written to ``<size><width>``.
        height: Tile height written to ``<size><height>``.
        annotations_path: Output directory prefix; it is concatenated
            directly with ``file``, so it must end with a path separator.
        file: Tile name without extension; also used (plus ``".png"``) as
            the ``<filename>`` value.
        label_width: Side of each bounding box; defaults to the
            module-level ``LABEL_WIDTH``.
    """
    if label_width is None:
        label_width = LABEL_WIDTH
    radius = label_width / 2

    doc = Document()

    def add_child(parent, tag, text=None):
        """Append a new <tag> element (with optional text) to parent and return it."""
        node = doc.createElement(tag)
        if text is not None:
            node.appendChild(doc.createTextNode(str(text)))
        parent.appendChild(node)
        return node

    annotation = add_child(doc, "annotation")
    add_child(annotation, "folder", "VOC2007")
    add_child(annotation, "filename", file + ".png")

    size = add_child(annotation, "size")
    add_child(size, "width", width)
    add_child(size, "height", height)
    add_child(size, "depth", "1")

    for center in points:
        one_object = add_child(annotation, "object")
        add_child(one_object, "name", "lgd")
        add_child(one_object, "pose", "center")
        add_child(one_object, "truncated", "0")
        add_child(one_object, "difficult", "0")

        bndbox = add_child(one_object, "bndbox")
        add_child(bndbox, "xmin", center[0] - radius)
        add_child(bndbox, "ymin", center[1] - radius)
        add_child(bndbox, "xmax", center[0] + radius)
        add_child(bndbox, "ymax", center[1] + radius)

    # The generated prolog declares no encoding, so XML readers assume UTF-8;
    # write UTF-8 explicitly instead of the platform default so non-ASCII
    # file names stay readable. The context manager closes the file.
    with open(annotations_path + file + ".xml", "w", encoding="utf-8") as f:
        f.write(doc.toprettyxml(indent='\t'))


# Script entry point: run the cropping pipeline with the default data_type ("val").
crop_image()

验证显示

代码如下：

import os
import json
import cv2
from xml.dom.minidom import Document
import xml.etree.cElementTree as ET


def draw_img_by_xml():
    """Show every cropped image with the boxes from its XML annotation drawn on it.

    For each file in the JPEGImages folder the XML with the same base name
    is parsed from the Annotations folder, every ``<object>/<bndbox>`` is
    drawn as a rectangle, and the image is displayed until a key is
    pressed. Files that cannot be decoded as images are skipped.
    """
    # Imported locally because xml.etree.cElementTree, which the
    # surrounding script imports as ET, no longer exists in Python 3.9+;
    # ElementTree provides the same parse()/find() API.
    import xml.etree.ElementTree as ET

    # Backslashes are doubled so the literals contain no invalid escape
    # sequences; the resulting path strings are unchanged.
    img_path = "xxx\\VOCdevkit\\VOC2007\\JPEGImages\\"
    xml_path = "xxx\\VOCdevkit\\VOC2007\\Annotations\\"
    for img_name in os.listdir(img_path):
        print(img_name)
        img = cv2.imread(img_path + img_name)
        if img is None:
            # imread returns None for files it cannot decode; drawing on
            # None would raise.
            print("skip unreadable image:", img_name)
            continue

        # splitext handles extensions of any length, unlike slicing off
        # the last four characters.
        xml_dir_name = xml_path + os.path.splitext(img_name)[0] + ".xml"
        tree = ET.parse(xml_dir_name)
        root = tree.getroot()

        # "obj" rather than "object" so the builtin is not shadowed.
        for obj in root.findall('object'):
            bndbox = obj.find('bndbox')
            # Coordinates are stored as decimal text (e.g. "85.0"), hence
            # float() before int().
            xmin = int(float(bndbox.find('xmin').text))
            ymin = int(float(bndbox.find('ymin').text))
            xmax = int(float(bndbox.find('xmax').text))
            ymax = int(float(bndbox.find('ymax').text))
            print(xmin, ymin, xmax, ymax)
            cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)
        cv2.imshow(img_name, img)
        cv2.waitKey(0)