图像很大（几千万像素）时，先在原图上完成标注，再以标注中心裁切出 640×640 大小的样本，用于 YOLO 训练

原创已于 2023-04-24 22:12:57 修改 · 1.1k 阅读

9 ·

CC 4.0 BY-SA版权

文章标签：

#YOLO #python #深度学习

于 2023-04-24 22:03:36 首次发布

机器学习/深度学习/人工智能/情感计算同时被 3 个专栏收录

223 篇文章

订阅专栏

机器视觉与图形图像

211 篇文章

订阅专栏

Opencv/Halcon

157 篇文章

订阅专栏

该代码段展示了一个Python脚本，用于处理高分辨率图像中的小目标检测。它首先使用Labelme工具进行标注，然后找到标注区域的最小包围框，计算中心并裁剪出640*640的训练样本。如果目标位于边界，脚本会调整边界线以适应。脚本还涉及图像的平移、缩放和坐标转换，以确保裁剪后的图像和对应的标注文件正确无误。

两千万像素的图，要检测的东西很小，可以先用labelme标注

测试图像

然后找到标注区域的最小包围框，再算出包围框中心，以该中心裁切出640*640的训练样本图片。如果检测对象位于裁切框的边界上，则把离边界最近的两点与原多边形留在框内的部分连接起来形成新的标注。这种处理边界情况的方式不是很理想，可以改为直接用边界线替代超出边界的部分。

import numpy as np
import os
import cv2
import xml.etree.ElementTree as ET
import shutil
import random
import json
import base64
from PIL import Image
from copy import deepcopy

np.random.seed(1337)    # Fix NumPy's global RNG seed so random results are reproducible


import shutil
# ul = upper-left corner, lr = lower-right corner
def pingyi(pic_ul_point, pic_lr_point, img_w=5472, img_h=3648):  # upper-left, lower-right
    """Shift a crop window so that it lies inside the image.

    The window keeps its size and is translated along each axis until it no
    longer sticks out of the image.  If the window is larger than the image
    along an axis, it is clamped to the full image extent on that axis
    (previously the upper-left coordinate could end up negative, which makes
    NumPy slicing pick the wrong region).

    Args:
        pic_ul_point: ``[x, y]`` of the window's upper-left corner.
        pic_lr_point: ``[x, y]`` of the window's lower-right corner.
        img_w: Image width in pixels (defaults to the original fixed 5472).
        img_h: Image height in pixels (defaults to the original fixed 3648).

    Returns:
        The tuple ``(pic_ul_point, pic_lr_point)``.  Both lists are modified
        in place and are the same objects that were passed in.
    """
    for axis, limit in ((0, img_w), (1, img_h)):
        # Window starts before the image: push it towards the positive side.
        if pic_ul_point[axis] < 0:
            pic_lr_point[axis] -= pic_ul_point[axis]
            pic_ul_point[axis] = 0
        # Window ends past the image: pull it back, but never below zero.
        if pic_lr_point[axis] > limit:
            overshoot = pic_lr_point[axis] - limit
            pic_ul_point[axis] = max(0, pic_ul_point[axis] - overshoot)
            pic_lr_point[axis] = limit
    return pic_ul_point, pic_lr_point

def pdInPic(shapes_list_i2, pic_ul_point, pic_lr_point):
    """Tell whether a shape's bounding-box centre lies strictly inside a window.

    Args:
        shapes_list_i2: Shape dict whose ``"points"`` entry is a non-empty
            list of ``[x, y]`` vertices.
        pic_ul_point: ``[x, y]`` of the window's upper-left corner.
        pic_lr_point: ``[x, y]`` of the window's lower-right corner.

    Returns:
        ``1`` when the (integer-truncated) centre of the axis-aligned bounding
        box is strictly inside the window on both axes, otherwise ``0``.
    """
    vertices = shapes_list_i2["points"]

    # Seed the extents with the first vertex, then widen with the rest.
    x_lo = x_hi = vertices[0][0]
    y_lo = y_hi = vertices[0][1]
    for vertex in vertices[1:]:
        x_lo = min(x_lo, vertex[0])
        x_hi = max(x_hi, vertex[0])
        y_lo = min(y_lo, vertex[1])
        y_hi = max(y_hi, vertex[1])

    centre_x = int((x_hi - x_lo) / 2 + x_lo)
    centre_y = int((y_hi - y_lo) / 2 + y_lo)

    inside_x = pic_ul_point[0] < centre_x < pic_lr_point[0]
    inside_y = pic_ul_point[1] < centre_y < pic_lr_point[1]
    return 1 if (inside_x and inside_y) else 0


_IMAGE_SUFFIXES = (".png", ".jpg", ".bmp", ".jpeg")


def _bounding_box(points):
    """Return ``(x_min, y_min, x_max, y_max)`` of a non-empty list of ``[x, y]`` points."""
    xs = [p[0] for p in points]
    ys = [p[1] for p in points]
    return min(xs), min(ys), max(xs), max(ys)


def _fit_window(ul, lr, img_w, img_h):
    """Return copies of ``ul``/``lr`` shifted (and clamped if needed) to lie inside the image."""
    ul, lr = list(ul), list(lr)
    for axis, limit in ((0, img_w), (1, img_h)):
        if ul[axis] < 0:
            lr[axis] -= ul[axis]
            ul[axis] = 0
        if lr[axis] > limit:
            # Never let the start go negative: a negative slice index would
            # silently select the wrong region of the array.
            ul[axis] = max(0, ul[axis] - (lr[axis] - limit))
            lr[axis] = limit
    return ul, lr


def _remap_shapes(shapes, ul, scale_x, scale_y, out_w, out_h):
    """Move shape points into crop coordinates and drop points outside the output image.

    ``shapes`` are modified in place (callers pass private copies).  Shapes
    that end up with no point at all are omitted from the result.
    """
    remapped = []
    for shape in shapes:
        kept = []
        for point in shape["points"]:
            x = (point[0] - ul[0]) / scale_x
            y = (point[1] - ul[1]) / scale_y
            # Bounds are those of the *output* image, i.e. after any resize.
            if 0 < x < out_w and 0 < y < out_h:
                kept.append([x, y])
        if kept:
            shape["points"] = kept
            remapped.append(shape)
    return remapped


def _write_labelme_json(img_path, shapes, version, img_w, img_h):
    """Write a labelme-style JSON next to ``img_path`` with the image embedded as base64."""
    with open(img_path, "rb") as img_file:
        image_data = base64.b64encode(img_file.read()).decode("ascii")
    annotation = {
        "version": version,
        "flags": {},
        "shapes": shapes,
        # NOTE(review): kept as the full output path for compatibility with the
        # previous output; labelme itself usually stores a path relative to the
        # JSON file here -- confirm what downstream tools expect.
        "imagePath": img_path,
        "imageData": image_data,
        "imageHeight": img_h,
        "imageWidth": img_w,
    }
    with open(os.path.splitext(img_path)[0] + ".json", "w", encoding="utf-8") as json_file:
        json.dump(annotation, json_file)


def crop640(path_input, path_output):
    """Cut one training sample per annotated shape out of large labelme-annotated images.

    Every image in ``path_input`` (``.png``/``.jpg``/``.bmp``/``.jpeg``, any
    letter case) that has a JSON file with the same stem is processed.  For
    the k-th shape of an image ``<stem>``:

    * If the shape's bounding box is smaller than 640x640, a 640x640 window
      centred on the bounding box is cropped (shifted to stay inside the
      image).
    * Otherwise the bounding box plus a 20 px margin is cropped and resized
      to 640x640, and the annotation coordinates are scaled accordingly.

    Every shape whose bounding-box centre lies inside the window is written
    to the new annotation; points that fall outside the output image are
    removed.  Results are saved as ``<stem>_<k>.png`` and ``<stem>_<k>.json``
    in ``path_output`` (created if missing).

    Args:
        path_input: Directory holding the source images and their JSON files.
        path_output: Directory that receives the cropped images and JSON files.

    Raises:
        OSError: If a cropped image cannot be written.
    """
    w, h = 640, 640
    os.makedirs(path_output, exist_ok=True)

    file_names = os.listdir(path_input)
    known_names = set(file_names)

    for img_name in file_names:
        if not img_name.lower().endswith(_IMAGE_SUFFIXES):
            continue
        # splitext copes with suffixes of any length (".jpeg" has five characters).
        stem = os.path.splitext(img_name)[0]
        json_name = stem + ".json"
        if json_name not in known_names:
            continue

        img_path = path_input + "/" + img_name
        img0 = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img0 is None:
            print("Skipping unreadable image: " + img_path)
            continue
        h0, w0 = img0.shape[:2]

        # Load the original annotation.
        with open(path_input + "/" + json_name, "r", encoding="utf-8") as f0:
            json_file = json.load(f0)
        shapes_list = json_file["shapes"]
        y_version = json_file["version"]

        for idex, shape in enumerate(shapes_list, start=1):  # one defect at a time
            points = shape["points"]
            if not points:
                continue

            x_min, y_min, x_max, y_max = _bounding_box(points)
            fits_in_640 = (y_max - y_min) < h and (x_max - x_min) < w

            if fits_in_640:
                # Small defect: fixed-size window centred on the bounding box.
                xmid = int((x_max - x_min) / 2 + x_min)
                ymid = int((y_max - y_min) / 2 + y_min)
                ul, lr = [xmid - 320, ymid - 320], [xmid + 320, ymid + 320]
            else:
                # Large defect: bounding box with a 20 px margin, resized later.
                ul = [int(x_min - 20), int(y_min - 20)]
                lr = [int(x_max + 20), int(y_max + 20)]

            # Keep the window inside the actual image that was just loaded.
            ul, lr = _fit_window(ul, lr, w0, h0)

            crop_img = img0[ul[1]:lr[1], ul[0]:lr[0]]
            scale_x = scale_y = 1
            if not fits_in_640:
                scale_x, scale_y = (lr[0] - ul[0]) / w, (lr[1] - ul[1]) / h
                crop_img = cv2.resize(crop_img, (w, h), interpolation=cv2.INTER_AREA)
            out_h, out_w = crop_img.shape[:2]

            newImgPath = path_output + "/" + stem + "_" + str(idex) + ".png"
            if not cv2.imwrite(newImgPath, crop_img):
                raise OSError("Could not write cropped image: " + newImgPath)

            # Keep only the shapes whose bounding-box centre is inside the
            # window; work on copies so the source annotation stays untouched.
            visible = [
                deepcopy(other)
                for other in shapes_list
                if other["points"] and pdInPic(other, ul, lr)
            ]
            new_shapes = _remap_shapes(visible, ul, scale_x, scale_y, out_w, out_h)
            _write_labelme_json(newImgPath, new_shapes, y_version, out_w, out_h)


if __name__ == "__main__":
    # Demo run: crop every annotated image found in the input folder and
    # write the samples (image + JSON) to the output folder.
    path_input, path_output = "./test_crop/input", "./test_crop/output"
    crop640(path_input=path_input, path_output=path_output)

运行裁切完后