目标检测小目标数据增强大杀器-贴图方案（支持yolo格式）

最新推荐文章于 2024-06-12 09:54:13 发布

落难Coder

最新推荐文章于 2024-06-12 09:54:13 发布

阅读量1k

点赞数 2

分类专栏：竞赛之神文章标签：目标检测计算机视觉

本文链接：https://blog.csdn.net/u014297502/article/details/127667933

版权

竞赛之神专栏收录该内容

30 篇文章 7 订阅

订阅专栏

文件结构

- Images：包含yolo格式标注文件和被标注的图像文件
- bbox：空文件夹，用于保存 crop_roi.py 裁剪出的小目标图像

切割小目标图像

crop_roi.py

import os
import shutil
from os.path import join
import cv2
import glob

# crop_roi.py: cut every labelled object out of the images in ./Images and
# save each crop as its own PNG in ./bbox. The path of every saved crop is
# also listed, one per line, in dpj_small.txt.
root_dir = "./Images"
save_dir = "./bbox"

# Create the output folder up front: cv2.imwrite reports a missing directory
# only through its return value, so crops would otherwise be lost silently.
os.makedirs(save_dir, exist_ok=True)

jpg_list = glob.glob(root_dir + "/*.png")

# Largest / smallest box area (in pixels) seen over the whole data set.
max_s = -1
min_s = float("inf")  # a fixed sentinel such as 1000 is wrong for larger boxes

with open("dpj_small.txt", "w") as fo:
    for jpg_path in jpg_list:
        # Swap only the extension; str.replace("png", "txt") would also
        # rewrite any "png" occurring elsewhere in the path.
        txt_path = os.path.splitext(jpg_path)[0] + ".txt"
        jpg_stem = os.path.splitext(os.path.basename(jpg_path))[0]

        with open(txt_path, "r") as f:
            file_contents = f.readlines()

        img = cv2.imread(jpg_path)
        if img is None:
            # cv2.imread returns None instead of raising on a bad file.
            print("skip unreadable image:", jpg_path)
            continue

        height, width = img.shape[:2]

        for num, file_content in enumerate(file_contents):
            fields = file_content.split()
            if len(fields) != 5:
                # Blank or malformed label line: nothing to crop.
                continue
            print(num)
            clss, xc, yc, w, h = fields
            xc, yc, w, h = float(xc), float(yc), float(w), float(h)

            # YOLO labels are normalised; scale them back to pixels.
            xc *= width
            yc *= height
            w *= width
            h *= height

            max_s = max(w * h, max_s)
            min_s = min(w * h, min_s)

            half_w, half_h = w // 2, h // 2

            # Clamp to the image: a negative start index would make NumPy
            # slice from the opposite edge and yield an empty or wrong crop.
            x1, y1 = max(int(xc - half_w), 0), max(int(yc - half_h), 0)
            x2, y2 = min(int(xc + half_w), width), min(int(yc + half_h), height)
            if x2 <= x1 or y2 <= y1:
                continue  # degenerate box, cv2.imwrite rejects empty images

            crop_img = img[y1:y2, x1:x2]

            new_jpg_name = jpg_stem + "_crop_" + str(num) + ".png"
            crop_path = os.path.join(save_dir, new_jpg_name)
            # Only list crops that were actually written to disk.
            if cv2.imwrite(crop_path, crop_img):
                fo.write(crop_path + "\n")

print(max_s, min_s)

贴图

paste.py

import os
import random
from os.path import join
import aug
import Helpers as hp

from util import *

# ########### Pipeline ##############
# 1. Prepare the data set with YOLO-format labels. If your labels are in VOC
#    or COCO format, convert them to YOLO first and convert back after the
#    augmentation.
# 2. Run crop_roi.py to crop the labelled objects and save them as images.
# 3. Run paste.py to paste randomly chosen crops onto the images to augment,
#    saving the augmented images together with their label files.

base_dir = os.getcwd()

save_base_dir = join(base_dir, 'save_path')

check_dir(save_base_dir)

# Images to augment and their label files (same name, .txt extension).
imgs_dir = [os.path.join('Images', f) for f in os.listdir('Images') if f.endswith('.png')]
labels_dir = hp.replace_labels(imgs_dir)


def _shuffled_patches():
    """Return the paths of all cropped patches in ./bbox in random order."""
    patches = [os.path.join('bbox', f) for f in os.listdir('bbox') if f.endswith('.png')]
    random.shuffle(patches)
    return patches


small_imgs_dir = _shuffled_patches()

times = 2  # number of patch images drawn for every augmented image

for image_dir, label_dir in zip(imgs_dir, labels_dir):
    small_img = []
    for _ in range(times):
        if not small_imgs_dir:
            # Pool exhausted: start over with a freshly shuffled pool.
            small_imgs_dir = _shuffled_patches()
            if not small_imgs_dir:
                # Without this check pop() below fails with a bare IndexError.
                raise FileNotFoundError(
                    "no .png patches found in ./bbox - run crop_roi.py first")
        small_img.append(small_imgs_dir.pop())
    aug.copysmallobjects(image_dir, label_dir, save_base_dir, small_img, times)

修改 paste.py 第 32 行的 `times` 可以调整每张图片贴入的小目标图片数量；贴入目标的类别由 util.py 中 random_add_patches 函数里的 `cl`（默认为 5）决定，每次只能贴一种类别。

util.py：

import os
import cv2
import numpy as np
from os.path import join, split
import random


def convert(size, box):
    """Convert a pixel-space box to a normalised YOLO ``(xc, yc, w, h)`` tuple.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(x_min, x_max, y_min, y_max)`` in pixels. Note the order: both
            x values come first, then both y values.

    Returns:
        ``(x_center, y_center, width, height)``; x values are divided by the
        image width and y values by the image height.
    """
    dw = 1. / size[0]
    dh = 1. / size[1]
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    # No "- 1" correction on the centre: that offset only suits 1-based pixel
    # indices. Boxes obtained by scaling YOLO labels (rescale_yolo_labels)
    # are 0-based, and the offset would move every box by one pixel each
    # time a label file is read and written back.
    x_center = (x_min + x_max) / 2.0
    y_center = (y_min + y_max) / 2.0
    box_w = x_max - x_min
    box_h = y_max - y_min
    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)


def issmallobject(bbox, thresh):
    """Tell whether the product of the first two entries of *bbox* is small.

    Args:
        bbox: sequence whose first two entries are multiplied together
            (presumably the box width and height - confirm with callers).
        thresh: largest product that still counts as "small" (inclusive).

    Returns:
        True when ``bbox[0] * bbox[1] <= thresh``, otherwise False.
    """
    first, second = bbox[0], bbox[1]
    return True if first * second <= thresh else False


def read_label_txt(label_dir):
    """Read a label file into a list of string fields per line.

    Args:
        label_dir: path of the label text file.

    Returns:
        One list of whitespace-separated string fields for every non-blank
        line, in file order. An empty file gives an empty list.
    """
    labels = []
    with open(label_dir) as fp:
        for line in fp:
            # split() without an argument copes with tabs and repeated
            # spaces, which split(' ') would turn into empty fields.
            fields = line.split()
            if fields:  # skip blank lines instead of returning [''] records
                labels.append(fields)
    return labels


def load_txt_label(label_dir):
    """Load one label text file with NumPy, keeping every field as a string.

    Args:
        label_dir: path of the label file handed to ``np.loadtxt``.

    Returns:
        The array produced by ``np.loadtxt(..., dtype=str)``.
    """
    label_array = np.loadtxt(fname=label_dir, dtype=str)
    return label_array


def load_txt_labels(label_dir):
    """Load several label files.

    Args:
        label_dir: iterable of label file paths (despite the singular name).

    Returns:
        A list holding the result of ``load_txt_label`` for every path, in
        the order given.
    """
    return [load_txt_label(label_path) for label_path in label_dir]


def check_dir(dir):
    """Create directory *dir*, including missing parents, if it is absent.

    Args:
        dir: path of the directory to ensure. Nothing happens when the path
            already exists.
    """
    if os.path.exists(dir):
        return
    # exist_ok=True closes the gap between the check above and the creation:
    # if another process creates the directory in between, this no longer
    # raises FileExistsError.
    os.makedirs(dir, exist_ok=True)


def rescale_yolo_labels(labels, img_shape):
    """Turn normalised YOLO labels into pixel corner boxes.

    Args:
        labels: iterable of records ``[class, xc, yc, w, h]`` whose last four
            entries are convertible to float and relative to the image size.
        img_shape: image shape starting with ``(height, width)``; further
            entries such as a channel count are ignored, so 2-D (grayscale)
            shapes work as well.

    Returns:
        A list of ``[class, x_left, y_top, x_right, y_bottom]`` where the
        class entry is passed through unchanged and the coordinates are
        truncated to int.
    """
    height, width = img_shape[0], img_shape[1]
    rescale_boxes = []
    for box in labels:
        x_c = float(box[1]) * width
        y_c = float(box[2]) * height
        half_w = float(box[3]) * width * .5
        half_h = float(box[4]) * height * .5
        rescale_boxes.append([
            box[0],
            int(x_c - half_w),
            int(y_c - half_h),
            int(x_c + half_w),
            int(y_c + half_h),
        ])
    return rescale_boxes


def draw_annotation_to_image(img, annotation, save_img_dir):
    """Draw every annotated box with its class text on *img* and save it.

    Args:
        img: image array; it is drawn on in place.
        annotation: iterable of ``(class_text, x1, y1, x2, y2)`` records with
            corner coordinates accepted by ``cv2.rectangle``.
        save_img_dir: path of the output image file (a file path, despite
            the name).
    """
    label_font = cv2.FONT_HERSHEY_SIMPLEX
    for class_text, left, top, right, bottom in annotation:
        cv2.rectangle(img, pt1=(left, top), pt2=(right, bottom), color=(255, 0, 0))
        # Text is placed 5 px above the top edge at the horizontal middle.
        text_origin = (int((left + right) / 2), top - 5)
        cv2.putText(img, class_text, text_origin, label_font, fontScale=0.8, color=(0, 0, 255))
    cv2.imwrite(save_img_dir, img)


def bbox_iou(box1, box2):
    """Intersection over union of two ``[class, x1, y1, x2, y2]`` boxes.

    The class entry is ignored. Widths and heights are computed as
    ``x2 - x1 + 1``, so boxes that merely touch already overlap.

    Returns:
        The IoU as a float, or the int ``0`` when the boxes do not overlap.
    """
    _, a_x1, a_y1, a_x2, a_y2 = box1
    _, b_x1, b_y1, b_x2, b_y2 = box2

    # Extent of the intersection rectangle.
    overlap_w = min(a_x2, b_x2) - max(a_x1, b_x1) + 1
    overlap_h = min(a_y2, b_y2) - max(a_y1, b_y1) + 1
    if not (overlap_w > 0 and overlap_h > 0):
        return 0

    overlap_area = overlap_w * overlap_h
    area_a = (a_x2 - a_x1 + 1) * (a_y2 - a_y1 + 1)
    area_b = (b_x2 - b_x1 + 1) * (b_y2 - b_y1 + 1)
    return overlap_area / (area_a + area_b - overlap_area)


def swap(x1, x2):
    """Return the two values as a tuple in ascending order.

    The arguments are exchanged only when ``x1 > x2``; equal values keep
    their original order.
    """
    return (x2, x1) if x1 > x2 else (x1, x2)


def norm_sampling(search_space):
    """Draw a random integer point inside a rectangular search space.

    Args:
        search_space: ``(x_left, y_left, x_right, y_right)``; every bound is
            truncated to int first.

    Returns:
        ``[x, y]`` where each coordinate comes from ``random.randint`` over
        its inclusive range (a uniform draw, despite the function name).
    """
    x_low, y_low, x_high, y_high = (int(bound) for bound in search_space)
    # x is drawn before y so that a seeded generator gives the same points.
    center_x = random.randint(x_low, x_high)
    center_y = random.randint(y_low, y_high)
    return [center_x, center_y]


def flip_bbox(roi):
    """Return *roi* with its second axis reversed.

    For an array indexed as (rows, columns, channels) this is a left-right
    mirror. The result comes from slicing, so the input is not modified.
    """
    reverse = slice(None, None, -1)
    return roi[:, reverse, :]


def sampling_new_bbox_center_point(img_shape, bbox):
    """Return the region in which new box centres may be sampled.

    Args:
        img_shape: ``(height, width, channels)`` of the target image.
        bbox: three-element shape of the patch. It is unpacked but its values
            do not influence the result.

    Returns:
        ``[x_left, y_left, x_right, y_right]`` covering 10 % to 90 % of the
        image width and height, as floats.
    """
    height, width, _channels = img_shape
    # Unused, but unpacking keeps the failure on a malformed patch shape.
    _patch_h, _patch_w, _patch_c = bbox

    # Edit these fractions to restrict the area where patches may land.
    low_fraction, high_fraction = 0.1, 0.9
    return [
        width * low_fraction,
        height * low_fraction,
        width * high_fraction,
        height * high_fraction,
    ]


def random_add_patches(bbox_img, rescale_boxes, shape, paste_number, iou_thresh,
                       cl=5, max_attempts=10000):
    """Pick random, non-overlapping positions for a patch inside an image.

    Args:
        bbox_img: patch shape ``(height, width, channels)``.
        rescale_boxes: existing boxes ``[class, x1, y1, x2, y2]`` in pixels
            that new boxes must not overlap.
        shape: image shape ``(height, width, channels)``.
        paste_number: number of positions wanted.
        iou_thresh: a candidate is accepted only if its IoU with every
            existing and every already accepted box is ``<= iou_thresh``.
        cl: class id stored in the returned boxes. Only one class can be
            pasted per call.
        max_attempts: upper bound on random draws. Without it the search
            never ends when the patch cannot be placed, for example when it
            is larger than the image or the image is already crowded.

    Returns:
        A list of ``[cl, x1, y1, x2, y2]`` boxes. It holds fewer than
        ``paste_number`` entries when ``max_attempts`` runs out first.
    """
    if paste_number <= 0:
        return []

    bbox_h, bbox_w, _ = bbox_img
    img_h, img_w, _ = shape
    # Region from which candidate centre points are drawn.
    center_search_space = sampling_new_bbox_center_point(shape, bbox_img)
    new_bboxes = []

    for _ in range(max_attempts):
        if len(new_bboxes) >= paste_number:
            break
        center_x, center_y = norm_sampling(center_search_space)
        left, right = center_x - 0.5 * bbox_w, center_x + 0.5 * bbox_w
        top, bottom = center_y - 0.5 * bbox_h, center_y + 0.5 * bbox_h
        # The whole patch has to lie inside the image.
        if left < 0 or right > img_w or top < 0 or bottom > img_h:
            continue
        candidate = [cl, int(left), int(top), int(right), int(bottom)]

        # all() is also correct for an empty list of boxes, where taking
        # max() of the IoU list would raise ValueError.
        clear_of_existing = all(
            bbox_iou(candidate, other) <= iou_thresh for other in rescale_boxes)
        clear_of_new = all(
            bbox_iou(candidate, other) <= iou_thresh for other in new_bboxes)
        if clear_of_existing and clear_of_new:
            new_bboxes.append(candidate)

    return new_bboxes

aug.py：

# import glob
import cv2 as cv2
import numpy as np
# from PIL import Image
import random
import math
from os.path import basename, split, join, dirname
from util import *


def find_str(filename):
    """Return the directory part of *filename* starting at 'train' or 'val'.

    The path is cut at the first occurrence of ``'train'`` (checked first) or
    ``'val'`` and the directory name of the remainder is returned, e.g.
    ``'data/train/imgs/a.png'`` gives ``'train/imgs'``.

    Returns:
        The sub-directory string, or ``''`` when the path contains neither
        marker.
    """
    for marker in ('train', 'val'):
        start = filename.find(marker)
        if start != -1:
            return dirname(filename[start:])
    # Neither marker is present. Slicing with the -1 that find() returns
    # would keep just the last character, which yields '/' for a path ending
    # in a slash and redirects output to the filesystem root.
    return ''


def convert_all_boxes(shape, anno_infos, yolo_label_txt_dir):
    """Write pixel boxes to a YOLO-format label file.

    Args:
        shape: image shape starting with ``(height, width)``.
        anno_infos: iterable of ``(class_id, x1, y1, x2, y2)`` pixel boxes.
        yolo_label_txt_dir: path of the label file to write (overwritten).

    Each output line is ``class_id`` followed by the four values returned by
    ``convert``, separated by single spaces.
    """
    height, width = shape[0], shape[1]
    # The context manager guarantees the file is flushed and closed; it was
    # previously left open for the garbage collector to deal with.
    with open(yolo_label_txt_dir, 'w') as label_file:
        for target_id, x1, y1, x2, y2 in anno_infos:
            # convert() expects (x_min, x_max, y_min, y_max).
            pixel_box = (float(x1), float(x2), float(y1), float(y2))
            yolo_box = convert((width, height), pixel_box)
            label_file.write(
                str(target_id) + " " + " ".join([str(a) for a in yolo_box]) + '\n')


def save_crop_image(save_crop_base_dir, image_dir, idx, roi):
    """Save one cropped region as ``<image name>_crop_<idx>.png``.

    Args:
        save_crop_base_dir: root folder for the crops.
        image_dir: path of the source image. Its 'train'/'val' sub-directory
            (see ``find_str``) is reproduced below the root folder.
        idx: index of the crop, used in the file name.
        roi: image array to write.
    """
    from os.path import splitext

    crop_save_dir = join(save_crop_base_dir, find_str(image_dir))
    check_dir(crop_save_dir)
    # Strip the real extension. Cutting a fixed three characters kept the
    # dot, which produced names such as "img._crop_0.png".
    stem = splitext(basename(image_dir))[0]
    crop_img_save_dir = join(crop_save_dir, stem + '_crop_' + str(idx) + '.png')
    cv2.imwrite(crop_img_save_dir, roi)


def GaussianBlurImg(image):
    """Blur *image* with a random odd Gaussian kernel, or return it as is.

    An integer between 0 and 9 is drawn. An odd value is used as the square
    kernel size for ``cv2.GaussianBlur``; for an even value the input is
    returned untouched.
    """
    kernel = random.randint(0, 9)
    if kernel % 2 != 1:
        return image
    return cv2.GaussianBlur(image, ksize=(kernel, kernel), sigmaX=0, sigmaY=0)


def roi_resize(image, area_max=2000, area_min=1000, size=(60, 60)):
    """Resize a cropped patch to a fixed size.

    Args:
        image: patch image array.
        area_max: not used; kept so existing calls keep working.
        area_min: not used; kept so existing calls keep working.
        size: target size passed to ``cv2.resize`` as ``(width, height)``.
            Keep it small: a patch that is too large leaves no room in the
            target image to place it.

    Returns:
        The resized image.
    """
    return cv2.resize(image, size)


def copysmallobjects(image_dir, label_dir, save_base_dir, small_img_dir,
                      times):
    """Paste small object patches onto one image and save image plus labels.

    Args:
        image_dir: path of the image to augment.
        label_dir: path of its YOLO label file. Nothing is written when the
            file holds no labels.
        save_base_dir: root output folder; the 'train'/'val' sub-directory of
            ``image_dir`` (see ``find_str``) is reproduced below it.
        small_img_dir: iterable of patch image paths to paste.
        times: only used in the output names ``<name>_aug_<times>.png`` and
            ``<name>_aug_<times>.txt``.

    Every patch is resized, placed at two random positions that do not
    overlap any known box, and blended in with ``cv2.seamlessClone``. The
    second copy of a patch is mirrored left-right.
    """
    image = cv2.imread(image_dir)
    labels = read_label_txt(label_dir)
    if len(labels) == 0:
        return
    if image is None:
        # cv2.imread returns None instead of raising for an unreadable file.
        print("skip unreadable image:", image_dir)
        return

    # YOLO (xc, yc, w, h) -> pixel corners (x1, y1, x2, y2).
    rescale_labels = rescale_yolo_labels(labels, image.shape)
    print("org bbox:", rescale_labels)  # boxes of the original image
    # Boxes written to the new label file: the originals plus pasted ones.
    all_boxes = list(rescale_labels)

    for patch_path in small_img_dir:
        image_bbox = cv2.imread(patch_path)
        if image_bbox is None:
            print("skip unreadable patch:", patch_path)
            continue
        roi = roi_resize(image_bbox, area_max=1000, area_min=200)
        print('===', rescale_labels)
        # Positions for the patch that do not cover any box known so far.
        new_bboxes = random_add_patches(roi.shape,
                                         rescale_labels,
                                         image.shape,
                                         paste_number=2,  # copies per patch
                                         iou_thresh=0)    # largest allowed IoU
        print(new_bboxes)
        for count, new_bbox in enumerate(new_bboxes, start=1):
            cl, bbox_left, bbox_top, bbox_right, bbox_bottom = new_bbox
            height, width, channels = roi.shape
            center = (int(width / 2), int(height / 2))
            # All-white mask: the whole patch is blended.
            mask = 255 * np.ones(roi.shape, roi.dtype)
            try:
                if count > 1:  # second and later copies are mirrored
                    roi = flip_bbox(roi)
                # Blend the patch into the equally sized image region it
                # will occupy, then write the result back into the image.
                image[bbox_top:bbox_bottom, bbox_left:
                      bbox_right] = cv2.seamlessClone(
                          roi,
                          image[bbox_top:bbox_bottom, bbox_left:bbox_right],
                          mask, center, cv2.NORMAL_CLONE)
                all_boxes.append(new_bbox)
                # Later patches must stay clear of this one as well.
                rescale_labels.append(new_bbox)
            except ValueError:
                # For example a shape mismatch in the slice assignment:
                # give up on this position only.
                print("---")
                continue

    dir_name = find_str(image_dir)
    save_dir = join(save_base_dir, dir_name)
    check_dir(save_dir)
    yolo_txt_dir = join(
        save_dir,
        basename(image_dir.replace('.png', '_aug_%s.txt' % str(times))))
    cv2.imwrite(
        join(save_dir,
             basename(image_dir).replace('.png', '_aug_%s.png' % str(times))),
        image)
    convert_all_boxes(image.shape, all_boxes, yolo_txt_dir)

Helpers.py：

import glob
import cv2 as cv2
import numpy as np
import matplotlib.pyplot as plt
# import random
import math
from tqdm import tqdm


def load_images(path):
    """Load every image matching a glob pattern, converted from BGR to RGB.

    Args:
        path: glob pattern handed to ``glob.glob``.

    Returns:
        A list of image arrays in the order ``glob.glob`` reports the files.
    """
    return [
        cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
        for image_path in glob.glob(path)
    ]


def read_images(path):
    """Return the file paths matching the glob pattern *path*.

    No image is opened; this only lists the matching paths.
    """
    return glob.glob(path)


def load_images_from_path(path):
    """Load the images at the given paths, converted from BGR to RGB.

    Args:
        path: iterable of image file paths (despite the singular name). It
            is wrapped in ``tqdm`` to show progress.

    Returns:
        A list of image arrays in input order.
    """
    return [
        cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
        for image_path in tqdm(path)
    ]


def replace_labels(path):
    """Map image paths to the paths of their label files.

    Args:
        path: iterable of image file paths.

    Returns:
        A list with each path's extension replaced by ``.txt``.
    """
    from os.path import splitext

    # Swap only the extension. str.replace('.png', '.txt') would also
    # rewrite a '.png' inside a directory name, and would leave images with
    # any other extension unchanged so the label path equalled the image.
    return [splitext(image_path)[0] + '.txt' for image_path in path]

其他tools

rename.bat：

@echo off
REM Switch the console to the UTF-8 code page.
chcp 65001
setlocal enabledelayedexpansion
REM The counter starts at 1000 so that "!count:~1!" (everything after the
REM first character) is a zero-padded number: 1001 -> 001, 1002 -> 002, ...
set count=1000
REM Stop when the image folder cannot be entered; otherwise the loop below
REM would rename the images of the directory the script was started from.
cd ../datasets/JPEGImages || (
    echo Cannot enter ../datasets/JPEGImages - nothing was renamed.
    pause
    exit /b 1
)
REM Rename every image found in the folder, one call per file.
for /f "delims=" %%i in ('dir /b *.jpg,*.png,*.bmp,*.jpeg,*.gif') do call:Rename "%%~i"
pause
exit
 
REM Rename one file (%1) to the next free number, keeping its extension.
:Rename
set /a count+=1
REM The file already carries the number that is next in line: leave it.
if /i "%~1"=="!count:~1!%~x1" goto :eof
REM The number is taken by another file: try the following one.
if exist "!count:~1!%~x1" goto Rename
echo rename：%1 !count:~1!
ren "%~1" "!count:~1!%~x1"
goto :eof