python 离线数据增强（扩充数据集）

最新推荐文章于 2024-08-12 21:23:23 发布

oyjwin

最新推荐文章于 2024-08-12 21:23:23 发布

阅读量4.6k

点赞数 8

文章标签： python 深度学习

本文链接：https://blog.csdn.net/oyjwin/article/details/118225295

版权

增强方式包括:
#     (一) 针对像素的数据增强
#     1. 改变亮度
#     2. 加噪声
#     (二) 针对图像的数据增强
#     3. 裁剪(需改变bbox)
#     4. 平移(需改变bbox)
#     5. 镜像(需要改变bbox)
#     6. 旋转(需要改变bbox)
#     7. 遮挡

难点：博主在进行亮度、噪声、裁剪、平移、镜像、遮挡的实现时，还是比较轻松的；但是在旋转的实现中，label的调整陷入了麻烦，但是最后通过改进函数的方式；

最初：

# X_MIN = min(X1, X2, X3, X4)
# X_MAX = max(X1, X2, X3, X4)
# Y_MIN = min(Y1, Y2, Y3, Y4)
# Y_MAX = max(Y1, Y2, Y3, Y4)

改进：

            NEW_X1 = (X1 + X3) / 2
            NEW_X2 = (X2 + X3) / 2
            NEW_X3 = (X2 + X4) / 2
            NEW_X4 = (X1 + X4) / 2

            NEW_Y1 = (Y1 + Y3) / 2
            NEW_Y2 = (Y2 + Y3) / 2
            NEW_Y3 = (Y2 + Y4) / 2
            NEW_Y4 = (Y1 + Y4) / 2
            X_MIN = min(NEW_X1, NEW_X2, NEW_X3, NEW_X4)
            X_MAX = max(NEW_X1, NEW_X2, NEW_X3, NEW_X4)
            Y_MIN = min(NEW_Y1, NEW_Y2, NEW_Y3, NEW_Y4)
            Y_MAX = max(NEW_Y1, NEW_Y2, NEW_Y3, NEW_Y4)

结果：旋转后的label可以很好的框住检测目标；

下面是完整代码：

# -*- coding=utf-8 -*-
# -----------------------------------------------------------------
# Description:
#     data augmentation for obeject detection
# 增强方式包括:
#     (一) 针对像素的数据增强
#     1. 改变亮度
#     2. 加噪声
#     (二) 针对图像的数据增强
#     3. 裁剪(需改变bbox)
#     4. 平移(需改变bbox)
#     5. 镜像(需要改变bbox)
#     6. 旋转(需要改变bbox)
#     7. 遮挡
# 注意:
#     random.seed(),相同的seed,产生的随机数是一样的!!

import time
import random
import cv2
import os
import copy
import math
import numpy as np
from PIL import Image
from skimage.util import random_noise
from skimage import exposure
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ElementTree, Element
import time
import torch


def show_pic(img, bboxes=None):
    '''
    输入:
        img:    图像array
        bboxes: 图像的所有boudning box list, 格式为[[x_min, y_min, x_max, y_max]....]
        names:  每个box对应的名称
    '''
    cv2.imwrite('./1.jpg', img)
    img = cv2.imread('./1.jpg')
    for i in range(len(bboxes)):
        bbox = bboxes[i]
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[2]
        y_max = bbox[3]
        cv2.rectangle(img, (int(x_min), int(y_min)), (int(x_max), int(y_max)), (0, 255, 0), thickness=3)
    cv2.namedWindow('pic', 0)  # 1表示原图
    cv2.moveWindow('pic', 0, 0)  # 将显示窗口移到显示屏的相应位置
    cv2.resizeWindow('pic', 1200, 800)  # 可视化的图片大小
    cv2.imshow('pic', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()



# 图像均为cv2读取
class DataAugmentForObjectDetection():
    def __init__(self, rotation_rate=0.5, max_rotation_angle=5,
                 crop_rate=0.5, shift_rate=0.5, change_light_rate=0.5,
                 add_noise_rate=0.5, flip_rate=0.5,
                 erase_rate=0.5, erase_length=10, erase_holes=1, erase_threshold=0.5):
        self.rotation_rate = rotation_rate
        self.max_rotation_angle = max_rotation_angle
        self.crop_rate = crop_rate
        self.shift_rate = shift_rate
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.flip_rate = flip_rate
        self.erase_rate = erase_rate
        self.erase_length = erase_length
        self.erase_holes = erase_holes
        self.erase_threshold = erase_threshold

    # 加噪声
    def sp_noise(self,image, prob):
        '''
        添加椒盐噪声
        prob:噪声比例
        '''
        output = np.zeros(image.shape, np.uint8)
        thres = 1 - prob
        for i in range(image.shape[0]):
            for j in range(image.shape[1]):
                rdn = random.random()
                if rdn < prob:
                    output[i][j] = 0
                elif rdn > thres:
                    output[i][j] = 255
                else:
                    output[i][j] = image[i][j]
        return output

    def gasuss_noise(self,image, mean=0, var=0.00001):
        '''
            添加高斯噪声
            mean : 均值
            var : 方差
        '''
        image = np.array(image / 255, dtype=float)
        noise = np.random.normal(mean, var ** 0.5, image.shape)
        out = image + noise
        if out.min() < 0:
            low_clip = -1.
        else:
            low_clip = 0.
        out = np.clip(out, low_clip, 1.0)
        out = np.uint8(out * 255)
        # cv.imshow("gasuss", out)
        return out

    def addNoise(self, img):
        '''
        输入:
            img:图像array
        输出:
            加噪声后的图像array,由于输出的像素是在[0,1]之间,所以得乘以255
        '''
        # random.seed(int(time.time()))
        # return random_noise(img, mode='gaussian', seed=int(time.time()), clip=True)*255
        img = np.int(random_noise(img, mode='gaussian', clip=True, mean=0, var=0.00001)*255)
        return img

    # 调整亮度
    def changeLight(self, img):
        # random.seed(int(time.time()))
        flag = random.uniform(0.5, 1.5)  # flag>1为调暗,小于1为调亮
        img = np.abs(img)
        # if np.min(img) < 0:
        #     print(np.min(img))
        return exposure.adjust_gamma(img, flag)

    # 裁剪
    def crop_img_bboxes(self, img, bboxes):
        '''
        裁剪后的图片要包含所有的框
        输入:
            img:图像array
            bboxes:该图像包含的所有BBox,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
        输出:
            crop_img:裁剪后的图像array
            crop_bboxes:裁剪后的BBox的坐标list
        '''
        # ---------------------- 裁剪图像 ------------------------------
        width = img.shape[1]
        height = img.shape[0]
        x_min = width
        x_max = 0
        y_min = height
        y_max = 0
        # 裁剪后的包含所有目标框的最小的框
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])

        d_to_left = x_min  # 包含所有目标框的最小框到左边的距离
        d_to_right = width - x_max  # 包含所有目标框的最小框到右边的距离
        d_to_top = y_min  # 包含所有目标框的最小框到顶端的距离
        d_to_bottom = height - y_max  # 包含所有目标框的最小框到底部的距离

        # 随机扩展这个最小框
        crop_x_min = int(x_min - random.uniform(0, d_to_left))
        crop_y_min = int(y_min - random.uniform(0, d_to_top))
        crop_x_max = int(x_max + random.uniform(0, d_to_right))
        crop_y_max = int(y_max + random.uniform(0, d_to_bottom))

        # 随机扩展这个最小框 , 防止别裁的太小
        # crop_x_min = int(x_min - random.uniform(d_to_left//2, d_to_left))
        # crop_y_min = int(y_min - random.uniform(d_to_top//2, d_to_top))
        # crop_x_max = int(x_max + random.uniform(d_to_right//2, d_to_right))
        # crop_y_max = int(y_max + random.uniform(d_to_bottom//2, d_to_bottom))

        # 确保不要越界
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(width, crop_x_max)
        crop_y_max = min(height, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        # ---------------------- 裁剪BBox-----------------------
        # 裁剪后的BBox坐标计算
        crop_bboxes = list()
        for bbox in bboxes:
            crop_bboxes.append([bbox[0] - crop_x_min, bbox[1] - crop_y_min, bbox[2] - crop_x_min, bbox[3] - crop_y_min,bbox[4],bbox[4]])

        return crop_img, crop_bboxes

    # 平移
    def shift_pic_bboxes(self, img, bboxes):
        '''
        平移后的图片要包含所有的框
        输入:
            img:图像array
            bboxes:该图像包含的所有BBox,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
        输出:
            shift_img:平移后的图像array
            shift_bboxes:平移后的BBox的坐标list
        '''
        # ---------------------- 平移图像 ---------------------------
        width = img.shape[1]
        height = img.shape[0]
        x_min = width
        x_max = 0
        y_min = height
        y_max = 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])  # bbox的x最小值小于width，x最大值大于0
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])
        # x_min是所有目标框的x的最小值，y_min是所有目标框的y的最小值
        # x_max是所有目标框的x的最大值，y_max是所有目标框的y的最大值
        d_to_left = x_min  # 包含所有目标框的最大左移动距离
        d_to_right = width - x_max  # 包含所有目标框的最大右移动距离
        d_to_top = y_min  # 包含所有目标框的最大上移动距离
        d_to_bottom = height - y_max  # 包含所有目标框的最大下移动距离

        x = random.uniform(-(d_to_left - 1) / 3, (d_to_right - 1) / 3)
        y = random.uniform(-(d_to_top - 1) / 3, (d_to_bottom - 1) / 3)
        # x为向左或右移动的像素值,正为向右,负为向左;
        # y为向上或者向下移动的像素值,正为向下,负为向上
        M = np.float32([[1, 0, x], [0, 1, y]])
        shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

        # ---------------------- 平移BBox ----------------------
        shift_bboxes = list()
        for bbox in bboxes:
            shift_bboxes.append([bbox[0] + x, bbox[1] + y, bbox[2] + x, bbox[3] + y,bbox[4]])

        return shift_img, shift_bboxes

    # 镜像
    def filp_pic_bboxes(self, img, bboxes):
        '''
        输入:
            img:图像array
            bboxes:该图像包含的所有BBox,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
        输出:
            flip_img:翻转后的图像array
            flip_bboxes:翻转后的BBox的坐标list
        '''
        # ---------------------- 翻转图像 ----------------------
        flip_img = copy.deepcopy(img)
        if random.random() < 0.5:  # 0.5的概率水平翻转，0.5的概率垂直翻转
            horizon = True
        else:
            horizon = False
        height, width, _ = img.shape
        if horizon:  # 水平翻转
            flip_img = cv2.flip(flip_img, 1)  # 1是水平，-1是水平垂直
        else:
            flip_img = cv2.flip(flip_img, 0)
        # ---------------------- 调整BBox ----------------------
        flip_bboxes = list()
        for box in bboxes:
            x_min = box[0]
            y_min = box[1]
            x_max = box[2]
            y_max = box[3]
            if horizon:
                flip_bboxes.append([width - x_max, y_min, width - x_min, y_max,box[4]])
            else:
                flip_bboxes.append([x_min, height - y_max, x_max, height - y_min,box[4]])

        return flip_img, flip_bboxes

    # 旋转
    def rotate_img_bbox(self, img, bboxes, piangle=5, scale=1.):
        '''
        旋转后的图片需要包含所有的框，否则会对图像的原始标注造成破坏。
        需要注意的是，旋转时图像的一些边角可能会被切除掉，需要避免这种情况。
        关于仿射变换：
        输入:
            img:图像array,(h,w,c)
            bboxes:该图像包含的所有BBox,一个list,每个元素为[x_min, y_min, x_max, y_max],要确保是数值
            angle:旋转角度
            scale:默认1
        输出:
            rot_img:旋转后的图像array
            rot_bboxes:旋转后的BBox坐标list

        '''
        # ---------------------- 旋转图像 -----------------------------------
        angle = -piangle * math.pi / 180.0
        rows, cols = img.shape[:2]
        a, b = cols / 2, rows / 2
        M = cv2.getRotationMatrix2D((a, b), piangle, 1)
        # img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
        rotated_img = cv2.warpAffine(img, M, (cols, rows))  # 旋转后的图像保持大小不变

        # ---------------------- 矫正bbox坐标 ------------------------------
        # rot_mat是最终的旋转矩阵
        # 获取原始bbox的四个中点，然后将这四个点转换到旋转后的坐标系下
        rot_bboxes = list()
        for bbox in bboxes:
            x1 = float(bbox[0])-1
            y1 = float(bbox[1])-1
            x2 = float(bbox[2])-1
            y2 = float(bbox[3])-1

            x3 = x1
            y3 = y2
            x4 = x2
            y4 = y1

            X1 = (x1 - a) * math.cos(angle) - (y1 - b) * math.sin(angle) + a
            Y1 = (x1 - a) * math.sin(angle) + (y1 - b) * math.cos(angle) + b

            X2 = (x2 - a) * math.cos(angle) - (y2 - b) * math.sin(angle) + a
            Y2 = (x2 - a) * math.sin(angle) + (y2 - b) * math.cos(angle) + b

            X3 = (x3 - a) * math.cos(angle) - (y3 - b) * math.sin(angle) + a
            Y3 = (x3 - a) * math.sin(angle) + (y3 - b) * math.cos(angle) + b

            X4 = (x4 - a) * math.cos(angle) - (y4 - b) * math.sin(angle) + a
            Y4 = (x4 - a) * math.sin(angle) + (y4 - b) * math.cos(angle) + b

            NEW_X1 = (X1 + X3) / 2
            NEW_X2 = (X2 + X3) / 2
            NEW_X3 = (X2 + X4) / 2
            NEW_X4 = (X1 + X4) / 2

            NEW_Y1 = (Y1 + Y3) / 2
            NEW_Y2 = (Y2 + Y3) / 2
            NEW_Y3 = (Y2 + Y4) / 2
            NEW_Y4 = (Y1 + Y4) / 2

            # X_MIN = min(X1, X2, X3, X4)
            # X_MAX = max(X1, X2, X3, X4)
            # Y_MIN = min(Y1, Y2, Y3, Y4)
            # Y_MAX = max(Y1, Y2, Y3, Y4)

            X_MIN = min(NEW_X1, NEW_X2, NEW_X3, NEW_X4)
            X_MAX = max(NEW_X1, NEW_X2, NEW_X3, NEW_X4)
            Y_MIN = min(NEW_Y1, NEW_Y2, NEW_Y3, NEW_Y4)
            Y_MAX = max(NEW_Y1, NEW_Y2, NEW_Y3, NEW_Y4)

            # if X1>X2:
            #     X1,X2 = X2,X1
            #     Y1,Y2 = Y2,Y1
            #
            # # 求中心点
            # X_c = float(X1 + X2) / 2
            # Y_c = float(Y1 + Y2) / 2
            # H = Y2 - Y1
            # W = X2 - X1
            #
            # X_MIN = X_c - W/2
            # Y_MIN = Y_c - H/2
            # X_MAX = X_c + W/2
            # Y_MAX = Y_c + H/2


            # 加入list中
            rot_bboxes.append([X_MIN, Y_MIN, X_MAX, Y_MAX,bbox[4]])

        return rotated_img, rot_bboxes

    # 遮挡，擦除
    def erase(self, img, bboxes, length=100, n_holes=1, threshold=0.5):
        '''
        Randomly mask out one or more patches from an image.
        从图像中随机遮罩一个或多个面片。
        Args:
            img: a 3D numpy array,(h,w,c)
            bboxes: 框的坐标
            n_holes(int): Number of patches to cut out of each image.
            length(int): The length (in pixels) of each square patch.
        '''

        def cal_iou(boxA, boxB):
            '''
            输入：boxA, boxB
            输出：返回iou
            '''
            # 确定相交矩形的(x, y)-坐标
            xA = max(boxA[0], boxB[0])
            yA = max(boxA[1], boxB[1])
            xB = min(boxA[2], boxB[2])
            yB = min(boxA[3], boxB[3])
            if xB <= xA or yB <= yA:
                return 0.0
            # 计算相交矩形的面积
            interArea = (xB - xA + 1) * (yB - yA + 1)
            # 计算prediction和ground-truth的面积
            boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
            boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
            # iou = interArea / float(boxAArea + boxBArea - interArea)
            iou = interArea / float(boxBArea)

            return iou

        # 得到h和w
        if img.ndim == 3:
            h, w, c = img.shape
        else:
            _, h, w, c = img.shape

        mask = np.ones((h, w, c), np.float32)

        for n in range(n_holes):
            overlap = True  # 看切割的区域是否与box重叠太多
            while overlap:
                y = np.random.randint(h)
                x = np.random.randint(w)
                # numpy.clip(a, a_min, a_max, out=None), clip这个函数将将数组中的元素限制在a_min, a_max之间，
                # 大于a_max的就使得它等于a_max，小于a_min的就使得它等于a_min
                x1 = np.clip(x - length // 2, 0, w)
                x2 = np.clip(x + length // 2, 0, w)
                y1 = np.clip(y - length // 2, 0, h)
                y2 = np.clip(y + length // 2, 0, h)
                overlap = False
                for box in bboxes:
                    if cal_iou([x1, y1, x2, y2], box) > threshold:
                        overlap = True
                        break
            mask[y1:y2, x1:x2, :] = 0.
        # mask = np.expand_dims(mask, axis=0)
        erase_img = img * mask

        return erase_img, bboxes

    def dataAugment(self, img, bboxes):
        '''
        图像增强
        输入:
            img:图像array
            bboxes:该图像的所有框坐标
        输出:
            img:增强后的图像
            bboxes:增强后图片对应的box
        '''
        change_num = random.sample(range(-6,0), 1)[0]  # 改变的次数
        print('------------------开始进行数据增强-------------------')
        while change_num < 1:  # 默认至少有一种数据增强生效
            if random.random() < self.add_noise_rate:  # 加噪声
                print('加噪声')
                change_num += 1
                img = self.gasuss_noise(img)
                print(img.shape)

            if random.random() > self.change_light_rate:  # 改变亮度
                print('亮度')
                change_num += 1
                img = self.changeLight(img)
                print(img.shape)

            if random.random() < self.shift_rate:  # 平移
                print('平移')
                change_num += 1
                img, bboxes = self.shift_pic_bboxes(img, bboxes)
                print(img.shape)

            if random.random() < self.flip_rate:  # 镜像
                print('镜像')
                change_num += 1
                img, bboxes = self.filp_pic_bboxes(img, bboxes)
                print(img.shape)

            if random.random() > self.rotation_rate:  # 旋转
                print('旋转')
                change_num += 1
                # angle = random.uniform(-self.max_rotation_angle, self.max_rotation_angle)
                angle = random.sample(range(-180,180), 1)[0]
                # angle = -45
                print(angle)
                scale = random.uniform(0.7, 0.8)
                img, bboxes = self.rotate_img_bbox(img, bboxes, angle, scale)

            if random.random() < self.erase_rate:  # 遮挡，擦除
                print('遮挡,擦除')
                change_num += 1
                erase_length = random.sample(range(0,50),1)[0]
                erase_holes = random.sample(range(0,5),1)[0]
                img, bboxes = self.erase(img, bboxes, length=erase_length, n_holes=erase_holes,
                                 threshold=self.erase_threshold)
                print(img.shape)

            print('\n')
        print('------------------结束进行数据增强-------------------')
        print(bboxes)
        return img, bboxes


def xyxy2xywh(x):
    dw = 1. / (608)
    dh = 1. / (608)
    # if w >= 1:
    #     w = 0.99
    # if h >= 1:
    #     h = 0.99
    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    print(x.shape)
    y[:, 0] = ((x[:, 0] + x[:, 2]) / 2 -1)*dw # x center
    y[:, 1] = ((x[:, 1] + x[:, 3]) / 2  -1)*dw# y center
    y[:, 2] = (x[:, 2] - x[:, 0])*dh  # width
    y[:, 3] = (x[:, 3] - x[:, 1])*dh  # height
    return y

if __name__ == '__main__':
    import shutil
    from DOC import *

    need_aug_num = 10
    dataAug = DataAugmentForObjectDetection()
    source_pic_root_path = r'C:\Users\JKY\Desktop\***' #your image folders
    source_xml_root_path = r'C:\Users\JKY\Desktop\***' # your xml path
    save_images = r'C:\Users\JKY\Desktop\DataAugmentation_ForObjectDetect-master\dataAugmentation-images' # image save path
    save_labels = r'C:\Users\JKY\Desktop\DataAugmentation_ForObjectDetect-master\dataAugmentation-label' # label save path
    for parent, _, files in os.walk(source_pic_root_path):
        for file in files:
            cnt = 0
            while cnt < need_aug_num:
                try:
                    pic_path = os.path.join(parent, file)
                    xml_path = os.path.join(source_xml_root_path, file[:-4] + '.xml')
                    if not os.path.exists(xml_path):
                        cnt += 1
                        continue
                    coords = parse_xml(xml_path)  # 解析得到box信息，格式为[[x_min,y_min,x_max,y_max,name]]
                    # coords = [coord[:4] for coord in coords]
                    print(pic_path)
                    # img = cv2.imread(pic_path)
                    img = Image.open(pic_path)
                    img = np.array(img)
                    # show_pic(img, coords)  # 原图

                    auged_img, auged_bboxes = dataAug.dataAugment(img, coords)
                    img = cv2.cvtColor(auged_img, cv2.COLOR_RGB2BGR)
                    cnt += 1
                    name = str(int(time.time() * 1e5))
                    cv2.imwrite(os.path.join(save_images,name+'.jpg'),img)
                    print('image save success')
                    txt_path = os.path.join(save_labels,name+'.txt')

                    auged_bboxes = np.float32(np.array(auged_bboxes))
                    xywh = xyxy2xywh(auged_bboxes).astype(np.str).tolist()
                    res_bboxes = []
                    for i in range(len(xywh)):
                        index = xywh[i][-1].index('.')
                        xywhs = [xywh[i][-1][:index]] + xywh[i][:4]
                        print(xywhs)
                        res_bboxes.append(xywhs)

                    res = []
                    print(res_bboxes)
                    for listi in res_bboxes:
                        stri = ' '.join(listi)
                        stri += '\n'
                        res.append(stri)
                    print(res_bboxes)
                    with open(txt_path,'w+',-1) as file:
                        file.writelines(res)
                    file.close()
                    # show_pic(auged_img, auged_bboxes)  # 数据增强后的图
                except:
                    cnt += 1
                    continue