图像的旋转、镜像、平移操作并更新xml文件

1.图像加噪声

'''
通过将noise_type变量设置为"salt_and_pepper"或"gaussian"来选择添加椒盐噪声或高斯噪声,
以及通过intensity变量来调整噪声的强度。
该代码会遍历图像文件夹中的所有图像文件,并读取对应的XML标注文件。
然后,根据选择的噪声类型和强度,将图像添加噪声,并生成对应的带噪声图像和XML文件。
'''
import os
import random
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw

def add_noise_and_generate_xml(image_folder, xml_folder, output_image_folder, output_xml_folder, noise_type, intensity):
    """Write a noisy copy of every image in a folder, plus a copy of its XML annotation.

    Pixel noise does not move any object, so the annotation is copied
    unchanged under the new name.

    Args:
        image_folder: Folder scanned (non-recursively) for ``.jpg``/``.png`` files.
        xml_folder: Folder holding ``<image stem>.xml`` annotation files.
        output_image_folder: Destination for ``<stem>_noisy.jpg`` images.
        output_xml_folder: Destination for ``<stem>_noisy.xml`` annotations.
        noise_type: Passed through to ``add_noise``.
        intensity: Passed through to ``add_noise``.
    """
    # Neither Image.save nor ElementTree.write creates missing directories.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    for filename in os.listdir(image_folder):
        # Case-insensitive so that "IMG.JPG" / "x.PNG" are picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        stem = os.path.splitext(filename)[0]

        # The context manager releases the file handle as soon as the noisy
        # copy has been produced.
        with Image.open(os.path.join(image_folder, filename)) as image:
            noisy_image = add_noise(image, noise_type, intensity)

        # The output is always written as JPEG, which cannot store alpha or
        # palette data; convert such images instead of letting save() raise.
        if noisy_image.mode not in ("RGB", "L"):
            noisy_image = noisy_image.convert("RGB")

        noisy_filename = stem + "_noisy.jpg"
        noisy_image.save(os.path.join(output_image_folder, noisy_filename))

        # Images without an annotation file still get a noisy copy, but no XML.
        annotation_path = os.path.join(xml_folder, stem + ".xml")
        if not os.path.isfile(annotation_path):
            continue

        tree = ET.parse(annotation_path)
        noisy_xml_filename = stem + "_noisy.xml"
        tree.write(os.path.join(output_xml_folder, noisy_xml_filename))

        print(f"Noisy image '{noisy_filename}' and generated XML '{noisy_xml_filename}'.")

def add_noise(image, noise_type, intensity):
    """Return a noisy RGB copy of ``image``; the input image is not modified.

    Args:
        image: A PIL image (any mode; it is converted to RGB first).
        noise_type: ``"salt_and_pepper"`` or ``"gaussian"``. Any other value
            returns the RGB copy without noise.
        intensity: For salt-and-pepper noise, the fraction of pixels set to
            white and, independently, the fraction set to black (clamped to
            the range 0..1). For Gaussian noise, the standard deviation of
            the zero-mean noise added to each channel.

    Returns:
        The noisy image in RGB mode.
    """
    # convert() always returns a new image, so the caller's image stays
    # untouched, and RGB mode guarantees that 3-tuples are valid pixel values
    # (grayscale, palette and RGBA inputs would otherwise fail below).
    noisy_image = image.convert("RGB")
    width, height = noisy_image.size
    pixels = noisy_image.load()

    if noise_type == "salt_and_pepper":
        total = width * height
        # random.sample raises ValueError for a negative count or one larger
        # than the population, so clamp the requested number of pixels.
        count = max(0, min(total, int(total * intensity)))

        salt_pixels = random.sample(range(total), count)
        pepper_pixels = random.sample(range(total), count)

        # Flat index -> (x, y). Pepper is applied last, so it wins wherever
        # the two samples overlap.
        for index in salt_pixels:
            pixels[index % width, index // width] = (255, 255, 255)  # white
        for index in pepper_pixels:
            pixels[index % width, index // width] = (0, 0, 0)  # black

    elif noise_type == "gaussian":
        mean = 0
        std_dev = intensity

        for x in range(width):
            for y in range(height):
                # Perturb each channel independently, then clamp to 0..255.
                pixels[x, y] = tuple(
                    max(0, min(255, channel + int(random.gauss(mean, std_dev))))
                    for channel in pixels[x, y]
                )

    return noisy_image

# Settings for the noise run.
image_folder = './data/img'  # folder with the input images
xml_folder = './data/xml'  # folder with the input XML annotations
output_image_folder = './data/img_new'  # folder receiving the noisy images
output_xml_folder = './data/xml_new'  # folder receiving the copied XML files
noise_type = "salt_and_pepper"  # noise kind: "salt_and_pepper" or "gaussian"
intensity = 0.0003  # noise strength
add_noise_and_generate_xml(
    image_folder=image_folder,
    xml_folder=xml_folder,
    output_image_folder=output_image_folder,
    output_xml_folder=output_xml_folder,
    noise_type=noise_type,
    intensity=intensity,
)

2.更改对比度

import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET

def adjust_contrast_and_generate_xml(image_folder, xml_folder, output_image_folder, output_xml_folder, contrast):
    """Write a contrast-adjusted copy of every image, plus a copy of its XML annotation.

    The adjustment only rescales pixel values, so bounding boxes are unchanged
    and the annotation is copied as-is under the new name.

    Args:
        image_folder: Folder scanned (non-recursively) for ``.jpg``/``.png`` files.
        xml_folder: Folder holding ``<image stem>.xml`` annotation files.
        output_image_folder: Destination for ``<stem>_adjusted.jpg`` images.
        output_xml_folder: Destination for ``<stem>_adjusted.xml`` annotations.
        contrast: Multiplier passed to ``adjust_contrast``.
    """
    # cv2.imwrite and ElementTree.write do not create missing directories.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    for filename in os.listdir(image_folder):
        # Case-insensitive so that "IMG.JPG" / "x.PNG" are picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        stem = os.path.splitext(filename)[0]

        # cv2.imread signals failure by returning None instead of raising;
        # skip such files rather than crashing inside adjust_contrast.
        image = cv2.imread(os.path.join(image_folder, filename))
        if image is None:
            print(f"Skipping unreadable image '{filename}'.")
            continue

        adjusted_image = adjust_contrast(image, contrast)
        adjusted_filename = stem + "_adjusted.jpg"
        cv2.imwrite(os.path.join(output_image_folder, adjusted_filename), adjusted_image)

        # Images without an annotation file still get an adjusted copy, but no XML.
        annotation_path = os.path.join(xml_folder, stem + ".xml")
        if not os.path.isfile(annotation_path):
            continue

        tree = ET.parse(annotation_path)
        adjusted_xml_filename = stem + "_adjusted.xml"
        tree.write(os.path.join(output_xml_folder, adjusted_xml_filename))

        print(f"Adjusted image '{adjusted_filename}' and generated XML '{adjusted_xml_filename}'.")

def adjust_contrast(image, contrast):
    """Scale every pixel value by ``contrast`` and return an 8-bit image.

    Args:
        image: Numeric numpy array holding the image.
        contrast: Multiplicative factor applied to every value.

    Returns:
        A ``uint8`` array of the same shape, with values clipped to 0..255.
    """
    # Multiply in floating point so intermediate values above 255 are not
    # wrapped before they get clipped.
    scaled = image.astype(float) * contrast
    return np.clip(scaled, 0, 255).astype(np.uint8)

# Settings for the contrast run.
image_folder = './data/img'  # folder with the input images
xml_folder = './data/xml'  # folder with the input XML annotations
output_image_folder = './data/img_new'  # folder receiving the adjusted images
output_xml_folder = './data/xml_new'  # folder receiving the copied XML files
contrast = 1.5  # contrast multiplier

adjust_contrast_and_generate_xml(
    image_folder=image_folder,
    xml_folder=xml_folder,
    output_image_folder=output_image_folder,
    output_xml_folder=output_xml_folder,
    contrast=contrast,
)

3.翻转

import os
import xml.etree.ElementTree as ET
from PIL import Image

def flip_and_generate_xml(image_folder, xml_folder, output_image_folder, output_xml_folder, flip_direction):
    """Flip every image in a folder and write matching, updated XML annotations.

    Args:
        image_folder: Folder scanned (non-recursively) for ``.jpg``/``.png`` files.
        xml_folder: Folder holding ``<image stem>.xml`` annotation files.
        output_image_folder: Destination for ``<stem>_flipped.jpg`` images.
        output_xml_folder: Destination for ``<stem>_flipped.xml`` annotations.
        flip_direction: ``"vertical"`` (top <-> bottom) or ``"horizontal"``
            (left <-> right). Any other value prints a message and returns
            without processing anything.
    """
    # Validate once, before touching any file.
    if flip_direction == "vertical":
        method = Image.FLIP_TOP_BOTTOM
    elif flip_direction == "horizontal":
        method = Image.FLIP_LEFT_RIGHT
    else:
        print("Invalid flip direction. Please choose 'vertical' or 'horizontal'.")
        return

    # Image.save and ElementTree.write do not create missing directories.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    for filename in os.listdir(image_folder):
        # Case-insensitive so that "IMG.JPG" / "x.PNG" are picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        stem = os.path.splitext(filename)[0]

        with Image.open(os.path.join(image_folder, filename)) as image:
            width, height = image.size
            flipped_image = image.transpose(method)

        # The output is always written as JPEG, which cannot store alpha or
        # palette data; convert such images instead of letting save() raise.
        if flipped_image.mode not in ("RGB", "L"):
            flipped_image = flipped_image.convert("RGB")

        flipped_filename = stem + "_flipped.jpg"
        flipped_image.save(os.path.join(output_image_folder, flipped_filename))

        # Images without an annotation file still get a flipped copy, but no XML.
        annotation_path = os.path.join(xml_folder, stem + ".xml")
        if not os.path.isfile(annotation_path):
            continue

        tree = ET.parse(annotation_path)
        root = tree.getroot()

        # Mirror only the axis that was actually flipped. min and max swap
        # roles because the far edge of the box becomes the near one.
        if flip_direction == "vertical":
            low_tag, high_tag, extent = "bndbox/ymin", "bndbox/ymax", height
        else:
            low_tag, high_tag, extent = "bndbox/xmin", "bndbox/xmax", width

        for obj in root.findall("object"):
            low_node = obj.find(low_tag)
            high_node = obj.find(high_tag)
            low = int(low_node.text)
            high = int(high_node.text)
            low_node.text = str(extent - high)
            high_node.text = str(extent - low)

        flipped_xml_filename = stem + "_flipped.xml"
        tree.write(os.path.join(output_xml_folder, flipped_xml_filename))

        print(f"Flipped image '{flipped_filename}' and generated XML '{flipped_xml_filename}'.")


# Settings for the flip run.
image_folder = './data/img'  # folder with the input images
xml_folder = './data/xml'  # folder with the input XML annotations
output_image_folder = './data/img_new'  # folder receiving the flipped images
output_xml_folder = './data/xml_new'  # folder receiving the updated XML files
flip_direction = "vertical"  # either "vertical" or "horizontal"
flip_and_generate_xml(
    image_folder=image_folder,
    xml_folder=xml_folder,
    output_image_folder=output_image_folder,
    output_xml_folder=output_xml_folder,
    flip_direction=flip_direction,
)

4.镜像

import os
import xml.etree.ElementTree as ET
from PIL import Image

def mirror_and_generate_xml(image_folder, xml_folder, output_image_folder, output_xml_folder):
    """Mirror every image left-to-right and write matching, updated XML annotations.

    Args:
        image_folder: Folder scanned (non-recursively) for ``.jpg``/``.png`` files.
        xml_folder: Folder holding ``<image stem>.xml`` annotation files.
        output_image_folder: Destination for ``<stem>_mirrored.jpg`` images.
        output_xml_folder: Destination for ``<stem>_mirrored.xml`` annotations.
    """
    # Image.save and ElementTree.write do not create missing directories.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    for filename in os.listdir(image_folder):
        # Case-insensitive so that "IMG.JPG" / "x.PNG" are picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        stem = os.path.splitext(filename)[0]

        with Image.open(os.path.join(image_folder, filename)) as image:
            width = image.width
            mirrored_image = image.transpose(Image.FLIP_LEFT_RIGHT)

        # The output is always written as JPEG, which cannot store alpha or
        # palette data; convert such images instead of letting save() raise.
        if mirrored_image.mode not in ("RGB", "L"):
            mirrored_image = mirrored_image.convert("RGB")

        mirrored_filename = stem + "_mirrored.jpg"
        mirrored_image.save(os.path.join(output_image_folder, mirrored_filename))

        # Images without an annotation file still get a mirrored copy, but no XML.
        annotation_path = os.path.join(xml_folder, stem + ".xml")
        if not os.path.isfile(annotation_path):
            continue

        tree = ET.parse(annotation_path)
        root = tree.getroot()

        for obj in root.findall("object"):
            # Only x changes; the old right edge becomes the new left edge.
            xmin_node = obj.find("bndbox/xmin")
            xmax_node = obj.find("bndbox/xmax")
            xmin = int(xmin_node.text)
            xmax = int(xmax_node.text)
            xmin_node.text = str(width - xmax)
            xmax_node.text = str(width - xmin)

        mirrored_xml_filename = stem + "_mirrored.xml"
        tree.write(os.path.join(output_xml_folder, mirrored_xml_filename))

        print(f"Mirrored image '{mirrored_filename}' and generated XML '{mirrored_xml_filename}'.")

# Settings for the mirror run.
image_folder = './data/img'  # folder with the input images
xml_folder = './data/xml'  # folder with the input XML annotations
output_image_folder = './data/img_new'  # folder receiving the mirrored images
output_xml_folder = './data/xml_new'  # folder receiving the updated XML files

mirror_and_generate_xml(
    image_folder=image_folder,
    xml_folder=xml_folder,
    output_image_folder=output_image_folder,
    output_xml_folder=output_xml_folder,
)

5.旋转

import os
import xml.etree.ElementTree as ET
from PIL import Image

def rotate_and_generate_xml(image_folder, xml_folder, output_image_folder, output_xml_folder):
    """Rotate every image with ``Image.ROTATE_90`` and write matching, updated XML annotations.

    Args:
        image_folder: Folder scanned (non-recursively) for ``.jpg``/``.png`` files.
        xml_folder: Folder holding ``<image stem>.xml`` annotation files.
        output_image_folder: Destination for ``<stem>_rotated.jpg`` images.
        output_xml_folder: Destination for ``<stem>_rotated.xml`` annotations.
    """
    # Image.save and ElementTree.write do not create missing directories.
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_xml_folder, exist_ok=True)

    for filename in os.listdir(image_folder):
        # Case-insensitive so that "IMG.JPG" / "x.PNG" are picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        stem = os.path.splitext(filename)[0]

        with Image.open(os.path.join(image_folder, filename)) as image:
            width = image.width
            rotated_image = image.transpose(Image.ROTATE_90)

        # The output is always written as JPEG, which cannot store alpha or
        # palette data; convert such images instead of letting save() raise.
        if rotated_image.mode not in ("RGB", "L"):
            rotated_image = rotated_image.convert("RGB")

        rotated_filename = stem + "_rotated.jpg"
        rotated_image.save(os.path.join(output_image_folder, rotated_filename))

        # Images without an annotation file still get a rotated copy, but no XML.
        annotation_path = os.path.join(xml_folder, stem + ".xml")
        if not os.path.isfile(annotation_path):
            continue

        tree = ET.parse(annotation_path)
        root = tree.getroot()

        # The rotated image has its width and height exchanged, so the <size>
        # element (when present) must be swapped to stay consistent.
        size = root.find("size")
        if size is not None:
            width_node = size.find("width")
            height_node = size.find("height")
            if width_node is not None and height_node is not None:
                width_node.text, height_node.text = height_node.text, width_node.text

        for obj in root.findall("object"):
            box = obj.find("bndbox")
            xmin = int(box.find("xmin").text)
            ymin = int(box.find("ymin").text)
            xmax = int(box.find("xmax").text)
            ymax = int(box.find("ymax").text)

            # A point (x, y) of the source lands at (y, width - x) after the
            # 90-degree counter-clockwise rotation.
            box.find("xmin").text = str(ymin)
            box.find("ymin").text = str(width - xmax)
            box.find("xmax").text = str(ymax)
            box.find("ymax").text = str(width - xmin)

        rotated_xml_filename = stem + "_rotated.xml"
        tree.write(os.path.join(output_xml_folder, rotated_xml_filename))

        print(f"Rotated image '{rotated_filename}' and generated XML '{rotated_xml_filename}'.")

# Settings for the rotation run.
image_folder = './data/img'  # folder with the input images
xml_folder = './data/xml'  # folder with the input XML annotations
output_image_folder = './data/img_new'  # folder receiving the rotated images
output_xml_folder = './data/xml_new'  # folder receiving the updated XML files

rotate_and_generate_xml(
    image_folder=image_folder,
    xml_folder=xml_folder,
    output_image_folder=output_image_folder,
    output_xml_folder=output_xml_folder,
)

6.综合多种(记不清在哪看的别人的,也没找到对应的博文)

# 包括:
#     1. 裁剪(需改变bbox)
#     2. 平移(需改变bbox)
#     3. 改变亮度
#     4. 加噪声
#     5. 旋转角度(需要改变bbox)
#     6. 镜像(需要改变bbox)
#     7. cutout

import time
import random
import cv2
import os
import re
import math
import numpy as np
from skimage.util import random_noise
from skimage import exposure

class DataAugmentForObjectDetection():
    """Random image augmentations that keep bounding boxes in sync.

    Operations: crop, shift, brightness (gamma) change, Gaussian noise,
    rotation, flip and cutout. Images are numpy arrays; boxes are lists of
    ``[x_min, y_min, x_max, y_max, label]``.
    """

    def __init__(self, rotation_rate, max_rotation_angle,
                 crop_rate, shift_rate, change_light_rate,
                 add_noise_rate, flip_rate,
                 cutout_rate, cut_out_length, cut_out_holes, cut_out_threshold):
        """Store the per-operation probabilities and the cutout settings.

        Each ``*_rate`` is the probability (0..1) that the operation is
        applied in one pass of ``dataAugment``. ``max_rotation_angle`` is
        stored but not read by ``dataAugment``, which picks from 90/180/270.
        """
        self.rotation_rate = rotation_rate
        self.max_rotation_angle = max_rotation_angle
        self.crop_rate = crop_rate
        self.shift_rate = shift_rate
        self.change_light_rate = change_light_rate
        self.add_noise_rate = add_noise_rate
        self.flip_rate = flip_rate
        self.cutout_rate = cutout_rate

        self.cut_out_length = cut_out_length
        self.cut_out_holes = cut_out_holes
        self.cut_out_threshold = cut_out_threshold

    @staticmethod
    def _union_box(w, h, bboxes):
        """Return (x_min, y_min, x_max, y_max) of the smallest box enclosing all ``bboxes``."""
        x_min, y_min, x_max, y_max = w, h, 0, 0
        for bbox in bboxes:
            x_min = min(x_min, bbox[0])
            y_min = min(y_min, bbox[1])
            x_max = max(x_max, bbox[2])
            y_max = max(y_max, bbox[3])
        return x_min, y_min, x_max, y_max

    # Noise
    def _addNoise(self, img):
        """Return ``img`` with Gaussian noise added.

        ``random_noise`` is called with ``clip=True`` and its result is
        multiplied by 255 to get back to the 0-255 range (as a float array).
        """
        return random_noise(img, mode='gaussian', clip=True) * 255

    # Brightness
    def _changeLight(self, img):
        """Apply gamma correction with gamma drawn uniformly from [0.5, 1.5].

        Gamma above 1 darkens the image, below 1 brightens it; widen the
        interval for stronger changes.
        """
        flag = random.uniform(0.5, 1.5)
        return exposure.adjust_gamma(img, flag)

    # Cutout
    def _cutout(self, img, bboxes, length=100, n_holes=1, threshold=0.5):
        """Black out up to ``n_holes`` square patches of the image.

        Args:
            img: Image array, (h, w), (h, w, c) or (n, h, w, c).
            bboxes: Boxes that must not be hidden too much.
            length: Side length of each square patch, in pixels.
            n_holes: Number of patches to place.
            threshold: A patch position is rejected when it covers more than
                this fraction of any box's area.

        Returns:
            ``img`` multiplied by a float32 0/1 mask (so the result is a float array).
        """

        def covered_fraction(hole, box):
            """Fraction of ``box``'s area that lies inside ``hole``."""
            xA = max(hole[0], box[0])
            yA = max(hole[1], box[1])
            xB = min(hole[2], box[2])
            yB = min(hole[3], box[3])

            if xB <= xA or yB <= yA:
                return 0.0

            inter_area = (xB - xA + 1) * (yB - yA + 1)
            box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
            # Deliberately not a symmetric IoU: only the share of the object
            # that gets hidden matters.
            return inter_area / float(box_area)

        if img.ndim == 2:
            # Grayscale image: the mask has no channel axis.
            h, w = img.shape
            mask = np.ones((h, w), np.float32)
        else:
            # The last three axes are (h, w, c); a leading batch axis is
            # handled by broadcasting the mask.
            h, w, c = img.shape[-3:]
            mask = np.ones((h, w, c), np.float32)

        # Bound the search: when every position hides too much of some box
        # (e.g. the patch is larger than the image) an unbounded retry loop
        # would never terminate, so such a hole is skipped instead.
        max_attempts = 100

        for _ in range(n_holes):
            for _attempt in range(max_attempts):
                y = np.random.randint(h)
                x = np.random.randint(w)

                # Clamp the patch to the image borders.
                y1 = np.clip(y - length // 2, 0, h)
                y2 = np.clip(y + length // 2, 0, h)
                x1 = np.clip(x - length // 2, 0, w)
                x2 = np.clip(x + length // 2, 0, w)

                hole = [x1, y1, x2, y2]
                if all(covered_fraction(hole, box) <= threshold for box in bboxes):
                    mask[y1:y2, x1:x2] = 0.
                    break

        return img * mask

    # Rotation
    def _rotate_img_bbox(self, img, bboxes, angle=5, scale=1.):
        """Rotate the image and recompute its boxes.

        Args:
            img: Image array, (h, w, c).
            bboxes: List of ``[x_min, y_min, x_max, y_max, label]`` with numeric coordinates.
            angle: Rotation angle in degrees.
            scale: Scale factor applied together with the rotation.

        Returns:
            ``(rot_img, rot_bboxes)``: the rotated image and the boxes in its
            coordinate system.
        """
        # ---------------------- rotate the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        rangle = np.deg2rad(angle)  # degrees -> radians
        # Size of the canvas that holds the whole rotated image.
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        # Rotation about the centre of the new canvas.
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
        # Move from the old centre to the new centre, expressed in the
        # rotated frame; it only affects the translation column.
        rot_move = np.dot(rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        # Affine warp onto the new canvas.
        rot_img = cv2.warpAffine(img, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4)

        # ---------------------- fix up the boxes ----------------------
        # Transform the midpoints of the four box edges with the final matrix
        # and take the axis-aligned rectangle around them.
        rot_bboxes = list()
        for bbox in bboxes:
            xmin = bbox[0]
            ymin = bbox[1]
            xmax = bbox[2]
            ymax = bbox[3]
            point1 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymin, 1]))
            point2 = np.dot(rot_mat, np.array([xmax, (ymin + ymax) / 2, 1]))
            point3 = np.dot(rot_mat, np.array([(xmin + xmax) / 2, ymax, 1]))
            point4 = np.dot(rot_mat, np.array([xmin, (ymin + ymax) / 2, 1]))
            concat = np.vstack((point1, point2, point3, point4))
            # cv2.boundingRect needs integer points.
            concat = concat.astype(np.int32)
            rx, ry, rw, rh = cv2.boundingRect(concat)
            rot_bboxes.append([rx, ry, rx + rw, ry + rh, bbox[4]])

        return rot_img, rot_bboxes

    # Crop
    def _crop_img_bboxes(self, img, bboxes):
        """Randomly crop the image while keeping every box fully inside.

        Args:
            img: Image array.
            bboxes: List of ``[x_min, y_min, x_max, y_max, label]`` with numeric coordinates.

        Returns:
            ``(crop_img, crop_bboxes)``: the cropped image (a view of ``img``)
            and the boxes shifted into its coordinate system. With no boxes
            the image is returned unchanged.
        """
        if not bboxes:
            # Nothing constrains the crop; the formula below could then
            # produce an empty image, so leave the input alone.
            return img, []

        # ---------------------- crop the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min, y_min, x_max, y_max = self._union_box(w, h, bboxes)

        # Free margin between the union of all boxes and each image border.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Grow the union box by a random share of each margin.
        crop_x_min = int(x_min - random.uniform(0, d_to_left))
        crop_y_min = int(y_min - random.uniform(0, d_to_top))
        crop_x_max = int(x_max + random.uniform(0, d_to_right))
        crop_y_max = int(y_max + random.uniform(0, d_to_bottom))

        # Stay inside the image.
        crop_x_min = max(0, crop_x_min)
        crop_y_min = max(0, crop_y_min)
        crop_x_max = min(w, crop_x_max)
        crop_y_max = min(h, crop_y_max)

        crop_img = img[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

        # ---------------------- shift the boxes ----------------------
        crop_bboxes = [
            [bbox[0] - crop_x_min, bbox[1] - crop_y_min,
             bbox[2] - crop_x_min, bbox[3] - crop_y_min, bbox[4]]
            for bbox in bboxes
        ]

        return crop_img, crop_bboxes

    # Shift
    def _shift_pic_bboxes(self, img, bboxes):
        """Translate the image by a random offset, bounded by the free margin around the boxes.

        Args:
            img: Image array.
            bboxes: List of ``[x_min, y_min, x_max, y_max, label]`` with numeric coordinates.

        Returns:
            ``(shift_img, shift_bboxes)``: the shifted image (same size as the
            input) and the boxes moved by the same, possibly fractional, offset.
        """
        # ---------------------- shift the image ----------------------
        w = img.shape[1]
        h = img.shape[0]
        x_min, y_min, x_max, y_max = self._union_box(w, h, bboxes)

        # Largest possible move in each direction before a box leaves the image.
        d_to_left = x_min
        d_to_right = w - x_max
        d_to_top = y_min
        d_to_bottom = h - y_max

        # Only a third of the available margin is used.
        x = random.uniform(-(d_to_left - 1) / 3, (d_to_right - 1) / 3)
        y = random.uniform(-(d_to_top - 1) / 3, (d_to_bottom - 1) / 3)

        # Positive x moves right, negative left; positive y moves down, negative up.
        M = np.float32([[1, 0, x], [0, 1, y]])
        shift_img = cv2.warpAffine(img, M, (img.shape[1], img.shape[0]))

        # ---------------------- shift the boxes ----------------------
        shift_bboxes = [
            [bbox[0] + x, bbox[1] + y, bbox[2] + x, bbox[3] + y, bbox[4]]
            for bbox in bboxes
        ]

        return shift_img, shift_bboxes

    # Flip
    def _filp_pic_bboxes(self, img, bboxes):
        """Flip the image horizontally or vertically (50/50) and mirror its boxes.

        Args:
            img: Image array, (h, w) or (h, w, c).
            bboxes: List of ``[x_min, y_min, x_max, y_max, label]`` with numeric coordinates.

        Returns:
            ``(flip_img, flip_bboxes)``: the flipped image and the mirrored boxes.
        """
        # ---------------------- flip the image ----------------------
        horizon = random.random() < 0.5
        h, w = img.shape[:2]
        # cv2.flip returns a new array: flip code 1 is horizontal, 0 vertical.
        flip_img = cv2.flip(img, 1 if horizon else 0)

        # ---------------------- mirror the boxes ----------------------
        flip_bboxes = list()
        for box in bboxes:
            x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
            if horizon:
                flip_bboxes.append([w - x_max, y_min, w - x_min, y_max, box[4]])
            else:
                flip_bboxes.append([x_min, h - y_max, x_max, h - y_min, box[4]])

        return flip_img, flip_bboxes

    def dataAugment(self, img, bboxes):
        """Apply a random combination of the configured augmentations.

        Args:
            img: Image array.
            bboxes: All boxes of the image, ``[x_min, y_min, x_max, y_max, label]`` each.

        Returns:
            ``(img, bboxes)``: the augmented image and its boxes.
        """
        change_num = 0  # number of augmentations applied so far
        print('------')
        # NOTE: the image is always cropped once up front, independent of crop_rate.
        img, bboxes = self._crop_img_bboxes(img, bboxes)

        rates = (self.crop_rate, self.rotation_rate, self.shift_rate,
                 self.change_light_rate, self.add_noise_rate,
                 self.cutout_rate, self.flip_rate)
        if not any(rate > 0 for rate in rates):
            # No operation can ever fire; the retry loop below would spin forever.
            return img, bboxes

        while change_num < 1:  # repeat until at least one augmentation fired
            if random.random() < self.crop_rate:  # crop
                print('裁剪')
                change_num += 1
                img, bboxes = self._crop_img_bboxes(img, bboxes)

            # Same "random() < rate" test as every other operation, so that
            # rotation_rate really is the probability of rotating.
            if random.random() < self.rotation_rate:  # rotate
                print('旋转')
                change_num += 1
                angle = random.sample([90, 180, 270], 1)[0]
                scale = random.uniform(0.7, 0.8)
                img, bboxes = self._rotate_img_bbox(img, bboxes, angle, scale)

            if random.random() < self.shift_rate:  # shift
                print('平移')
                change_num += 1
                img, bboxes = self._shift_pic_bboxes(img, bboxes)

            if random.random() < self.change_light_rate:  # brightness
                print('亮度')
                change_num += 1
                img = self._changeLight(img)

            if random.random() < self.add_noise_rate:  # noise
                print('加噪声')
                change_num += 1
                img = self._addNoise(img)

            if random.random() < self.cutout_rate:  # cutout
                print('cutout')
                change_num += 1
                img = self._cutout(img, bboxes, length=self.cut_out_length, n_holes=self.cut_out_holes,
                                   threshold=self.cut_out_threshold)

            if random.random() < self.flip_rate:  # flip
                print('翻转')
                change_num += 1
                img, bboxes = self._filp_pic_bboxes(img, bboxes)
            print('\n')
        return img, bboxes


if __name__ == '__main__':
    from xml_helper import parse_xml, generate_xml

    # Number of augmented copies to produce per source image.
    need_aug_num = 1
    # Source image / annotation folders and the folders receiving the results.
    source_pic_root_path = './data/img'
    source_xml_root_path = './data/xml'
    save_pic_path = './data/img_new'
    save_xml_path = './data/xml_new'
    for path in [save_pic_path, save_xml_path]:
        if path:
            # makedirs also creates missing parents and tolerates existing folders.
            os.makedirs(path, exist_ok=True)

    # The constructor arguments select which augmentations run, e.g.
    # rotation_rate=0.5 gives a 50% chance of rotating an image.
    # Cutout has extra settings:
    #   cut_out_length:    side length of each black square, in pixels
    #   cut_out_holes:     number of black squares per image
    #   cutout_rate:       probability of applying cutout
    #   cut_out_threshold: a square is rejected when it covers more than this
    #                      fraction of an object's box
    dataAug = DataAugmentForObjectDetection(rotation_rate=0.0,
                                            max_rotation_angle=5,
                                            crop_rate=0.5,
                                            shift_rate=0.0,
                                            change_light_rate=0.0,
                                            add_noise_rate=0,
                                            flip_rate=0.5,
                                            cutout_rate=0.0,
                                            cut_out_length=50,
                                            cut_out_holes=1,
                                            cut_out_threshold=0.5)
    for parent, _, files in os.walk(source_pic_root_path):
        for file in files:
            # splitext handles extensions of any length (".jpeg", ".png", ...).
            stem = os.path.splitext(file)[0]
            pic_path = os.path.join(parent, file)
            xml_path = os.path.join(source_xml_root_path, stem + '.xml')
            if not os.path.isfile(xml_path):
                print("Skipping '%s': no annotation file '%s'." % (file, xml_path))
                continue

            # coords has the form [[x_min, y_min, x_max, y_max, name], ...].
            coords, shape = parse_xml(xml_path)
            img = cv2.imread(pic_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping '%s': not a readable image." % file)
                continue

            for cnt in range(need_aug_num):
                # Augmented images are always stored as JPEG.
                auged_pic_path = stem + 'auged' + str(cnt) + '.jpg'
                auged_img, auged_bboxes = dataAug.dataAugment(img, coords)
                cv2.imwrite(os.path.join(save_pic_path, auged_pic_path), auged_img)
                # Cropping and rotating change the image size, so the <size>
                # written to the XML must describe the augmented image, not
                # the source; the depth is carried over from the source XML.
                auged_h, auged_w = auged_img.shape[:2]
                auged_shape = [auged_w, auged_h, shape[2]]
                # The XML gets the same base name as the image.
                generate_xml(auged_pic_path, auged_bboxes, auged_shape, save_xml_path)
# -*- coding=utf-8 -*-
import xml.etree.ElementTree as ET
import xml.dom.minidom as DOC
import os


# 从xml文件中提取bounding box信息, 格式为[[x_min, y_min, x_max, y_max, name]]
def parse_xml(xml_path):
    """Read the bounding boxes and the image size from an annotation file.

    Args:
        xml_path: Path of the XML annotation file.

    Returns:
        ``(coords, shape)`` where ``coords`` is
        ``[[x_min, y_min, x_max, y_max, name], ...]`` (integer coordinates)
        and ``shape`` is ``[width, height, depth]`` as the text values found
        in the ``<size>`` element.

    Raises:
        ValueError: If the file has no ``<size>`` element.
    """
    root = ET.parse(xml_path).getroot()

    sizes = root.findall('size')
    if not sizes:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("no <size> element found in '%s'" % xml_path)
    # When several <size> elements exist, the last one wins.
    size = sizes[-1]
    shape = [size.find(tag).text for tag in ('width', 'height', 'depth')]

    coords = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        box = obj.find('bndbox')
        # Look the coordinates up by tag name: the order of the children of
        # <bndbox> differs between annotation tools.
        corners = [int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        coords.append(corners + [name])
    return coords, shape


# 将bounding box信息写入xml文件中, bouding box格式为[[x_min, y_min, x_max, y_max, name]]
def generate_xml(img_name, coords, img_size, out_root_path):
    """Write an annotation XML file for one image.

    Args:
        img_name: Image file name, e.g. ``a.jpg``. The XML is written as
            ``<name without extension>.xml``.
        coords: ``[[x_min, y_min, x_max, y_max, name], ...]``; coordinates
            may be ints, floats or numeric strings and are truncated to ints.
        img_size: ``[width, height, depth]`` of the image.
        out_root_path: Folder the XML file is written to.
    """
    doc = DOC.Document()

    def add_text_child(parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
        node = doc.createElement(tag)
        node.appendChild(doc.createTextNode(text))
        parent.appendChild(node)
        return node

    img_root_path = './VOCdevkit/VOC2007/JPEGImages/'
    img_path = img_root_path + img_name

    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    add_text_child(annotation, 'folder', 'Tianchi')
    add_text_child(annotation, 'filename', img_path)

    source = doc.createElement('source')
    annotation.appendChild(source)
    add_text_child(source, 'database', 'The Tianchi Database')
    add_text_child(source, 'annotation', 'Tianchi')

    size = doc.createElement('size')
    annotation.appendChild(size)
    add_text_child(size, 'width', str(img_size[0]))
    add_text_child(size, 'height', str(img_size[1]))
    add_text_child(size, 'depth', str(img_size[2]))

    for coord in coords:
        obj_node = doc.createElement('object')
        annotation.appendChild(obj_node)

        add_text_child(obj_node, 'name', coord[4])
        add_text_child(obj_node, 'pose', 'Unspecified')
        add_text_child(obj_node, 'truncated', '1')
        add_text_child(obj_node, 'difficult', '0')

        bndbox = doc.createElement('bndbox')
        obj_node.appendChild(bndbox)
        # int(float(...)) accepts ints, floats and numeric strings alike.
        for tag, value in zip(('xmin', 'ymin', 'xmax', 'ymax'), coord[:4]):
            add_text_child(bndbox, tag, str(int(float(value))))

    # splitext copes with extensions of any length; the XML declaration
    # emitted by toprettyxml names no encoding, which means UTF-8, so the
    # file is written as UTF-8 regardless of the platform default.
    xml_name = os.path.splitext(img_name)[0] + '.xml'
    with open(os.path.join(out_root_path, xml_name), 'w', encoding='utf-8') as f:
        f.write(doc.toprettyxml(indent=''))
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值