深度学习
深度学习需要大量的样本,即使基于迁移学习,原样本也可能并不适用于当前的识别场景,还是需要大量的数据去进行微调。
数据增强
使用imgaug
进行数据增强,它可以把标注点和图像同步进行变换。github:https://github.com/aleju/imgaug
Windows 下使用 pip 安装 imgaug,需要依赖库 geos,需要将 geos.dll 和 geos_c.dll 放在环境中(系统环境变量 Path 中,我放在了 system32)。
Geos需要从官网或者github进行下载,进行Cmake+VS编译出动态链接库。不会的可以直接下载:https://download.csdn.net/download/u012525096/10894043
# 依赖库
pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio Shapely
# 安装imgaug
pip install imgaug
# install the latest version directly from github:
pip install git+https://github.com/aleju/imgaug
代码
这里采用的是VIA进行标注的VGG通用格式的标注文件,其它格式请自己解析。
"""
Script to verify all examples in the readme.
Simply execute
python test_readme_examples.py
"""
from __future__ import print_function, division
import numpy as np
import imgaug as ia
from imgaug import augmenters as iaa
from PIL import Image
import json
import os
import copy
import shutil
def main():
    """Entry point: run the configured augmentation / merge steps.

    The individual augmentation steps are kept commented out; uncomment
    the ones needed for a given run.
    """
    # Placeholder directories (Chinese: "old directory" / "new directory") —
    # replace with real dataset paths before running.
    old = "旧目录"
    new = "新目录"
    # # Rotate by +10 degrees
    # data_augmentation(old,
    #                   new, "rotate_10",
    #                   iaa.Sequential([iaa.Affine(rotate=10)]))
    # # Rotate by -10 degrees
    # data_augmentation(old,
    #                   new, "rotate_-10",
    #                   iaa.Sequential([iaa.Affine(rotate=-10)]))
    # Gaussian blur (three strengths)
    # data_augmentation(old,
    #                   new, "GaussianBlur_low",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=1)]))
    # data_augmentation(old,
    #                   new, "GaussianBlur_mid",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=2)]))
    # data_augmentation(old,
    #                   new, "GaussianBlur_high",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=3)]))
    # # Additive Gaussian noise (two strengths)
    # data_augmentation(old,
    #                   new, "AdditiveGaussianNoise_5",
    #                   iaa.Sequential([iaa.AdditiveGaussianNoise(scale=5)]))
    # data_augmentation(old,
    #                   new, "AdditiveGaussianNoise_10",
    #                   iaa.Sequential([iaa.AdditiveGaussianNoise(scale=10)]))
    # # Brightness changes
    # data_augmentation(old,
    #                   new, "light_1.15",
    #                   iaa.Sequential([iaa.Multiply(mul=1.15)]))
    # data_augmentation(old,
    #                   new, "light_1.3",
    #                   iaa.Sequential([iaa.Multiply(mul=1.3)]))
    # data_augmentation(old,
    #                   new, "light_0.85",
    #                   iaa.Sequential([iaa.Multiply(mul=0.85)]))
    # data_augmentation(old,
    #                   new, "light_0.7",
    #                   iaa.Sequential([iaa.Multiply(mul=0.7)]))
    # # Centre scaling
    # data_augmentation(old,
    #                   new, "Affine_scale_1.5",
    #                   iaa.Sequential([iaa.Affine(scale={"x": 1.5, "y": 1.5})]))
    # data_augmentation(old,
    #                   new, "Affine_scale_0.8",
    #                   iaa.Sequential([iaa.Affine(scale={"x": 0.8, "y": 0.8})]))
    # # Translation
    # data_augmentation(old,
    #                   new, "Affine_xy_20",
    #                   iaa.Sequential([iaa.Affine(translate_px={"x": 20, "y": 20})]))
    # data_augmentation(old,
    #                   new, "Affine_x_10",
    #                   iaa.Sequential([iaa.Affine(translate_px={"x": 10, "y": 0})]))
    # ########################## Merge the datasets expanded above ##########################
    merge_data("合并前的目录", "合并后的目录")
    # ########################## Mirror-flip the merged data ##########################
    # # Mirror every sub-dataset under the given directory
    # flip_all("目录下所有文件进行镜像")
def flip_all(datasets_path):
    """Horizontally mirror every sub-dataset found under *datasets_path*.

    Each child entry is augmented in place: the flipped images and the
    transformed annotations are written to a sibling directory whose name
    gets the "_flip" suffix.
    """
    for child in os.listdir(datasets_path):
        child_path = os.path.join(datasets_path, child)
        data_augmentation(child_path, child_path, "_flip",
                          iaa.Sequential([iaa.Fliplr(1)]))
def merge_data(datasets_path, new_dataset):
    """Merge several VIA-annotated sub-datasets into a single dataset.

    Every immediate subdirectory of ``datasets_path`` is expected to hold a
    ``via_region_data.json`` annotation file plus image files.  All
    annotations are merged into one JSON file and every non-JSON file is
    copied into ``new_dataset``.

    Args:
        datasets_path: directory whose subdirectories are the datasets to merge.
        new_dataset: output directory; created if it does not exist.
    """
    # Collect the immediate subdirectories (one per sub-dataset).  Listing
    # happens before the output directory is created, so a new_dataset that
    # lives inside datasets_path is not picked up as an input.
    datasets = []
    for child in os.listdir(datasets_path):
        child_path = os.path.join(datasets_path, child)
        if os.path.isdir(child_path):
            datasets.append(str(child_path))
    # Create the output directory if needed.
    if not os.path.exists(new_dataset):
        os.makedirs(new_dataset)
    # ---- Merge annotations; later datasets overwrite duplicate keys. ----
    annotations = {}
    for dataset in datasets:
        # Use a context manager so the file handle is closed promptly
        # (the original left the handle to the garbage collector).
        with open(os.path.join(dataset, "via_region_data.json")) as f:
            annotations.update(json.load(f))
    # Write the merged annotation file.
    with open(os.path.join(new_dataset, "via_region_data.json"), 'w') as f:
        json.dump(annotations, f)
    # ---- Copy the images (every file that is not a JSON file). ----
    for dataset in datasets:
        for file_name in os.listdir(dataset):
            if not file_name.endswith('json'):
                shutil.copyfile(os.path.join(dataset, file_name),
                                os.path.join(new_dataset, file_name))
def data_augmentation(dataset_dir_old, dataset_dir_new_prefix, iaa_name, seq):
    """Augment every annotated image of a VIA (VGG Image Annotator) dataset.

    Applies the imgaug pipeline ``seq`` to each image listed in
    ``via_region_data.json`` and transforms the polygon keypoints of the
    annotations in sync, writing images plus an updated annotation file to
    a new directory.

    Args:
        dataset_dir_old: directory with the source images and the
            ``via_region_data.json`` annotation file.
        dataset_dir_new_prefix: prefix of the output directory; the final
            directory is ``dataset_dir_new_prefix + iaa_name``.
        iaa_name: tag appended to the output directory and to each
            augmented file name (e.g. ``"_flip"``).
        seq: an ``imgaug.augmenters.Sequential`` pipeline to apply.
    """
    # Original hard-coded "By flip" here, which was wrong for every other
    # augmentation; report the actual augmentation tag instead.
    print("数据扩展 " + iaa_name + ": Executing! ")
    # Freeze the (possibly random) pipeline so that images and keypoints
    # receive exactly the same transformation.
    seq_det = seq.to_deterministic()
    # Create the output directory if needed (was `exists(...).__eq__(False)`).
    dataset_dir_new = dataset_dir_new_prefix + iaa_name
    if not os.path.exists(dataset_dir_new):
        os.makedirs(dataset_dir_new)
    # Load the VIA annotations; close the file handle promptly.
    with open(os.path.join(dataset_dir_old, "via_region_data.json")) as f:
        annotations = json.load(f)
    annotations_new = copy.deepcopy(annotations)
    # Keep keys and values aligned: BOTH lists are filtered to entries that
    # actually contain regions.  The original filtered only the values, so
    # index i renamed the wrong keys whenever an entry had empty regions.
    annotations_new_keys = [k for k, v in annotations.items() if v['regions']]
    annotations_values = [v for v in annotations.values() if v['regions']]
    for i, annotations_value in enumerate(annotations_values):
        # One KeypointsOnImage per region of this image.
        key_points_old = []
        # VIA exports regions either as a dict (older format) or a list.
        if type(annotations_value['regions']) is dict:
            polygons = [r['shape_attributes'] for r in annotations_value['regions'].values()]
        else:
            polygons = [r['shape_attributes'] for r in annotations_value['regions']]
        filename = annotations_value['filename']
        image_old = np.array(Image.open(os.path.join(dataset_dir_old, filename)))
        # Collect the polygon vertices of every region as imgaug keypoints.
        for j, b in enumerate(polygons):
            key_points = []
            for k in range(len(b['all_points_x'])):
                try:
                    # Probe the copy at the same indices so inconsistent
                    # annotation files are detected and skipped, not crashed on.
                    annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_x'][k]
                    annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_y'][k]
                    key_points.append(ia.Keypoint(x=b['all_points_x'][k], y=b['all_points_y'][k]))
                except IndexError:
                    print("Error: i:" + str(i) + " name:" + annotations_new_keys[i] + " j:" + str(j) + " k:" + str(k))
            key_points_old.append(ia.KeypointsOnImage(key_points, shape=image_old.shape))
        # Transform the image and its keypoints with the frozen pipeline.
        image_new = seq_det.augment_image(image_old)
        key_points_new = seq_det.augment_keypoints(key_points_old)
        # Tagged name for the augmented file; assumes .png input — TODO confirm.
        image_file_name = filename.replace(".png", "_" + iaa_name + ".png")
        image_path_new = os.path.join(dataset_dir_new, image_file_name)
        # Save the augmented image.
        Image.fromarray(image_new.astype('uint8')).convert('RGB').save(image_path_new, "PNG")
        image_size = os.path.getsize(image_path_new)
        # VIA keys look like "<filename><filesize>"; rebuild the key for the
        # new file and remember it for the coordinate write-back below.
        annotations_new.update({image_file_name + str(image_size): annotations_new.pop(annotations_new_keys[i])})
        annotations_new_keys[i] = image_file_name + str(image_size)
        annotations_new[annotations_new_keys[i]]['filename'] = image_file_name
        annotations_new[annotations_new_keys[i]]['size'] = image_size
        # Write the transformed coordinates back; j indexes regions, k vertices.
        for j in range(len(key_points_new)):
            for k, key_point in enumerate(key_points_new[j].keypoints):
                annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_x'][k] = key_point.x
                annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_y'][k] = key_point.y
    # Persist the transformed annotations next to the augmented images.
    with open(os.path.join(dataset_dir_new, "via_region_data.json"), 'w') as f:
        json.dump(annotations_new, f)
    print('数据扩展 ' + iaa_name + ': Done! ')
if __name__ == "__main__":
main()