深度学习
深度学习需要大量的样本,即使基于迁移学习,原样本也可能并不适用于当前的识别场景,还是需要大量的数据去进行微调。
数据增强
使用imgaug
进行数据增强,它可以把标注点和图像同步进行变换。github:https://github.com/aleju/imgaug
Windows 下使用 pip 安装 imgaug,需要依赖库 geos,需要将 geos.dll 和 geos_c.dll 放在环境中(系统环境变量 Path 中,我放在了 system32)。
Geos需要从官网或者github进行下载,进行Cmake+VS编译出动态链接库。不会的可以直接下载:https://download.csdn.net/download/u012525096/10894043
# 依赖库
pip install six numpy scipy Pillow matplotlib scikit-image opencv-python imageio Shapely
# 安装imgaug
pip install imgaug
# install the latest version directly from github:
pip install git+https://github.com/aleju/imgaug
代码
这里采用的是VIA进行标注的VGG通用格式的标注文件,其它格式请自己解析。
"""
Script to verify all examples in the readme.
Simply execute
python test_readme_examples.py
"""
from __future__ import print_function, division
import numpy as np
import imgaug as ia
from imgaug import augmenters as iaa
from PIL import Image
import json
import os
import copy
import shutil
def main():
    """Entry point: run the configured augmentation / merge steps.

    The individual augmentation steps are kept commented out; uncomment
    the ones needed for a given run.
    """
    # Placeholder directories (Chinese: "old directory" / "new directory") —
    # replace with real dataset paths before running.
    old = "旧目录"
    new = "新目录"
    # # Rotate by +10 degrees
    # data_augmentation(old,
    #                   new, "rotate_10",
    #                   iaa.Sequential([iaa.Affine(rotate=10)]))
    # # Rotate by -10 degrees
    # data_augmentation(old,
    #                   new, "rotate_-10",
    #                   iaa.Sequential([iaa.Affine(rotate=-10)]))
    # Gaussian blur (three strengths)
    # data_augmentation(old,
    #                   new, "GaussianBlur_low",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=1)]))
    # data_augmentation(old,
    #                   new, "GaussianBlur_mid",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=2)]))
    # data_augmentation(old,
    #                   new, "GaussianBlur_high",
    #                   iaa.Sequential([iaa.GaussianBlur(sigma=3)]))
    # # Additive Gaussian noise (two strengths)
    # data_augmentation(old,
    #                   new, "AdditiveGaussianNoise_5",
    #                   iaa.Sequential([iaa.AdditiveGaussianNoise(scale=5)]))
    # data_augmentation(old,
    #                   new, "AdditiveGaussianNoise_10",
    #                   iaa.Sequential([iaa.AdditiveGaussianNoise(scale=10)]))
    # # Brightness changes
    # data_augmentation(old,
    #                   new, "light_1.15",
    #                   iaa.Sequential([iaa.Multiply(mul=1.15)]))
    # data_augmentation(old,
    #                   new, "light_1.3",
    #                   iaa.Sequential([iaa.Multiply(mul=1.3)]))
    # data_augmentation(old,
    #                   new, "light_0.85",
    #                   iaa.Sequential([iaa.Multiply(mul=0.85)]))
    # data_augmentation(old,
    #                   new, "light_0.7",
    #                   iaa.Sequential([iaa.Multiply(mul=0.7)]))
    # # Centre scaling
    # data_augmentation(old,
    #                   new, "Affine_scale_1.5",
    #                   iaa.Sequential([iaa.Affine(scale={"x": 1.5, "y": 1.5})]))
    # data_augmentation(old,
    #                   new, "Affine_scale_0.8",
    #                   iaa.Sequential([iaa.Affine(scale={"x": 0.8, "y": 0.8})]))
    # # Translation
    # data_augmentation(old,
    #                   new, "Affine_xy_20",
    #                   iaa.Sequential([iaa.Affine(translate_px={"x": 20, "y": 20})]))
    # data_augmentation(old,
    #                   new, "Affine_x_10",
    #                   iaa.Sequential([iaa.Affine(translate_px={"x": 10, "y": 0})]))
    # ########################## Merge the datasets expanded above ##########################
    merge_data("合并前的目录", "合并后的目录")
    # ########################## Mirror-flip the merged data ##########################
    # # Mirror every sub-dataset under the given directory
    # flip_all("目录下所有文件进行镜像")
def flip_all(datasets_path):
    """Horizontally mirror every sub-dataset found under *datasets_path*.

    Each child entry is augmented in place: the flipped images and the
    transformed annotations are written to a sibling directory whose name
    gets the "_flip" suffix.
    """
    for child in os.listdir(datasets_path):
        child_path = os.path.join(datasets_path, child)
        data_augmentation(child_path, child_path, "_flip",
                          iaa.Sequential([iaa.Fliplr(1)]))
def merge_data(datasets_path, new_dataset):
    """Merge several VIA-annotated sub-datasets into a single dataset.

    Every immediate subdirectory of ``datasets_path`` is expected to hold a
    ``via_region_data.json`` annotation file plus image files.  All
    annotations are merged into one JSON file and every non-JSON file is
    copied into ``new_dataset``.

    Args:
        datasets_path: directory whose subdirectories are the datasets to merge.
        new_dataset: output directory; created if it does not exist.
    """
    # Collect the immediate subdirectories (one per sub-dataset).  Listing
    # happens before the output directory is created, so a new_dataset that
    # lives inside datasets_path is not picked up as an input.
    datasets = []
    for child in os.listdir(datasets_path):
        child_path = os.path.join(datasets_path, child)
        if os.path.isdir(child_path):
            datasets.append(str(child_path))
    # Create the output directory if needed.
    if not os.path.exists(new_dataset):
        os.makedirs(new_dataset)
    # ---- Merge annotations; later datasets overwrite duplicate keys. ----
    annotations = {}
    for dataset in datasets:
        # Use a context manager so the file handle is closed promptly
        # (the original left the handle to the garbage collector).
        with open(os.path.join(dataset, "via_region_data.json")) as f:
            annotations.update(json.load(f))
    # Write the merged annotation file.
    with open(os.path.join(new_dataset, "via_region_data.json"), 'w') as f:
        json.dump(annotations, f)
    # ---- Copy the images (every file that is not a JSON file). ----
    for dataset in datasets:
        for file_name in os.listdir(dataset):
            if not file_name.endswith('json'):
                shutil.copyfile(os.path.join(dataset, file_name),
                                os.path.join(new_dataset, file_name))
def data_augmentation(dataset_dir_old, dataset_dir_new_prefix, iaa_name, seq):
    """Augment every annotated image of a VIA (VGG Image Annotator) dataset.

    Applies the imgaug pipeline ``seq`` to each image listed in
    ``via_region_data.json`` and transforms the polygon keypoints of the
    annotations in sync, writing images plus an updated annotation file to
    a new directory.

    Args:
        dataset_dir_old: directory with the source images and the
            ``via_region_data.json`` annotation file.
        dataset_dir_new_prefix: prefix of the output directory; the final
            directory is ``dataset_dir_new_prefix + iaa_name``.
        iaa_name: tag appended to the output directory and to each
            augmented file name (e.g. ``"_flip"``).
        seq: an ``imgaug.augmenters.Sequential`` pipeline to apply.
    """
    # Original hard-coded "By flip" here, which was wrong for every other
    # augmentation; report the actual augmentation tag instead.
    print("数据扩展 " + iaa_name + ": Executing! ")
    # Freeze the (possibly random) pipeline so that images and keypoints
    # receive exactly the same transformation.
    seq_det = seq.to_deterministic()
    # Create the output directory if needed (was `exists(...).__eq__(False)`).
    dataset_dir_new = dataset_dir_new_prefix + iaa_name
    if not os.path.exists(dataset_dir_new):
        os.makedirs(dataset_dir_new)
    # Load the VIA annotations; close the file handle promptly.
    with open(os.path.join(dataset_dir_old, "via_region_data.json")) as f:
        annotations = json.load(f)
    annotations_new = copy.deepcopy(annotations)
    # Keep keys and values aligned: BOTH lists are filtered to entries that
    # actually contain regions.  The original filtered only the values, so
    # index i renamed the wrong keys whenever an entry had empty regions.
    annotations_new_keys = [k for k, v in annotations.items() if v['regions']]
    annotations_values = [v for v in annotations.values() if v['regions']]
    for i, annotations_value in enumerate(annotations_values):
        # One KeypointsOnImage per region of this image.
        key_points_old = []
        # VIA exports regions either as a dict (older format) or a list.
        if type(annotations_value['regions']) is dict:
            polygons = [r['shape_attributes'] for r in annotations_value['regions'].values()]
        else:
            polygons = [r['shape_attributes'] for r in annotations_value['regions']]
        filename = annotations_value['filename']
        image_old = np.array(Image.open(os.path.join(dataset_dir_old, filename)))
        # Collect the polygon vertices of every region as imgaug keypoints.
        for j, b in enumerate(polygons):
            key_points = []
            for k in range(len(b['all_points_x'])):
                try:
                    # Probe the copy at the same indices so inconsistent
                    # annotation files are detected and skipped, not crashed on.
                    annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_x'][k]
                    annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_y'][k]
                    key_points.append(ia.Keypoint(x=b['all_points_x'][k], y=b['all_points_y'][k]))
                except IndexError:
                    print("Error: i:" + str(i) + " name:" + annotations_new_keys[i] + " j:" + str(j) + " k:" + str(k))
            key_points_old.append(ia.KeypointsOnImage(key_points, shape=image_old.shape))
        # Transform the image and its keypoints with the frozen pipeline.
        image_new = seq_det.augment_image(image_old)
        key_points_new = seq_det.augment_keypoints(key_points_old)
        # Tagged name for the augmented file; assumes .png input — TODO confirm.
        image_file_name = filename.replace(".png", "_" + iaa_name + ".png")
        image_path_new = os.path.join(dataset_dir_new, image_file_name)
        # Save the augmented image.
        Image.fromarray(image_new.astype('uint8')).convert('RGB').save(image_path_new, "PNG")
        image_size = os.path.getsize(image_path_new)
        # VIA keys look like "<filename><filesize>"; rebuild the key for the
        # new file and remember it for the coordinate write-back below.
        annotations_new.update({image_file_name + str(image_size): annotations_new.pop(annotations_new_keys[i])})
        annotations_new_keys[i] = image_file_name + str(image_size)
        annotations_new[annotations_new_keys[i]]['filename'] = image_file_name
        annotations_new[annotations_new_keys[i]]['size'] = image_size
        # Write the transformed coordinates back; j indexes regions, k vertices.
        for j in range(len(key_points_new)):
            for k, key_point in enumerate(key_points_new[j].keypoints):
                annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_x'][k] = key_point.x
                annotations_new[annotations_new_keys[i]]['regions'][j]['shape_attributes']['all_points_y'][k] = key_point.y
    # Persist the transformed annotations next to the augmented images.
    with open(os.path.join(dataset_dir_new, "via_region_data.json"), 'w') as f:
        json.dump(annotations_new, f)
    print('数据扩展 ' + iaa_name + ': Done! ')
if __name__ == "__main__":
main()