## 深度学习之数据增强(二分类)
继前几篇博客后,发现自己搭建的网络对二分类效果并不好,目前正在尝试升级网络,
首先我对数据进行了数据增强,
其次我更换了网络结构(从vgg到resnet18)
最后又对损失函数进行更改,
在以后的博客中我会陆续放出我升级网络的方法,下面让我们进入今天的主题:数据增强。
在学习了解了若干种数据增强方式后,我最终选择了 imgaug 库。ImageDataGenerator 这种数据增强方式返回的是迭代器,然后直接喂入网络进行训练;但是我在进行数据读取处理时继承了 Sequence,所以这种数据增强方式对我来说并不是特别合适。我想要在继承 Sequence 的数据处理类中进行数据增强,而 imgaug 这种数据增强方式较为灵活:它是对一张图片转成的张量进行数据增强的,最后返回的也是一个张量,很符合我的要求,所以我选择了 imgaug 库的数据增强方式。
imgaug数据增强的具体方法见链接传送门
import random
import scipy.misc
import numpy as np
from keras.utils import Sequence, to_categorical
import imgaug
from imgaug import augmenters as iaa
import tensorflow as tf
import os
# from keras_preprocessing.image import ImageDataGenerator
# from scipy import ndimage
# Root folders that the per-image relative paths (from the id files) are
# joined onto, and the label files for each split.
train_images_folder = 'data/'
valid_images_folder = 'data/'
train_labels = 'data/train/train_labels.txt'
valid_labels = 'data/valid/val_labels.txt'

# Number of samples per batch and the network input resolution.
batch_size = 12
H = 224
W = 224

# imgaug augmentation pipeline. ``random_order=True`` lets imgaug shuffle the
# order in which the child augmenters are applied.
seq = iaa.Sequential(
    [
        # Horizontal flip for half of the images.
        iaa.Fliplr(0.5),
        # Very light random crop (0-1% per side).
        iaa.Crop(percent=(0, 0.01)),
        # Mild Gaussian blur, applied to roughly half of the images.
        iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),
        # Contrast change, sampled per channel for half of the images.
        iaa.LinearContrast((0.75, 1.5), per_channel=0.5),
        # Additive Gaussian noise, sampled per channel for half of the images.
        iaa.AdditiveGaussianNoise(
            loc=0,
            scale=(0, 0.05 * 255),
            per_channel=0.5,
        ),
        # Brightness scaling, sampled per channel for 20% of the images.
        iaa.Multiply((0.8, 1.2), per_channel=0.2),
    ],
    random_order=True,
)
def read_labellist(labeldir):
    """Read a label file and return its lines.

    Args:
        labeldir: Path of a text file holding one label per line (despite
            the name, this is a file path, not a directory).

    Returns:
        A list with one string per line of the file, line terminators
        removed.
    """
    with open(labeldir, "r") as f:
        return f.read().splitlines()
# names=read_labellist(images_label)
#读取图片并将其转化为可输入网络的张量
class DataGenSequence(Sequence):
    """Keras ``Sequence`` yielding image batches for 2-class classification.

    Image paths come from an id file and labels from a label file; both are
    read line by line and are expected to be in the same order. Every batch
    is a tuple ``(batch_x, batch_y)``:

    * ``batch_x``: ``float32`` array of shape ``(n, H, W, 3)`` scaled to
      ``[0, 1]``.
    * ``batch_y``: one-hot labels of shape ``(n, 2)``.

    ``n`` equals ``batch_size`` except for the last batch of an epoch, which
    only holds the remaining samples.
    """

    def __init__(self, usage):
        """Load the image-name list and the label list of one data split.

        Args:
            usage: ``'train'`` selects the training split; any other value
                selects the validation split.

        Raises:
            ValueError: If the label file has fewer entries than the id file.
        """
        self.usage = usage
        if usage == 'train':
            id_file = 'data/train/train_id.txt'
            self.images_folder = train_images_folder
            self.labels = read_labellist(train_labels)
        else:
            id_file = 'data/valid/val_id.txt'
            self.images_folder = valid_images_folder
            self.labels = read_labellist(valid_labels)
        with open(id_file, 'r') as f:
            self.names = f.read().splitlines()
        # Fail early instead of hitting an obscure shape error mid-epoch.
        if len(self.labels) < len(self.names):
            raise ValueError(
                'label file has %d entries but id file %r lists %d images'
                % (len(self.labels), id_file, len(self.names))
            )

    def __len__(self):
        """Return the number of batches per epoch (last one may be partial)."""
        return int(np.ceil(len(self.names) / float(batch_size)))

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Args:
            idx: Zero-based batch index, ``0 <= idx < len(self)``.

        Returns:
            Tuple ``(batch_x, batch_y)`` as described in the class docstring.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError('batch index %r out of range' % (idx,))

        start = idx * batch_size
        count = min(batch_size, len(self.names) - start)

        # Size the arrays to the number of real samples: allocating
        # ``batch_size`` rows would leave uninitialised rows in the final,
        # partial batch and feed garbage to the network.
        batch_x = np.empty((count, H, W, 3), dtype=np.float32)
        batch_y = np.empty((count, 2))

        # Random augmentation is for training only; validation data must stay
        # deterministic so that metrics are comparable across epochs.
        augment = self.usage == 'train'

        for row in range(count):
            pos = start + row
            filename = os.path.join(self.images_folder, self.names[pos])
            # NOTE: scipy.misc.imread / imresize were removed from recent
            # SciPy releases; this code requires an older SciPy with Pillow.
            # mode='RGB' guarantees 3 channels for grayscale / RGBA inputs.
            img = scipy.misc.imread(filename, mode='RGB')
            if augment:
                # augment_image works on a single (H, W[, C]) image.
                img = seq.augment_image(img)
            img = scipy.misc.imresize(img, size=(H, W))
            batch_x[row] = img / 255.
            # One-hot encode the label for the 2-unit Dense output layer.
            batch_y[row] = to_categorical([int(self.labels[pos])], 2)[0]
        return batch_x, batch_y
def train_gen():
    """Return a ``DataGenSequence`` over the training split."""
    return DataGenSequence('train')
def valid_gen():
    """Return a ``DataGenSequence`` over the validation split."""
    return DataGenSequence('valid')
给到你帮助的话给赞呦