前言
之前自己苦于这方面久矣,很多博主或者书籍都以 MNIST 数据集为例——那种 Keras 自带的数据集加载简直算是开挂好嘛,我们主要谈谈自己的数据集到底该怎么办。注:如下函数代码都是博主自己可以跑通的~
背景知识理解
我们导入数据后大多是用fit_generator来跑网络,我们先看官方文档几个经典的函数解释:
然后一般网络要求输入都是四维数组(batch数,长,宽,通道数),所以我们数据也要整成四维数组的形式否则会报各种错
代码展示
第一种 keras自带函数
1.flow_from_directory:图片按照文件夹分
def trainGenerator(batch_size, train_path, original_dir, mask_dir, aug_dict,
                   target_size, image_color_mode="grayscale",
                   aug_image_save_dir=None, aug_mask_save_dir=None,
                   original_aug_prefix="image", mask_aug_prefix="mask",
                   seed=1):
    """Endlessly yield paired (image, mask) batches for segmentation training.

    Two ImageDataGenerator streams read sibling class folders
    (``original_dir`` and ``mask_dir``) under ``train_path``. Both streams
    are built from the same ``aug_dict`` and share the same ``seed`` so the
    random augmentations applied to an image and to its mask stay in sync.

    Args:
        batch_size: samples per yielded batch.
        train_path: parent directory containing the two class folders.
        original_dir: folder name with the input images.
        mask_dir: folder name with the ground-truth masks.
        aug_dict: keyword arguments forwarded to ImageDataGenerator.
        target_size: (height, width) every image is resized to.
        image_color_mode: "grayscale" converts images to single-channel.
        aug_image_save_dir: if set, augmented images are saved there
            for visual inspection (same for aug_mask_save_dir / masks).
        original_aug_prefix: filename prefix for saved augmented images.
        mask_aug_prefix: filename prefix for saved augmented masks.
        seed: shared RNG seed — MUST be identical for both streams so each
            image stays aligned with its mask.

    Yields:
        (image_batch, mask_batch) tuples, post-processed by adjustData
        (defined elsewhere in this project).
    """
    original_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)
    original_generator = original_datagen.flow_from_directory(
        train_path,
        classes=[original_dir],
        class_mode=None,              # raw images only — no labels
        color_mode=image_color_mode,  # e.g. convert to grayscale
        target_size=target_size,      # resize, e.g. (256, 256)
        batch_size=batch_size,
        save_to_dir=aug_image_save_dir,  # optionally dump augmented images
        save_prefix=original_aug_prefix,
        seed=seed)
    mask_generator = mask_datagen.flow_from_directory(
        train_path,
        classes=[mask_dir],
        class_mode=None,
        color_mode=image_color_mode,
        target_size=target_size,
        batch_size=batch_size,
        save_to_dir=aug_mask_save_dir,
        save_prefix=mask_aug_prefix,
        seed=seed)
    # NOTE: in Python 3, zip() returns a *lazy iterator*, not a list.
    # That is essential here: both Keras generators are infinite, so an
    # eager pairing would never terminate. Each step pulls one image
    # batch and its matching mask batch.
    train_generator = zip(original_generator, mask_generator)
    for original, mask in train_generator:
        original, mask = adjustData(original, mask)
        yield original, mask  # one (images, masks) batch per step
调用
# Augmentation settings forwarded verbatim to ImageDataGenerator.
aug_dict = {
    'rotation_range': 0.02,      # NOTE(review): Keras interprets this in degrees — 0.02° is nearly a no-op; confirm intent
    'width_shift_range': 0.05,   # horizontal shift, fraction of width
    'height_shift_range': 0.05,  # vertical shift, fraction of height
    'shear_range': 0.05,
    'zoom_range': 0.05,
    'horizontal_flip': True,
    'fill_mode': 'nearest',      # fill newly-exposed pixels with nearest neighbor
}
# Build the training and validation generators.
# NOTE(review): `type` shadows the Python builtin — presumably it holds the
# image-folder name set earlier in the script; verify and consider renaming.
# BATCH_SIZE, train_path, val_path, data_size, Aug_* are defined elsewhere.
myGene = trainGenerator(BATCH_SIZE, train_path, type, "GTM", aug_dict, target_size=data_size,
aug_image_save_dir=Aug_originall, aug_mask_save_dir=Aug_GTM1)
valGene = validationGenerator(BATCH_SIZE, val_path, type, "GTM", aug_dict, target_size=data_size,
aug_image_save_dir=Aug_original2, aug_mask_save_dir=Aug_GTM2)
就可以
history = model.fit_generator(myGene, steps_per_epoch =5,epochs=2, validation_data=valGene,validation_steps=5,callbacks=[model_checkpoint])
2.图片按照路径分的,csv里面是“路径,类别”
# Load labeled images from a CSV of (FileID, SpeciesID) rows via
# flow_from_dataframe — no folder-per-class layout required.
gen = keras.preprocessing.image.ImageDataGenerator()
# Data augmentation options: see https://blog.csdn.net/jacke121/article/details/79245732
#val_gen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True, vertical_flip=True)
val_gen = keras.preprocessing.image.ImageDataGenerator()
# Read SpeciesID with object (string) dtype — class labels are treated as
# categories, not numbers.
train_df=pd.read_csv('marineimage/training.csv',dtype={'SpeciesID': object})
#val_df=pd.read_csv("E:\\marineimage\\val.csv",dtype={'SpeciesID': object})
# Append the .jpg extension so FileID matches the actual filenames on disk.
train_df['FileID'] = train_df['FileID'].apply(lambda x: str(x) + '.jpg')
#val_df['FileID'] = val_df['FileID'].apply(lambda x: str(x) + '.jpg')
#train_df['SpeciesID'] = train_df['SpeciesID'].astype('object')
#val_df['SpeciesID'] = val_df['SpeciesID'].astype('object')
# x_col gives the filename column, y_col the label column; class_mode
# "categorical" yields one-hot labels. SIZE and BATCH_SIZE defined elsewhere.
batches=gen.flow_from_dataframe(dataframe=train_df, directory='marineimage/data', x_col="FileID",
y_col="SpeciesID", class_mode="categorical", shuffle=True,target_size=SIZE, batch_size=BATCH_SIZE)
第二种 自己弄
def read_image_batch(data_path, batch_size, image_shape=(360, 480)):
    """Endlessly yield (X, y) training batches described by a listing file.

    ``data_path`` is a text file where each line holds two
    whitespace-separated paths: "<image_path> <mask_path>". The first
    character of each path is stripped (``entry[0][1:]``) — presumably a
    leading '/' or similar prefix in the listing; TODO confirm against the
    actual file format.

    Args:
        data_path: path to the listing text file (re-read every epoch).
        batch_size: nominal samples per batch; the final batch of an epoch
            may be smaller when the file length is not a multiple of it.
        image_shape: (height, width) of the images on disk; defaults to the
            original hard-coded (360, 480).

    Yields:
        X: float array of shape (n, height, width, 1), scaled to [0, 1].
        y: one-hot labels from to_categorical (num_classes defined elsewhere).
    """
    height, width = image_shape
    while True:  # loop forever: one pass over the listing per epoch
        with open(data_path) as fh:  # close the handle each epoch (was leaked)
            image_list = fh.readlines()
        total = len(image_list)
        num_batches = total // batch_size
        if num_batches * batch_size < total:
            num_batches += 1  # trailing partial batch
        for i in range(num_batches):
            batch_set = image_list[batch_size * i:min(batch_size * (i + 1), total)]
            batch_set = [line.strip().split() for line in batch_set]
            X = np.array([imageio.imread(entry[0][1:], as_gray=True) for entry in batch_set])
            labels = np.array([imageio.imread(entry[1][1:]) for entry in batch_set])
            X = X / 255
            # BUG FIX: the last batch can hold fewer than batch_size samples,
            # so reshape by the actual count — the original reshape to
            # (batch_size, ...) raised ValueError on that partial batch.
            X = np.reshape(X, (len(batch_set), height, width, 1))
            labels = labels / 255
            y = to_categorical(labels, num_classes)  # num_classes: module-level constant
            yield X, y
data_path 是一个路径 txt,里面每行是"原图路径 标签路径"这样的文件名对,类似下面这种。
这是训练集的制作方法