TensorFlow 2.0 学习笔记
一、Model子类化创建自定义模型
# 先自定义一个残差模块,为自定义Layer
class ResBlock(layers.Layer):
    """1-D convolutional residual block.

    Three Conv1D layers (64 -> 32 -> input_channels filters), a skip
    connection adding the block input back onto the conv output, then a
    2x max-pooling.  The last conv restores the input channel count so
    the residual add is shape-valid.

    Args:
        kernel_size: kernel width used by all three Conv1D layers.
    """

    def __init__(self, kernel_size, **kwargs):
        super(ResBlock, self).__init__(**kwargs)
        self.kernel_size = kernel_size

    def build(self, input_shape):
        self.conv1 = layers.Conv1D(filters=64, kernel_size=self.kernel_size,
                                   activation="relu", padding="same")
        self.conv2 = layers.Conv1D(filters=32, kernel_size=self.kernel_size,
                                   activation="relu", padding="same")
        # filters=input_shape[-1] so the output can be added to the input.
        self.conv3 = layers.Conv1D(filters=input_shape[-1],
                                   kernel_size=self.kernel_size,
                                   activation="relu", padding="same")
        self.maxpool = layers.MaxPool1D(2)
        # FIX: create the Add layer once here instead of instantiating a new
        # layers.Add() on every forward pass inside call().
        self.add = layers.Add()
        super(ResBlock, self).build(input_shape)  # equivalent to self.built = True

    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.add([inputs, x])  # residual connection
        x = self.maxpool(x)
        return x

    def get_config(self):
        # Required so a model composed from this layer via the Functional API
        # can be serialized and deserialized.
        config = super(ResBlock, self).get_config()
        config.update({'kernel_size': self.kernel_size})
        return config
# Smoke-test ResBlock: build it for an input of shape (batch, 200, 7) and
# verify the layer can infer its output shape.
resblock = ResBlock(kernel_size = 3)
resblock.build(input_shape = (None,200,7))
resblock.compute_output_shape(input_shape=(None,200,7))
# Custom model via Model subclassing (could equally be built with the
# Sequential or Functional API).
class ImdbModel(models.Model):
    """Binary text classifier: Embedding -> 2x ResBlock -> Flatten -> sigmoid.

    Expects integer token-id sequences; MAX_WORDS (the vocabulary size) is
    defined elsewhere in the file.
    """

    def __init__(self):
        super(ImdbModel, self).__init__()

    def build(self, input_shape):
        self.embedding = layers.Embedding(MAX_WORDS, 7)
        self.block1 = ResBlock(7)
        self.block2 = ResBlock(5)
        # FIX: create the Flatten layer once here rather than instantiating
        # a new layers.Flatten() on every forward pass inside call().
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(1, activation="sigmoid")
        super(ImdbModel, self).build(input_shape)

    def call(self, x):
        x = self.embedding(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.flatten(x)
        x = self.dense(x)
        return x
tf.keras.backend.clear_session()  # reset global Keras state (layer name counters, etc.)
model = ImdbModel()
model.build(input_shape =(None,200))  # batches of integer sequences, length 200
model.summary()
model.compile(optimizer='Nadam',
loss='binary_crossentropy',
metrics=['accuracy',"AUC"])
import datetime
# One timestamped TensorBoard log directory per run, to keep runs separate.
logdir = "./tflogs/keras_model/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
# ds_train / ds_test are presumably tf.data datasets defined elsewhere — confirm.
history = model.fit(ds_train,validation_data = ds_test,
epochs = 6,callbacks=[tensorboard_callback])
plot_metric(history,"auc")  # plot_metric is a helper defined elsewhere in these notes
二、数据读取
大批数据,batch读取内存
keras.utils.Sequence接口:
继承Sequence接口,创建数据generator
官方例子:
from skimage.io import imread
from skimage.transform import resize
import numpy as np
import math
# `x_set` is a list of image file paths; `y_set` holds the matching labels.
class CIFAR10Sequence(Sequence):
    """Keras Sequence that loads and resizes images from disk one batch
    at a time, so the full dataset never has to fit in memory."""

    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch; the final batch may be short.
        return math.ceil(len(self.x) / self.batch_size)

    def __getitem__(self, idx):
        lo = idx * self.batch_size
        hi = lo + self.batch_size
        paths = self.x[lo:hi]
        labels = self.y[lo:hi]
        images = [resize(imread(path), (200, 200)) for path in paths]
        return np.array(images), np.array(labels)
init():初始化类。
len():返回batch的个数,也就是完整跑一遍数据要运行模型多少次(即每个epoch的迭代次数)。
getitem():返回一个batch_size的数据(data,label)
on_epoch_end():这个函数例子中没有用到,但是官网有给,就是在每个 epoch跑完之后,你要做什么可以通过这个函数实现
这是以上函数的作用,虽然官方给的例子是像上面那样的。但是我们却不一定要写和它一模一样的格式,只要每个函数返回的东西和上面例子一样就行(比如:getitem()返回的是一个batch_size的数据,只要你在这个函数返回的是一个batch_size的数据,那么函数里面怎么运行的都可以)。
(上面这段话摘的,地址:https://blog.csdn.net/qq_40861013/article/details/105208966)
但是这边最后的return np.array,太耗时(动态分配内存,图像过大,耗时严重),改为:
X = np.empty((self.batch_size,*self.dim))
y = np.empty((self.batch_size),dtype=int)
预先分配内存大小,之后速度刚刚的。
三、模型设置
1
model.build(input_shape=(None,128,128,3))  # e.g. batches of 128x128 RGB images
model.summary()
2
# Training configuration.
# NOTE(review): mean_squared_error paired with an 'accuracy' metric is odd —
# confirm this is really a regression task; use a crossentropy loss for
# classification.
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
loss=tf.keras.losses.mean_squared_error,
metrics=['accuracy'])
3
# Callback setup.
# TensorBoard shows the loss curves; view them with:
#   tensorboard --logdir=/full_path_to_your_logs
model_name = "modelcp-{}".format(int(time.time()))
tensorboard = TensorBoard(log_dir='./tmp/logs/{}'.format(model_name))

# ModelCheckpoint: save the best model (lowest val_loss), checked each epoch.
filepath="./temp_save_model/weights-xsage-{epoch:02d}-{val_loss:.2f}.h5"#save best model
#filepath="./temp_save_model/weights.best.hdf5"#save only one
checkpoint = ModelCheckpoint(filepath, monitor='val_loss',
                             verbose=0, save_best_only=True,
                             save_weights_only=False, mode='auto',
                             save_freq='epoch')

# Cut the learning rate 10x after 5 epochs with no val_loss improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                              patience=5, min_lr=0.000001)

# Train the model.
# FIX: Model.fit_generator is deprecated in TF2 — model.fit accepts
# generators/Sequences directly.  steps_per_epoch / validation_steps must be
# integers, so round up with math.ceil instead of passing the float result of
# true division.
model.fit(training_generator,
          steps_per_epoch=math.ceil(len(img_train) / batch_size),
          epochs=1000,
          validation_data=test_generator,
          validation_steps=math.ceil(len(img_test) / batch_size),
          callbacks=[checkpoint, tensorboard, reduce_lr])