model
"""
official code:
https://github.com/google/automl/tree/master/efficientnetv2
"""
import itertools
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
CONV_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 2.0,
'mode': 'fan_out',
'distribution': 'truncated_normal'
}
}
DENSE_KERNEL_INITIALIZER = {
'class_name': 'VarianceScaling',
'config': {
'scale': 1. / 3.,
'mode': 'fan_out',
'distribution': 'uniform'
}
}
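# These dict configs are deserialized by Keras when passed as `kernel_initializer`.
# Equivalent explicit construction, as an illustrative sketch (not part of the
# original code):
#
#   init = tf.keras.initializers.get(CONV_KERNEL_INITIALIZER)
#   w = init(shape=(3, 3, 32, 64))  # sample a 3x3 conv kernel, 32 -> 64 channels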
class SE(layers.Layer):
def __init__(self,
se_filters: int,
output_filters: int,
name: str = None):
super(SE, self).__init__(name=name)
self.se_reduce = layers.Conv2D(filters=se_filters,
kernel_size=1,
strides=1,
padding="same",
activation="swish",
use_bias=True,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name="conv2d")
self.se_expand = layers.Conv2D(filters=output_filters,
kernel_size=1,
strides=1,
padding="same",
activation="sigmoid",
use_bias=True,
kernel_initializer=CONV_KERNEL_INITIALIZER,
name="conv2d_1")
def call(self, inputs, **kwargs):
# Tensor: [N, H, W, C] -> [N, 1, 1, C]
se_tensor = tf.reduce_mean(inputs, [1, 2], keepdims=True)
se_tensor = self.se_reduce(se_tensor)
se_tensor = self.se_expand(se_tensor)
return se_tensor * inputs
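# A minimal shape check for the SE block -- an illustrative sketch on a dummy
# NHWC tensor (not part of the original code):
#
#   se = SE(se_filters=8, output_filters=32)
#   out = se(tf.zeros((1, 56, 56, 32)))  # squeeze -> 1x1 convs -> sigmoid gate
#   assert out.shape == (1, 56, 56, 32)  # gating preserves the input shape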
class MBConv(layers.Layer):
def __init__(self,
kernel_size: int,
input_c: int,
out_c: int,
expand_ratio: int,
stride: int,
se_ratio: float = 0.25,
drop_rate: float = 0.,
name: str = None):
super(MBConv, self).__init__(name=name)
if stride not in [1, 2]:
raise ValueError("illegal stride value.")
self.has_shortcut = (stride == 1 and input_c == out_c)
expanded_c = input_c * expand_ratio
        # generate layer names that match the official pretrained weights: the first
        # call yields 'batch_normalization' / 'conv2d', later calls append '_1', '_2', ...
        # (each call after the first consumes two counter values, hence the // 2)
        bid = itertools.count(0)
        get_norm_name = lambda: 'batch_normalization' + ('' if not next(
            bid) else '_' + str(next(bid) // 2))
        cid = itertools.count(0)
        get_conv_name = lambda: 'conv2d' + ('' if not next(cid) else '_' + str(
            next(cid) // 2))
        # In EfficientNetV2, MBConv is never used with expand_ratio == 1,
        # so the point-wise expansion conv always exists
        assert expand_ratio != 1
# Point-wise expansion
        self.expand_conv = layers.Conv2D(
            filters=expanded_c,
            kernel_size=1,
            strides=1,
            padding="same",
            kernel_initializer=CONV_KERNEL_INITIALIZER,
            use_bias=False,
            name=get_conv_name())
self.norm0 = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name=get_norm_name())
self.act0 = layers.Activation("swish")
# Depth-wise convolution
self.depthwise_conv = layers.DepthwiseConv2D(
kernel_size=kernel_size,
strides=stride,
depthwise_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name="depthwise_conv2d")
self.norm1 = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name=get_norm_name())
self.act1 = layers.Activation("swish")
# SE
num_reduced_filters = max(1, int(input_c * se_ratio))
self.se = SE(num_reduced_filters, expanded_c, name="se")
# Point-wise linear projection
self.project_conv = layers.Conv2D(
filters=out_c,
kernel_size=1,
strides=1,
kernel_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name=get_conv_name())
self.norm2 = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name=get_norm_name())
self.drop_rate = drop_rate
if self.has_shortcut and drop_rate > 0:
# Stochastic Depth
            self.drop_path = layers.Dropout(rate=drop_rate,
                                            noise_shape=(None, 1, 1, 1),  # per-sample binary mask
                                            name="drop_path")
def call(self, inputs, training=None):
x = inputs
x = self.expand_conv(x)
x = self.norm0(x, training=training)
x = self.act0(x)
x = self.depthwise_conv(x)
x = self.norm1(x, training=training)
x = self.act1(x)
x = self.se(x)
x = self.project_conv(x)
x = self.norm2(x, training=training)
if self.has_shortcut:
if self.drop_rate > 0:
x = self.drop_path(x, training=training)
x = tf.add(x, inputs)
return x
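# Illustrative sketch (not part of the original code): the residual shortcut is
# active only when stride == 1 and input_c == out_c.
#
#   block = MBConv(kernel_size=3, input_c=64, out_c=64, expand_ratio=4, stride=1)
#   y = block(tf.zeros((1, 32, 32, 64)), training=False)
#   assert y.shape == (1, 32, 32, 64)  # same shape, so the shortcut applies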
class FusedMBConv(layers.Layer):
def __init__(self,
kernel_size: int,
input_c: int,
out_c: int,
expand_ratio: int,
stride: int,
se_ratio: float,
drop_rate: float = 0.,
name: str = None):
super(FusedMBConv, self).__init__(name=name)
if stride not in [1, 2]:
raise ValueError("illegal stride value.")
        # FusedMBConv stages in EfficientNetV2 never use an SE module
        assert se_ratio == 0.
self.has_shortcut = (stride == 1 and input_c == out_c)
self.has_expansion = expand_ratio != 1
expanded_c = input_c * expand_ratio
        # same layer-naming scheme as in MBConv (see the comment there)
        bid = itertools.count(0)
        get_norm_name = lambda: 'batch_normalization' + ('' if not next(
            bid) else '_' + str(next(bid) // 2))
        cid = itertools.count(0)
        get_conv_name = lambda: 'conv2d' + ('' if not next(cid) else '_' + str(
            next(cid) // 2))
if expand_ratio != 1:
self.expand_conv = layers.Conv2D(
filters=expanded_c,
kernel_size=kernel_size,
strides=stride,
kernel_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name=get_conv_name())
self.norm0 = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name=get_norm_name())
self.act0 = layers.Activation("swish")
self.project_conv = layers.Conv2D(
filters=out_c,
kernel_size=1 if expand_ratio != 1 else kernel_size,
strides=1 if expand_ratio != 1 else stride,
kernel_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name=get_conv_name())
self.norm1 = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name=get_norm_name())
if expand_ratio == 1:
self.act1 = layers.Activation("swish")
self.drop_rate = drop_rate
if self.has_shortcut and drop_rate > 0:
# Stochastic Depth
            self.drop_path = layers.Dropout(rate=drop_rate,
                                            noise_shape=(None, 1, 1, 1),  # per-sample binary mask
                                            name="drop_path")
def call(self, inputs, training=None):
x = inputs
if self.has_expansion:
x = self.expand_conv(x)
x = self.norm0(x, training=training)
x = self.act0(x)
x = self.project_conv(x)
x = self.norm1(x, training=training)
        if not self.has_expansion:
x = self.act1(x)
if self.has_shortcut:
if self.drop_rate > 0:
x = self.drop_path(x, training=training)
x = tf.add(x, inputs)
return x
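# Illustrative sketch (not part of the original code): with stride == 2 the fused
# block downsamples and widens, so no shortcut is possible.
#
#   block = FusedMBConv(kernel_size=3, input_c=24, out_c=48, expand_ratio=4,
#                       stride=2, se_ratio=0.)
#   y = block(tf.zeros((1, 64, 64, 24)), training=False)
#   assert y.shape == (1, 32, 32, 48)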
class Stem(layers.Layer):
def __init__(self, filters: int, name: str = None):
super(Stem, self).__init__(name=name)
self.conv_stem = layers.Conv2D(
filters=filters,
kernel_size=3,
strides=2,
kernel_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name="conv2d")
self.norm = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name="batch_normalization")
self.act = layers.Activation("swish")
def call(self, inputs, training=None):
x = self.conv_stem(inputs)
x = self.norm(x, training=training)
x = self.act(x)
return x
class Head(layers.Layer):
def __init__(self,
filters: int = 1280,
num_classes: int = 1000,
drop_rate: float = 0.,
name: str = None):
super(Head, self).__init__(name=name)
self.conv_head = layers.Conv2D(
filters=filters,
kernel_size=1,
kernel_initializer=CONV_KERNEL_INITIALIZER,
padding="same",
use_bias=False,
name="conv2d")
self.norm = layers.BatchNormalization(
axis=-1,
momentum=0.9,
epsilon=1e-3,
name="batch_normalization")
self.act = layers.Activation("swish")
self.avg = layers.GlobalAveragePooling2D()
self.fc = layers.Dense(num_classes,
kernel_initializer=DENSE_KERNEL_INITIALIZER)
        if drop_rate > 0:
            self.dropout = layers.Dropout(drop_rate)
        else:
            self.dropout = None  # avoid AttributeError in call() when drop_rate == 0
def call(self, inputs, training=None):
x = self.conv_head(inputs)
        x = self.norm(x, training=training)
x = self.act(x)
x = self.avg(x)
        if self.dropout is not None:
x = self.dropout(x, training=training)
x = self.fc(x)
return x
class EfficientNetV2(Model):
def __init__(self,
model_cnf: list,
num_classes: int = 1000,
num_features: int = 1280,
dropout_rate: float = 0.2,
drop_connect_rate: float = 0.2,
name: str = None):
super(EfficientNetV2, self).__init__(name=name)
for cnf in model_cnf:
assert len(cnf) == 8
stem_filter_num = model_cnf[0][4]
self.stem = Stem(stem_filter_num)
total_blocks = sum([i[0] for i in model_cnf])
block_id = 0
self.blocks = []
# Builds blocks.
for cnf in model_cnf:
repeats = cnf[0]
op = FusedMBConv if cnf[-2] == 0 else MBConv
for i in range(repeats):
self.blocks.append(op(kernel_size=cnf[1],
input_c=cnf[4] if i == 0 else cnf[5],
out_c=cnf[5],
expand_ratio=cnf[3],
stride=cnf[2] if i == 0 else 1,
se_ratio=cnf[-1],
drop_rate=drop_connect_rate * block_id / total_blocks,
name="blocks_{}".format(block_id)))
block_id += 1
self.head = Head(num_features, num_classes, dropout_rate)
# def summary(self, input_shape=(224, 224, 3), **kwargs):
# x = Input(shape=input_shape)
# model = Model(inputs=[x], outputs=self.call(x, training=True))
# return model.summary()
def call(self, inputs, training=None):
        x = self.stem(inputs, training=training)
        # run the block stack
        for block in self.blocks:
            x = block(x, training=training)
x = self.head(x, training=training)
return x
def efficientnetv2_s(num_classes: int = 1000):
"""
EfficientNetV2
https://arxiv.org/abs/2104.00298
"""
# train_size: 300, eval_size: 384
# repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
model_config = [[2, 3, 1, 1, 24, 24, 0, 0],
[4, 3, 2, 4, 24, 48, 0, 0],
[4, 3, 2, 4, 48, 64, 0, 0],
[6, 3, 2, 4, 64, 128, 1, 0.25],
[9, 3, 1, 6, 128, 160, 1, 0.25],
[15, 3, 2, 6, 160, 256, 1, 0.25]]
model = EfficientNetV2(model_cnf=model_config,
num_classes=num_classes,
dropout_rate=0.2,
name="efficientnetv2-s")
return model
def efficientnetv2_m(num_classes: int = 1000):
"""
EfficientNetV2
https://arxiv.org/abs/2104.00298
"""
# train_size: 384, eval_size: 480
# repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
model_config = [[3, 3, 1, 1, 24, 24, 0, 0],
[5, 3, 2, 4, 24, 48, 0, 0],
[5, 3, 2, 4, 48, 80, 0, 0],
[7, 3, 2, 4, 80, 160, 1, 0.25],
[14, 3, 1, 6, 160, 176, 1, 0.25],
[18, 3, 2, 6, 176, 304, 1, 0.25],
[5, 3, 1, 6, 304, 512, 1, 0.25]]
model = EfficientNetV2(model_cnf=model_config,
num_classes=num_classes,
dropout_rate=0.3,
name="efficientnetv2-m")
return model
def efficientnetv2_l(num_classes: int = 1000):
"""
EfficientNetV2
https://arxiv.org/abs/2104.00298
"""
# train_size: 384, eval_size: 480
# repeat, kernel, stride, expansion, in_c, out_c, operator, se_ratio
model_config = [[4, 3, 1, 1, 32, 32, 0, 0],
[7, 3, 2, 4, 32, 64, 0, 0],
[7, 3, 2, 4, 64, 96, 0, 0],
[10, 3, 2, 4, 96, 192, 1, 0.25],
[19, 3, 1, 6, 192, 224, 1, 0.25],
[25, 3, 2, 6, 224, 384, 1, 0.25],
[7, 3, 1, 6, 384, 640, 1, 0.25]]
model = EfficientNetV2(model_cnf=model_config,
num_classes=num_classes,
dropout_rate=0.4,
name="efficientnetv2-l")
return model
# m = efficientnetv2_s()
# m.summary()
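# Quick smoke test, as a sketch with a random 4-image batch (not part of the
# original code):
#
#   m = efficientnetv2_s(num_classes=10)
#   logits = m(tf.random.normal((4, 300, 300, 3)), training=False)
#   print(logits.shape)  # (4, 10); the head outputs raw logits (no softmax)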
main
import os
import sys
import math
import datetime
import numpy as np
from random import shuffle
import cv2 as cv
import tensorflow as tf
from tqdm import tqdm
from model import efficientnetv2_s as create_model
from utils import generate_ds
assert tuple(int(v) for v in tf.version.VERSION.split(".")[:2]) >= (2, 4), \
    "TensorFlow version must be >= 2.4.0 (string comparison would mis-order e.g. 2.10)"
def main():
# data_root = "/data/flower_photos" # get data root path
#
    if not os.path.exists("./save_weights"):
        os.makedirs("./save_weights")
    img_size = {"s": [300, 384],  # [train_size, eval_size]
                "m": [384, 480],
                "l": [384, 480]}
    num_model = "s"  # must match the imported create_model (efficientnetv2_s)
batch_size = 32
epochs = 1000
num_classes = 10
freeze_layers = True
initial_lr = 0.01
log_dir = "./logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
train_writer = tf.summary.create_file_writer(os.path.join(log_dir, "train"))
val_writer = tf.summary.create_file_writer(os.path.join(log_dir, "val"))
# data generator with data augmentation
# train_ds, val_ds = generate_ds(data_root,
# train_im_height=img_size[num_model][0],
# train_im_width=img_size[num_model][0],
# val_im_height=img_size[num_model][1],
# val_im_width=img_size[num_model][1],
# batch_size=batch_size)
    print('------------------------------- preparing dataset -------------------------------')
# create model
name_dict = {"BF": 0, "BK": 1, "BL": 2, "BR": 3, "CF": 4, "CL": 5, "CV": 6, "CXK": 7, "S": 8, "XF": 9}
data_root_path = "C:/my_all_data_download/ZCB/color_part_data_processing/"
    test_file_path = "C:/my_all_data_download/ZCB/TXT_doc/test.txt"        # test-set list file
    trainer_file_path = "C:/my_all_data_download/ZCB/TXT_doc/trainer.txt"  # training-set list file
    name_data_list = {}  # maps each class name to the list of its image paths
trainer_list = []
test_list = []
    # store each image's full path in the per-class dictionary
def save_train_test_file(path, name):
if name not in name_data_list:
img_list = []
img_list.append(path)
name_data_list[name] = img_list
else:
name_data_list[name].append(path)
    # walk the dataset directory, collect image paths, and split into train/test sets
dirs = os.listdir(data_root_path)
for d in dirs:
full_path = data_root_path + d
if os.path.isdir(full_path):
            imgs = os.listdir(full_path)  # list all images in this class subdirectory
for img in imgs:
save_train_test_file(full_path + "/" + img, d)
    # write the dictionary contents out as test/train list files; opening with
    # "w" below already truncates them, so no separate clearing pass is needed
    # split per class: every 10th image goes to the test set (~10% test split)
for name, img_list in name_data_list.items():
i = 0
num = len(img_list)
print(f"{name}:{num}张")
for img in img_list:
if i % 10 == 0:
test_list.append(f"{img}\t{name_dict[name]}\n")
else:
trainer_list.append(f"{img}\t{name_dict[name]}\n")
i += 1
with open(trainer_file_path, "w") as f:
shuffle(trainer_list)
f.writelines(trainer_list)
with open(test_file_path, "w") as f:
f.writelines(test_list)
print("---------------------------------------------------之前的代码主要是生成.txt文件便于找到图片和对应的标签-------------------------------------------------")
def generateds(train_list):
        x, y_ = [], []  # x holds image arrays, y_ holds integer labels
with open(train_list, 'r') as f:
            # read all lines and strip surrounding whitespace
            lines = [line.strip() for line in f]
for line in lines:
img_path, lab = line.strip().split("\t")
                img = cv.imread(img_path)         # read the image (BGR)
                img = cv.resize(img, (224, 224))  # resize to the fixed input size
                # img = np.array(img.convert('L'))  # (optional) 8-bit grayscale variant
                img = img / 255                   # normalize pixels to [0, 1]
                x.append(img)                     # append the normalized image
y_.append(lab)
x = np.array(x)
y_ = np.array(y_)
y_ = y_.astype(np.int64)
return x, y_
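    # Note: generateds() decodes every image into memory at once. For larger
    # datasets a streaming tf.data pipeline over the same .txt lists would scale
    # better -- a sketch of an assumed alternative (not the original code):
    #
    #   def make_ds(list_file):
    #       paths, labels = [], []
    #       with open(list_file) as f:
    #           for line in f:
    #               p, lab = line.strip().split("\t")
    #               paths.append(p)
    #               labels.append(int(lab))
    #       def _load(path, label):
    #           img = tf.io.decode_image(tf.io.read_file(path), channels=3,
    #                                    expand_animations=False)
    #           return tf.image.resize(img, (224, 224)) / 255., label
    #       return tf.data.Dataset.from_tensor_slices((paths, labels)).map(_load)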
x_train, y_train = generateds(trainer_file_path)
x_test, y_test = generateds(test_file_path)
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train = tf.convert_to_tensor(y_train, dtype=tf.int32)
x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
y_test = tf.convert_to_tensor(y_test, dtype=tf.int32)
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))  # build dataset objects
    train_ds = train_dataset.batch(32)  # batch the training set with batch size 32
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_ds = test_dataset.batch(32)
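    # Shuffling and prefetching would be typical here, e.g. (an assumed tweak,
    # not part of the original code):
    #   train_ds = train_dataset.shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)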
    print('------------------------------- dataset ready -------------------------------')
model = create_model(num_classes=num_classes)
    # build() only creates the weights; global average pooling in the head makes the
    # network size-agnostic, so the 224x224 images produced above still work
    model.build((32, img_size[num_model][0], img_size[num_model][0], 3))
    # # Download the pretrained weights converted in advance:
    # # link: https://pan.baidu.com/s/1Pr-pO5sQVySPQnBY8pQH7w  password: f6hi
# # load weights
# pre_weights_path = './efficientnetv2-s.h5'
# assert os.path.exists(pre_weights_path), "cannot find {}".format(pre_weights_path)
# model.load_weights(pre_weights_path, by_name=True, skip_mismatch=True)
    # freeze bottom layers (only meaningful when pretrained weights are loaded above)
    if freeze_layers:
unfreeze_layers = "head"
for layer in model.layers:
if unfreeze_layers not in layer.name:
layer.trainable = False
else:
print("training {}".format(layer.name))
model.summary()
# custom learning rate curve
def scheduler(now_epoch):
end_lr_rate = 0.01 # end_lr = initial_lr * end_lr_rate
rate = ((1 + math.cos(now_epoch * math.pi / epochs)) / 2) * (1 - end_lr_rate) + end_lr_rate # cosine
new_lr = rate * initial_lr
# writing lr into tensorboard
with train_writer.as_default():
            tf.summary.scalar('learning rate', data=new_lr, step=now_epoch)
return new_lr
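    # Endpoint check for the cosine schedule (worked values, not original code):
    # now_epoch = 0:      rate = (1 + cos(0))/2 * 0.99 + 0.01 = 1.0  -> lr = initial_lr
    # now_epoch = epochs: rate = (1 - 1)/2 * 0.99 + 0.01    = 0.01 -> lr = 0.01 * initial_lr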
# using keras low level api for training
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr, momentum=0.9)
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')
@tf.function
def train_step(train_images, train_labels):
with tf.GradientTape() as tape:
output = model(train_images, training=True)
loss = loss_object(train_labels, output)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
train_loss(loss)
train_accuracy(train_labels, output)
@tf.function
def val_step(val_images, val_labels):
output = model(val_images, training=False)
loss = loss_object(val_labels, output)
val_loss(loss)
val_accuracy(val_labels, output)
best_val_acc = 0.
for epoch in range(epochs):
        # clear metric history at the start of each epoch
        train_loss.reset_states()
        train_accuracy.reset_states()
        val_loss.reset_states()
        val_accuracy.reset_states()
# train
train_bar = tqdm(train_ds, file=sys.stdout)
for images, labels in train_bar:
train_step(images, labels)
# print train process
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
epochs,
train_loss.result(),
train_accuracy.result())
# update learning rate
optimizer.learning_rate = scheduler(epoch)
# validate
val_bar = tqdm(val_ds, file=sys.stdout)
for images, labels in val_bar:
val_step(images, labels)
# print val process
val_bar.desc = "valid epoch[{}/{}] loss:{:.3f}, acc:{:.3f}".format(epoch + 1,
epochs,
val_loss.result(),
val_accuracy.result())
# writing training loss and acc
with train_writer.as_default():
tf.summary.scalar("loss", train_loss.result(), epoch)
tf.summary.scalar("accuracy", train_accuracy.result(), epoch)
# writing validation loss and acc
with val_writer.as_default():
tf.summary.scalar("loss", val_loss.result(), epoch)
tf.summary.scalar("accuracy", val_accuracy.result(), epoch)
# only save best weights
if val_accuracy.result() > best_val_acc:
best_val_acc = val_accuracy.result()
save_name = "./save_weights/efficientnetv2.ckpt"
model.save_weights(save_name, save_format="tf")
if __name__ == '__main__':
main()
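# To reuse the best checkpoint later -- an illustrative sketch, assuming the same
# model definition and a preprocessed batch `images` (not part of the original code):
#
#   model = create_model(num_classes=10)
#   model.load_weights("./save_weights/efficientnetv2.ckpt")
#   preds = tf.argmax(model(images, training=False), axis=-1)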
Test-set accuracy: 62%