tensorflow2.0建立MobilenetV3网络并进行训练与预测
Step1:前言
之所以采用tensorflow2.0来建立神经网络结构,两个原因:1.需要的依赖项比较少,只需要import tensorflow就行,这样的话搭环境就很方便;2.tensorflow对硬件的调用比较高效,很适合将这种轻量级神经网络搭建在边缘盒子上。
Step2:建立bneck卷积块
写好要用的激活函数
之所以先写激活函数,是因为之后要用到。MobilenetV3要用到的两个激活函数如下。
def h_sigmoid(x):
    """Hard sigmoid: relu6(x + 3) / 6, a piecewise-linear sigmoid approximation."""
    shifted = tf.nn.relu6(x + 3)
    return shifted / 6
def h_swish(x):
    """Hard swish: x * h_sigmoid(x), the cheap swish variant used by MobileNetV3."""
    gate = h_sigmoid(x)
    return x * gate
编写轻量级注意力模型
轻量级注意力模型的示意图(被红色画笔框住的那一块):
编程实现如下所示。
class SEBlock(tf.keras.layers.Layer):
    """Squeeze-and-Excitation block: lightweight channel-wise attention."""

    def __init__(self, input_channels, r=16):
        super(SEBlock, self).__init__()
        # Global average pooling (NOT a dense layer): squeezes each
        # (H, W, C) feature map down to a length-C vector per sample.
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        # Bottleneck pair of dense layers: reduce channels by factor r,
        # then restore to input_channels; `units` is the output width.
        self.fc1 = tf.keras.layers.Dense(units=input_channels // r)
        self.fc2 = tf.keras.layers.Dense(units=input_channels)

    def call(self, inputs, **kwargs):
        weights = self.pool(inputs)       # squeeze: (batch, C)
        weights = self.fc1(weights)
        weights = tf.nn.relu(weights)
        weights = self.fc2(weights)
        weights = h_sigmoid(weights)      # excitation gates in [0, 1]
        # Re-insert the two spatial axes so the gates broadcast over H and W:
        # (batch, C) -> (batch, 1, C) -> (batch, 1, 1, C).
        weights = tf.expand_dims(weights, axis=1)
        weights = tf.expand_dims(weights, axis=1)
        return inputs * weights
bneck的建立
注意,这个程序里面的类名虽然是BottleNeck,但是实际上已经是bneck了。
class BottleNeck(tf.keras.layers.Layer):
    """MobileNetV3 inverted-residual block (the "bneck" of the paper).

    Pipeline: 1x1 expansion conv -> depthwise conv -> optional SE attention
    -> 1x1 projection conv, with a residual shortcut when the spatial size
    and channel count are preserved (stride 1 and in_size == out_size).
    """

    def __init__(self, in_size, exp_size, out_size, s, is_se_existing, NL, k):
        super(BottleNeck, self).__init__()
        self.stride = s                       # depthwise-conv stride
        self.in_size = in_size                # input channel count
        self.out_size = out_size              # output channel count
        self.is_se_existing = is_se_existing  # whether to apply the SE block
        self.NL = NL                          # non-linearity: "HS" or "RE"
        # 1x1 expansion convolution lifts the channel count to exp_size;
        # "same" padding keeps the spatial size (zero padding as needed).
        self.conv1 = tf.keras.layers.Conv2D(filters=exp_size,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        # k x k depthwise convolution; with conv2 below it forms a
        # depthwise-separable convolution.
        self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k),
                                                      strides=s,
                                                      padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.se = SEBlock(input_channels=exp_size)
        # 1x1 projection convolution back down to out_size channels.
        self.conv2 = tf.keras.layers.Conv2D(filters=out_size,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn3 = tf.keras.layers.BatchNormalization()
        # Linear (identity) activation after the projection, as in the paper.
        self.linear = tf.keras.layers.Activation(tf.keras.activations.linear)

    def _activate(self, x):
        # Apply the configured non-linearity; any other value passes through.
        if self.NL == "HS":
            return h_swish(x)
        if self.NL == "RE":
            return tf.nn.relu6(x)
        return x

    def call(self, inputs, training=None, **kwargs):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self._activate(x)
        x = self.dwconv(x)
        x = self.bn2(x, training=training)
        x = self._activate(x)
        if self.is_se_existing:
            x = self.se(x)
        x = self.conv2(x)
        x = self.bn3(x, training=training)
        x = self.linear(x)
        # Residual shortcut only when input and output shapes match exactly.
        if self.stride == 1 and self.in_size == self.out_size:
            x = tf.keras.layers.add([x, inputs])
        return x
Step3:构建MobilenetV3_small网络
网络结构示意图如下。
对应的程序如下。
class MobileNetV3Small(tf.keras.Model):
    """MobileNetV3-Small, following the layer table of the MobileNetV3 paper.

    NOTE(review): NUM_CLASSES is not defined in this snippet — it must be
    supplied by the surrounding module (e.g. imported from the config file).
    """

    def __init__(self):
        super(MobileNetV3Small, self).__init__()
        # ------------------------------------------------
        # Layer types and parameters follow the paper's table.
        # ------------------------------------------------
        # Stem: ordinary 3x3 convolution, stride 2.
        self.conv1 = tf.keras.layers.Conv2D(filters=16, kernel_size=(3, 3), strides=2, padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        # Eleven bneck stages (kept as individual attributes so checkpoint
        # variable paths stay unchanged).
        self.bneck1 = BottleNeck(in_size=16, exp_size=16, out_size=16, s=2, is_se_existing=True, NL="RE", k=3)
        self.bneck2 = BottleNeck(in_size=16, exp_size=72, out_size=24, s=2, is_se_existing=False, NL="RE", k=3)
        self.bneck3 = BottleNeck(in_size=24, exp_size=88, out_size=24, s=1, is_se_existing=False, NL="RE", k=3)
        self.bneck4 = BottleNeck(in_size=24, exp_size=96, out_size=40, s=2, is_se_existing=True, NL="HS", k=5)
        self.bneck5 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck6 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck7 = BottleNeck(in_size=40, exp_size=120, out_size=48, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck8 = BottleNeck(in_size=48, exp_size=144, out_size=48, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck9 = BottleNeck(in_size=48, exp_size=288, out_size=96, s=2, is_se_existing=True, NL="HS", k=5)
        self.bneck10 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck11 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5)
        # Head: 1x1 conv, global 7x7 average pool, then two 1x1 convs that
        # act as the fully-connected classifier (conv4 outputs the softmax
        # over NUM_CLASSES).
        self.conv2 = tf.keras.layers.Conv2D(filters=576, kernel_size=(1, 1), strides=1, padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.avgpool = tf.keras.layers.AveragePooling2D(pool_size=(7, 7), strides=1)
        self.conv3 = tf.keras.layers.Conv2D(filters=1280, kernel_size=(1, 1), strides=1, padding="same")
        self.conv4 = tf.keras.layers.Conv2D(filters=NUM_CLASSES, kernel_size=(1, 1), strides=1, padding="same",
                                            activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = h_swish(x)
        # Run the eleven bneck stages in order.
        for bneck in (self.bneck1, self.bneck2, self.bneck3, self.bneck4,
                      self.bneck5, self.bneck6, self.bneck7, self.bneck8,
                      self.bneck9, self.bneck10, self.bneck11):
            x = bneck(x, training=training)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = h_swish(x)
        x = self.avgpool(x)
        x = self.conv3(x)
        x = h_swish(x)
        return self.conv4(x)
注意NUM_CLASSES这个参数我没有设定,你得根据自己具体问题的类别数自行设定。
Step4:如何调用建立的MobilenetV3进行训练
训练数据的形式与预处理。
首先,训练数据的结构如下面的这个文件夹的结构所示。
这是一个对花进行分类的数据集,第三层文件夹分别对应着花的类名,然后里面存放要用来训练的图片,对图片的文件名没有要求。将数据按照上图所示放好之后,就可以用如下程序进行预处理以生成tfrecord文件。
import pathlib
import random

import tensorflow as tf

from configuration import train_dir, valid_dir, test_dir, train_tfrecord, valid_tfrecord, test_tfrecord  # parameters imported from the config file; you can also set your own values
def get_images_and_labels(data_root_dir):
    """Collect image paths and integer labels from a class-per-folder tree.

    Each immediate subdirectory of *data_root_dir* is one class; class names
    are mapped to indices in sorted order so labels are deterministic.
    Returns (all_image_path, all_image_label), two parallel lists.

    NOTE(review): the original script used `pathlib` without importing it;
    make sure `import pathlib` is present at the top of the file.
    """
    data_root = pathlib.Path(data_root_dir)
    # All files one level below the class folders; any file name is accepted.
    all_image_path = [str(path) for path in data_root.glob('*/*')]
    # Class names, sorted so label indices are stable across runs.
    label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
    # dict: {label name: index}
    label_to_index = {label: index for index, label in enumerate(label_names)}
    # A file's label is the index of its parent folder's name.
    all_image_label = [label_to_index[pathlib.Path(single_image_path).parent.name]
                       for single_image_path in all_image_path]
    return all_image_path, all_image_label
# convert a value to a type compatible tf.train.Feature
def _bytes_feature(value):
    """Return a tf.train.Feature wrapping *value* as a bytes_list."""
    # An EagerTensor must be unwrapped first: BytesList won't unpack a
    # string from an EagerTensor.
    if isinstance(value, type(tf.constant(0.))):
        value = value.numpy()
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def _float_feature(value):
    """Return a tf.train.Feature wrapping a float / double as a float_list."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)
def _int64_feature(value):
    """Return a tf.train.Feature wrapping a bool / enum / int / uint as an int64_list."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
# Create a dictionary with features that may be relevant.
def image_example(image_string, label):
    """Build a tf.train.Example holding one encoded image and its integer label."""
    features = tf.train.Features(feature={
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(image_string),
    })
    return tf.train.Example(features=features)
def shuffle_dict(original_dict):
    """Return a new dict with the same items as *original_dict* in random order.

    Relies on dict insertion order (guaranteed since Python 3.7) to carry the
    shuffled ordering; the input dict is left untouched. Uses the module-level
    `random` import.
    """
    keys = list(original_dict)  # copy the keys so the original is not mutated
    random.shuffle(keys)        # in-place shuffle of the copy
    return {key: original_dict[key] for key in keys}
def dataset_to_tfrecord(dataset_dir, tfrecord_name):
    """Serialize every image under *dataset_dir* into one TFRecord file.

    Images are written in shuffled order as tf.train.Example records holding
    the raw encoded image bytes plus the integer class label.
    """
    image_paths, image_labels = get_images_and_labels(dataset_dir)
    # Pair each path with its label, then shuffle the pairs.
    image_paths_and_labels_dict = dict(zip(image_paths, image_labels))
    image_paths_and_labels_dict = shuffle_dict(image_paths_and_labels_dict)
    # Write the images and labels in TFRecord format.
    with tf.io.TFRecordWriter(path=tfrecord_name) as writer:
        for image_path, label in image_paths_and_labels_dict.items():
            print("Writing to tfrecord: {}".format(image_path))
            # `with` closes the file handle promptly instead of leaking it.
            with open(image_path, 'rb') as image_file:
                image_string = image_file.read()
            tf_example = image_example(image_string, label)
            writer.write(tf_example.SerializeToString())
if __name__ == '__main__':
    # Generate one TFRecord file per dataset split (train / valid / test).
    for src_dir, record_path in ((train_dir, train_tfrecord),
                                 (valid_dir, valid_tfrecord),
                                 (test_dir, test_tfrecord)):
        dataset_to_tfrecord(dataset_dir=src_dir, tfrecord_name=record_path)
处理之后就变成下面这个样子啦。会在dataset文件夹之下生成三个tfrecord文件,tfrecord文件里面存储的是图片的原始编码数据(image_raw)+对应图片的整数标签(label),而不是图片路径。然后在训练的时候,只需要用到这三个文件就行了。
训练数据的调用
def get_the_length_of_dataset(dataset):
    """Count the elements of *dataset* by iterating it once.

    Works for any iterable, including a tf.data.Dataset (which has no cheap
    len()); note this consumes one full pass over the data.
    """
    return sum(1 for _ in dataset)
def get_parsed_dataset(tfrecord_name):
    """Read a TFRecord file and parse each record into {'image_raw', 'label'}.

    The original snippet referenced `_parse_image_function`, which is never
    defined in this article; the parser is inlined here. The feature
    description mirrors the schema written by image_example(): a scalar
    int64 label and the raw (still encoded) image bytes.
    """
    feature_description = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
    }

    def _parse_image_function(example_proto):
        # Parse one serialized Example according to the schema above.
        return tf.io.parse_single_example(example_proto, feature_description)

    raw_dataset = tf.data.TFRecordDataset(tfrecord_name)
    return raw_dataset.map(_parse_image_function)
def generate_datasets():
    """Load the three TFRecord splits, count their sizes, and batch them.

    Returns (train_ds, valid_ds, test_ds, train_count, valid_count, test_count).
    """
    batched = []
    counts = []
    for record_file in (train_tfrecord, valid_tfrecord, test_tfrecord):
        split = get_parsed_dataset(tfrecord_name=record_file)   # read the tfrecord file
        counts.append(get_the_length_of_dataset(split))         # number of examples
        batched.append(split.batch(batch_size=BATCH_SIZE))      # read in batches
    return batched[0], batched[1], batched[2], counts[0], counts[1], counts[2]
# Fetch the batched datasets and the size of each split.
train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()
训练方式1:使用原始数据训练
程序如下。刚看这个程序的时候,建议从main函数开始看,前面的函数的作用可以先不用管,等用到了再看,这样便于理解。
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from configuration import IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS, \
EPOCHS, BATCH_SIZE, save_model_dir, model_index, save_every_n_epoch #这个是从配置文件导入超参数配置,你可以自己写就行。
import math
from models import mobilenet_v3_small
def print_model_summary(network):
    """Build *network* with the configured input shape and print its layer summary.

    Purely informational; has no effect on training.
    """
    network.build(input_shape=(None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
    network.summary()
def get_parsed_dataset(tfrecord_name):
    """Read a TFRecord file and parse each record into {'image_raw', 'label'}.

    The original snippet referenced `_parse_image_function`, which is never
    defined in this article; the parser is inlined here. The feature
    description mirrors the schema the preprocessing script writes: a scalar
    int64 label and the raw (still encoded) image bytes.
    """
    feature_description = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
    }

    def _parse_image_function(example_proto):
        # Parse one serialized Example according to the schema above.
        return tf.io.parse_single_example(example_proto, feature_description)

    raw_dataset = tf.data.TFRecordDataset(tfrecord_name)
    return raw_dataset.map(_parse_image_function)
def process_features(features, data_augmentation):
    """Decode a parsed TFRecord batch into an image tensor stack and labels.

    Note: despite the original comment, 'image_raw' holds the encoded image
    bytes (not a file path); each entry is decoded and preprocessed here.
    Requires eager execution (uses .numpy()).
    """
    image_raw = features['image_raw'].numpy()
    # Decode/augment each image, then stack into a (batch, H, W, C) tensor.
    image_tensor_list = [
        load_and_preprocess_image(image, data_augmentation=data_augmentation)
        for image in image_raw
    ]
    images = tf.stack(image_tensor_list, axis=0)
    labels = features['label'].numpy()
    return images, labels
def load_and_preprocess_image(image_raw, data_augmentation=False):
    """Decode raw image bytes and resize to (IMAGE_HEIGHT, IMAGE_WIDTH) floats.

    With data_augmentation=True, apply random left-right flip, pad/crop to a
    20%-larger canvas, random crop back to the target size, and random
    brightness instead of a plain resize.
    """
    image_tensor = tf.io.decode_image(contents=image_raw, channels=CHANNELS, dtype=tf.dtypes.float32)
    if not data_augmentation:
        return tf.image.resize(image_tensor, [IMAGE_HEIGHT, IMAGE_WIDTH])
    image = tf.image.random_flip_left_right(image=image_tensor)
    # Enlarge the canvas so the subsequent random crop can shift the subject.
    image = tf.image.resize_with_crop_or_pad(image=image,
                                             target_height=int(IMAGE_HEIGHT * 1.2),
                                             target_width=int(IMAGE_WIDTH * 1.2))
    image = tf.image.random_crop(value=image, size=[IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS])
    return tf.image.random_brightness(image=image, max_delta=0.5)
def generate_datasets():
    """Load the three TFRecord splits, count their sizes, and batch them.

    Returns (train_ds, valid_ds, test_ds, train_count, valid_count, test_count).
    """
    splits = []
    sizes = []
    for record_file in (train_tfrecord, valid_tfrecord, test_tfrecord):
        parsed = get_parsed_dataset(tfrecord_name=record_file)  # read the tfrecord file
        sizes.append(get_the_length_of_dataset(parsed))         # number of examples
        splits.append(parsed.batch(batch_size=BATCH_SIZE))      # read in batches
    return splits[0], splits[1], splits[2], sizes[0], sizes[1], sizes[2]
if __name__ == '__main__':
    # GPU settings: allow TensorFlow to grow GPU memory on demand.
    # NOTE(review): the original comment calls these four lines redundant;
    # TF2 does use GPUs automatically, but set_memory_growth additionally
    # stops TF from reserving all GPU memory up front — confirm before removing.
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    # Fetch the batched datasets and split sizes.
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()
    # Build the network and print its architecture (informational only).
    model = mobilenet_v3_small.MobileNetV3Small()
    print_model_summary(network=model)
    # Define loss and optimizer: sparse categorical cross-entropy + RMSprop.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.RMSprop()
    # Running metrics; reset at the end of every epoch.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    valid_loss = tf.keras.metrics.Mean(name='valid_loss')
    valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

    # @tf.function
    def train_step(image_batch, label_batch):
        # One optimization step: forward pass, loss, gradients, weight update.
        # tf.GradientTape records the forward ops so gradients can be derived.
        with tf.GradientTape() as tape:
            predictions = model(image_batch, training=True)
            loss = loss_object(y_true=label_batch, y_pred=predictions)
        # trainable_variables = all trainable weights of the model
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))
        train_loss.update_state(values=loss)
        train_accuracy.update_state(y_true=label_batch, y_pred=predictions)

    # @tf.function
    def valid_step(image_batch, label_batch):
        # Evaluate one batch of the validation split (no weight updates).
        predictions = model(image_batch, training=False)
        v_loss = loss_object(label_batch, predictions)
        valid_loss.update_state(values=v_loss)
        valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)

    # start training
    for epoch in range(EPOCHS):
        step = 0
        for features in train_dataset:
            step += 1
            images, labels = process_features(features, data_augmentation=True)
            train_step(images, labels)  # one training step
            print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}".format(epoch,
                                                                                     EPOCHS,
                                                                                     step,
                                                                                     math.ceil(train_count / BATCH_SIZE),
                                                                                     train_loss.result().numpy(),
                                                                                     train_accuracy.result().numpy()))
        for features in valid_dataset:
            valid_images, valid_labels = process_features(features, data_augmentation=False)
            valid_step(valid_images, valid_labels)
        print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
              "valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch,
                                                                  EPOCHS,
                                                                  train_loss.result().numpy(),
                                                                  train_accuracy.result().numpy(),
                                                                  valid_loss.result().numpy(),
                                                                  valid_accuracy.result().numpy()))
        # Metrics accumulate across updates, so reset them after every epoch.
        train_loss.reset_states()
        train_accuracy.reset_states()
        valid_loss.reset_states()
        valid_accuracy.reset_states()
        # Periodic checkpoint every save_every_n_epoch epochs.
        if epoch % save_every_n_epoch == 0:
            model.save_weights(filepath=save_model_dir+"epoch-{}".format(epoch), save_format='tf')
    # Save the final trained weights.
    model.save_weights(filepath=save_model_dir+"model", save_format='tf')
训练方式2:使用别人的预训练权值进行训练(迁移学习)
训练别人的权值,只需要先下载好别人的ckpt格式的预训练权值文件,然后在训练方式1中的model = mobilenet_v3_small.MobileNetV3Small()这一句后面加上下面这一句即可。
model.load_weights('权值文件的路径')
Step5:如何利用训练好的MobilenetV3模型进行预测
相比于训练,预测起来就很简单啦,代码如下。
import tensorflow as tf
from configuration import test_image_dir #这个是从配置文件导入超参数配置,你可以自己写就行。
from train import get_model
import time
def load_and_preprocess_image(image_raw, data_augmentation=False):
    """Decode raw image bytes into a float tensor sized for the network input.

    data_augmentation=False (the prediction path) just resizes; True applies
    random flip, pad + random crop, and random brightness.
    """
    decoded = tf.io.decode_image(contents=image_raw, channels=CHANNELS, dtype=tf.dtypes.float32)
    if data_augmentation:
        augmented = tf.image.random_flip_left_right(image=decoded)
        # Pad/crop to 120% of the target so the random crop can shift the view.
        augmented = tf.image.resize_with_crop_or_pad(image=augmented,
                                                     target_height=int(IMAGE_HEIGHT * 1.2),
                                                     target_width=int(IMAGE_WIDTH * 1.2))
        augmented = tf.image.random_crop(value=augmented, size=[IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS])
        return tf.image.random_brightness(image=augmented, max_delta=0.5)
    return tf.image.resize(decoded, [IMAGE_HEIGHT, IMAGE_WIDTH])
def get_single_picture_prediction(model, picture_dir):
    """Run *model* on a single image file and return the predicted class index."""
    # tf.io.read_file loads the raw bytes (like Python's open(...).read());
    # decoding and resizing to the network input size happen in
    # load_and_preprocess_image.
    image_tensor = load_and_preprocess_image(tf.io.read_file(filename=picture_dir), data_augmentation=False)
    # Add a batch dimension: (H, W, C) -> (1, H, W, C).
    image = tf.expand_dims(image_tensor, axis=0)
    start = time.time()
    prediction = model(image, training=False)  # run inference
    elapsed = time.time() - start
    print("用时:", elapsed)
    # Plain argmax over the class axis picks the most probable class
    # (note: this is not non-maximum suppression, despite the old comment).
    pred_class = tf.math.argmax(prediction, axis=-1)
    return pred_class
if __name__ == '__main__':
    # GPU settings: allow TensorFlow to grow GPU memory on demand.
    # NOTE(review): the original comment calls these lines redundant; TF2 does
    # use GPUs automatically, but set_memory_growth additionally stops TF from
    # reserving all GPU memory up front — confirm before removing.
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    # Load the model and its trained weights. Use a forward slash in the
    # path: the original backslash ("saved_model\epoch-30.ckpt") only works
    # on Windows and risks being misread as an escape sequence.
    model = get_model()
    model.load_weights(filepath="saved_model/epoch-30.ckpt")  # point this at your own checkpoint
    while True:
        pred_class = get_single_picture_prediction(model, test_image_dir)
        print(pred_class.numpy())