tensorflow2.0建立MobilenetV3网络并进行训练与预测

Step1:前言

之所以采用tensorflow2.0来建立神经网络结构,两个原因:1.需要的依赖项比较少,只需要import tensorflow就行,这样的话搭环境就很方便;2.tensorflow对硬件的调用比较高效,很适合将这种轻量级神经网络搭建在边缘盒子上。

Step2:建立bneck卷积块

写好要用的激活函数

之所以先写激活函数,是因为之后要用到。MobilenetV3要用到的两个激活函数如下。

def h_sigmoid(x):
    """Hard sigmoid: ReLU6(x + 3) / 6 — a cheap piecewise-linear approximation of sigmoid."""
    clipped = tf.nn.relu6(x + 3)
    return clipped / 6
def h_swish(x):
    """Hard swish: x * h_sigmoid(x), MobileNetV3's cheap approximation of swish."""
    return x * h_sigmoid(x)

编写轻量级注意力模型

轻量级注意力模型的示意图(被红色画笔框住的那一块):
在这里插入图片描述
编程实现如下所示。

class SEBlock(tf.keras.layers.Layer):
    """Squeeze-and-Excitation channel-attention block.

    Squeezes each feature map to one scalar per channel via global average
    pooling, passes the result through a two-layer bottleneck MLP, and
    rescales the input's channels by the resulting weights.
    """

    def __init__(self, input_channels, r=16):
        super(SEBlock, self).__init__()
        # Global average pooling: (batch, H, W, C) -> (batch, C).
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.fc1 = tf.keras.layers.Dense(units=input_channels // r)  # squeeze to C // r units
        self.fc2 = tf.keras.layers.Dense(units=input_channels)       # excite back to C units

    def call(self, inputs, **kwargs):
        weights = self.pool(inputs)
        weights = tf.nn.relu(self.fc1(weights))
        weights = h_sigmoid(self.fc2(weights))
        # Reshape (batch, C) -> (batch, 1, 1, C) so the scale broadcasts over H and W.
        weights = tf.expand_dims(tf.expand_dims(weights, axis=1), axis=1)
        return inputs * weights

bneck的建立

注意,这个程序里面的类名虽然是BottleNeck,但是实际上已经是bneck了。

class BottleNeck(tf.keras.layers.Layer):
    """MobileNetV3 "bneck" inverted-residual block.

    Pipeline: 1x1 expansion conv -> depthwise conv -> optional SE attention
    -> 1x1 linear projection, with a residual shortcut added when
    stride == 1 and the input/output channel counts match.
    """

    def __init__(self, in_size, exp_size, out_size, s, is_se_existing, NL, k):
        super(BottleNeck, self).__init__()
        self.stride = s                      # depthwise-conv stride
        self.in_size = in_size               # input channel count
        self.out_size = out_size             # output channel count
        self.is_se_existing = is_se_existing # whether to insert the SE block
        self.NL = NL                         # non-linearity: "HS" (hard-swish) or "RE" (ReLU6)
        # 1x1 expansion conv; exp_size is given directly rather than via an expansion factor.
        self.conv1 = tf.keras.layers.Conv2D(filters=exp_size,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        # k x k depthwise conv carries the stride.
        self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k),
                                                      strides=s,
                                                      padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.se = SEBlock(input_channels=exp_size)
        # 1x1 projection conv; together with dwconv this forms a depthwise-separable conv.
        self.conv2 = tf.keras.layers.Conv2D(filters=out_size,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn3 = tf.keras.layers.BatchNormalization()
        # Identity activation, kept to mirror the paper's "linear" projection stage.
        self.linear = tf.keras.layers.Activation(tf.keras.activations.linear)

    def _nonlinearity(self, x):
        # Apply the configured activation; any other NL value is identity.
        if self.NL == "HS":
            return h_swish(x)
        if self.NL == "RE":
            return tf.nn.relu6(x)
        return x

    def call(self, inputs, training=None, **kwargs):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = self._nonlinearity(x)
        x = self.dwconv(x)
        x = self.bn2(x, training=training)
        x = self._nonlinearity(x)
        if self.is_se_existing:
            x = self.se(x)
        x = self.conv2(x)
        x = self.bn3(x, training=training)
        x = self.linear(x)
        # Residual shortcut only when spatial size and channel count are unchanged.
        if self.stride == 1 and self.in_size == self.out_size:
            x = tf.keras.layers.add([x, inputs])
        return x

Step3:构建MobilenetV3_small网络

网络结构示意图如下。
在这里插入图片描述
对应的程序如下。

class MobileNetV3Small(tf.keras.Model):
    """MobileNetV3-Small: stem conv, 11 bneck blocks, and a conv classification head.

    NOTE(review): NUM_CLASSES is not defined in this snippet — it must be
    supplied by the surrounding configuration before this class is used.
    """
    def __init__(self):
        super(MobileNetV3Small, self).__init__()
        #------------------------------------------------#
        # Layer types and parameters below follow the
        # MobileNetV3-Small table in the paper.
        #------------------------------------------------#
        self.conv1 = tf.keras.layers.Conv2D(filters=16,kernel_size=(3, 3),strides=2,padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.bneck1 = BottleNeck(in_size=16, exp_size=16, out_size=16, s=2, is_se_existing=True, NL="RE", k=3)
        self.bneck2 = BottleNeck(in_size=16, exp_size=72, out_size=24, s=2, is_se_existing=False, NL="RE", k=3)
        self.bneck3 = BottleNeck(in_size=24, exp_size=88, out_size=24, s=1, is_se_existing=False, NL="RE", k=3)
        self.bneck4 = BottleNeck(in_size=24, exp_size=96, out_size=40, s=2, is_se_existing=True, NL="HS", k=5)
        self.bneck5 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck6 = BottleNeck(in_size=40, exp_size=240, out_size=40, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck7 = BottleNeck(in_size=40, exp_size=120, out_size=48, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck8 = BottleNeck(in_size=48, exp_size=144, out_size=48, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck9 = BottleNeck(in_size=48, exp_size=288, out_size=96, s=2, is_se_existing=True, NL="HS", k=5)
        self.bneck10 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5)
        self.bneck11 = BottleNeck(in_size=96, exp_size=576, out_size=96, s=1, is_se_existing=True, NL="HS", k=5)
        self.conv2 = tf.keras.layers.Conv2D(filters=576,kernel_size=(1, 1),strides=1,padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        # Pool size (7, 7) assumes the feature map is 7x7 here (e.g. 224x224 input) — TODO confirm for other input sizes.
        self.avgpool = tf.keras.layers.AveragePooling2D(pool_size=(7, 7),strides=1)
        self.conv3 = tf.keras.layers.Conv2D(filters=1280,kernel_size=(1, 1),strides=1,padding="same")
        # 1x1 conv with softmax plays the role of the final classifier layer.
        self.conv4 = tf.keras.layers.Conv2D(filters=NUM_CLASSES,kernel_size=(1, 1),strides=1,padding="same",activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = h_swish(x)
        x = self.bneck1(x, training=training)
        x = self.bneck2(x, training=training)
        x = self.bneck3(x, training=training)
        x = self.bneck4(x, training=training)
        x = self.bneck5(x, training=training)
        x = self.bneck6(x, training=training)
        x = self.bneck7(x, training=training)
        x = self.bneck8(x, training=training)
        x = self.bneck9(x, training=training)
        x = self.bneck10(x, training=training)
        x = self.bneck11(x, training=training)
        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = h_swish(x)
        x = self.avgpool(x)
        x = self.conv3(x)
        x = h_swish(x)
        x = self.conv4(x)
        return x

注意NUM_CLASSES这个参数我没有设定,你要根据自己具体问题的类别数来设定。

Step4:如何调用建立的MobilenetV3进行训练

训练数据的形式与预处理。

首先,训练数据的结构如下面的这个文件夹的结构所示。
在这里插入图片描述
这是一个对花进行分类的数据集,第三层文件夹分别对应着花的类名,然后里面存放要用来训练的图片,对图片的文件名没有要求。将数据按照上图所示放好之后,就可以用如下程序进行预处理以生成tfrecord文件。

import pathlib
import random

import tensorflow as tf

from configuration import train_dir, valid_dir, test_dir, train_tfrecord, valid_tfrecord, test_tfrecord #这是从配置文件里面导入参数,你也可以自己设置参数值。

def get_images_and_labels(data_root_dir):
    """Scan a class-per-subdirectory image tree.

    Every file one level below a class directory is treated as an image;
    its label is the index of its parent directory's name in the sorted
    list of class names.

    Fix: the original snippet used `pathlib` without ever importing it,
    which raises NameError at runtime; the import is added at file top.

    Returns:
        (all_image_paths, all_image_labels): parallel lists of path strings
        and integer labels.
    """
    data_root = pathlib.Path(data_root_dir)
    all_image_paths = [str(path) for path in data_root.glob('*/*')]
    # Sort class names so label indices are stable across runs.
    label_names = sorted(item.name for item in data_root.glob('*/'))
    label_to_index = {label: index for index, label in enumerate(label_names)}
    all_image_labels = [label_to_index[pathlib.Path(p).parent.name]
                        for p in all_image_paths]
    return all_image_paths, all_image_labels
    
# convert a value to a type compatible tf.train.Feature
def _bytes_feature(value):
    """Wrap a string / bytes value in a tf.train.Feature holding a BytesList."""
    eager_tensor_type = type(tf.constant(0.))
    if isinstance(value, eager_tensor_type):
        # BytesList won't unpack a string from an EagerTensor — extract it first.
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def _float_feature(value):
    """Wrap a float / double value in a tf.train.Feature holding a FloatList."""
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)


def _int64_feature(value):
    """Wrap a bool / enum / int / uint value in a tf.train.Feature holding an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


# Create a dictionary with features that may be relevant.
def image_example(image_string, label):
    """Build a tf.train.Example holding the raw image bytes and its integer label."""
    features = tf.train.Features(feature={
        'label': _int64_feature(label),
        'image_raw': _bytes_feature(image_string),
    })
    return tf.train.Example(features=features)


def shuffle_dict(original_dict):
    """Return a new dict with the same items as *original_dict* in random key order.

    Relies on dicts preserving insertion order (Python 3.7+).  Replaces the
    original's manual key-collection loop with `list(d)` and a dict
    comprehension.
    """
    keys = list(original_dict)
    random.shuffle(keys)
    return {key: original_dict[key] for key in keys}


def dataset_to_tfrecord(dataset_dir, tfrecord_name):
    """Serialize every image under *dataset_dir* into one TFRecord file.

    The (path -> label) mapping is shuffled first so examples are not written
    grouped by class.

    Fixes: the original opened each image with `open(...).read()` and never
    closed the handle (leak); the path/label dict is now built with
    `dict(zip(...))` instead of an index loop.
    """
    image_paths, image_labels = get_images_and_labels(dataset_dir)
    image_paths_and_labels_dict = dict(zip(image_paths, image_labels))
    # Shuffle so the record is written in random order.
    image_paths_and_labels_dict = shuffle_dict(image_paths_and_labels_dict)
    with tf.io.TFRecordWriter(path=tfrecord_name) as writer:
        for image_path, label in image_paths_and_labels_dict.items():
            print("Writing to tfrecord: {}".format(image_path))
            # 'with' guarantees the image file handle is closed.
            with open(image_path, 'rb') as image_file:
                image_string = image_file.read()
            tf_example = image_example(image_string, label)
            writer.write(tf_example.SerializeToString())
if __name__ == '__main__':
    # Convert each split (train / valid / test) into its own TFRecord file.
    splits = ((train_dir, train_tfrecord),
              (valid_dir, valid_tfrecord),
              (test_dir, test_tfrecord))
    for source_dir, record_name in splits:
        dataset_to_tfrecord(dataset_dir=source_dir, tfrecord_name=record_name)

处理之后就变成下面这个样子啦。会在dataset文件夹之下生成三个tfrecord文件,tfrecord文件里面存储的是图片的原始字节数据和对应图片的标签(而不是图片路径)。然后在训练的时候,只需要用到这三个文件就行了。
在这里插入图片描述

训练数据的调用

def get_the_length_of_dataset(dataset):
    """Count the elements of an iterable (e.g. a tf.data.Dataset) by iterating it once.

    Replaces the manual counter loop with the idiomatic `sum(1 for _ in ...)`.
    Note this consumes one full pass over the iterable.
    """
    return sum(1 for _ in dataset)
def get_parsed_dataset(tfrecord_name):
    """Open a TFRecord file and map every serialized example through the parser.

    NOTE(review): `_parse_image_function` is never defined in this snippet —
    it must be supplied elsewhere (presumably it parses the 'image_raw' and
    'label' features written by dataset_to_tfrecord; confirm).
    """
    raw_dataset = tf.data.TFRecordDataset(tfrecord_name)
    parsed_dataset = raw_dataset.map(_parse_image_function)
    return parsed_dataset
def generate_datasets():
    """Load the three TFRecord splits, count their examples, and batch them.

    Returns:
        (train_ds, valid_ds, test_ds, train_count, valid_count, test_count)
    """
    record_names = (train_tfrecord, valid_tfrecord, test_tfrecord)
    parsed = [get_parsed_dataset(tfrecord_name=name) for name in record_names]
    # Counting consumes one full pass over each dataset, so do it before batching.
    counts = [get_the_length_of_dataset(ds) for ds in parsed]
    batched = [ds.batch(batch_size=BATCH_SIZE) for ds in parsed]
    return batched[0], batched[1], batched[2], counts[0], counts[1], counts[2]
# Fetch the batched datasets and the per-split example counts.
train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets() # unpack all six return values

训练方式1:使用原始数据训练

程序如下。刚看这个程序的时候,建议从main函数开始看,前面的函数的作用可以先不用管,等用到了再看,这样便于理解。

from __future__ import absolute_import, division, print_function
import tensorflow as tf
from configuration import IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS, \
    EPOCHS, BATCH_SIZE, save_model_dir, model_index, save_every_n_epoch #这个是从配置文件导入超参数配置,你可以自己写就行。
import math
from models import mobilenet_v3_small

def print_model_summary(network):
    """Build *network* with the configured input shape and print its layer summary.

    Purely informational — has no effect on training.
    """
    input_shape = (None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)
    network.build(input_shape=input_shape)
    network.summary()
def get_parsed_dataset(tfrecord_name):
    """Open a TFRecord file and map every serialized example through the parser.

    NOTE(review): `_parse_image_function` is never defined in this snippet —
    it must be supplied elsewhere (presumably it parses the 'image_raw' and
    'label' features written during preprocessing; confirm).
    """
    raw_dataset = tf.data.TFRecordDataset(tfrecord_name)
    parsed_dataset = raw_dataset.map(_parse_image_function)
    return parsed_dataset
def process_features(features, data_augmentation):
    """Decode a batch of parsed examples into (images, labels) arrays.

    Note: contrary to the original comment, this does not handle image
    *paths* — 'image_raw' holds the encoded image bytes themselves.

    Fix: replaces the append-loop with a list comprehension feeding tf.stack.
    Requires eager mode (uses .numpy() on the feature tensors).
    """
    image_raw = features['image_raw'].numpy()
    # Decode/augment each image, then stack into one (batch, H, W, C) tensor.
    images = tf.stack(
        [load_and_preprocess_image(raw, data_augmentation=data_augmentation)
         for raw in image_raw],
        axis=0)
    labels = features['label'].numpy()
    return images, labels
    
def load_and_preprocess_image(image_raw, data_augmentation=False):
    """Decode raw image bytes and either augment (training) or resize (eval)."""
    decoded = tf.io.decode_image(contents=image_raw, channels=CHANNELS, dtype=tf.dtypes.float32)

    if not data_augmentation:
        # Evaluation path: plain resize to the network's input resolution.
        return tf.image.resize(decoded, [IMAGE_HEIGHT, IMAGE_WIDTH])

    # Training path: random flip, pad/crop to 120%, random crop back, random brightness.
    augmented = tf.image.random_flip_left_right(image=decoded)
    augmented = tf.image.resize_with_crop_or_pad(image=augmented,
                                                 target_height=int(IMAGE_HEIGHT * 1.2),
                                                 target_width=int(IMAGE_WIDTH * 1.2))
    augmented = tf.image.random_crop(value=augmented, size=[IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS])
    return tf.image.random_brightness(image=augmented, max_delta=0.5)

def generate_datasets():
    """Load the three TFRecord splits, count their examples, and batch them.

    Returns:
        (train_ds, valid_ds, test_ds, train_count, valid_count, test_count)
    """
    record_names = (train_tfrecord, valid_tfrecord, test_tfrecord)
    parsed = [get_parsed_dataset(tfrecord_name=name) for name in record_names]
    # Counting consumes one full pass over each dataset, so do it before batching.
    counts = [get_the_length_of_dataset(ds) for ds in parsed]
    batched = [ds.batch(batch_size=BATCH_SIZE) for ds in parsed]
    return batched[0], batched[1], batched[2], counts[0], counts[1], counts[2]

if __name__ == '__main__':
    # GPU settings: enable memory growth so TensorFlow allocates GPU memory
    # on demand instead of grabbing it all at start-up.  (The original comment
    # claimed these lines are redundant; memory growth is an opt-in setting.)
    gpus = tf.config.list_physical_devices("GPU")
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # Load the batched datasets and per-split example counts.
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets() #

    # Build the network.
    model = mobilenet_v3_small.MobileNetV3Small()
    print_model_summary(network=model) # print the architecture summary

    # Define loss and optimizer.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy() # cross-entropy on integer class labels
    optimizer = tf.keras.optimizers.RMSprop() # RMSprop gradient-descent optimizer

    # Running metrics; they accumulate across update_state() calls until reset.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    valid_loss = tf.keras.metrics.Mean(name='valid_loss')
    valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

    # @tf.function  # left commented out in the original — presumably to keep eager mode for debugging; TODO confirm
    def train_step(image_batch, label_batch):
        # One optimization step: forward pass, loss, backprop, metric update.
        with tf.GradientTape() as tape: # the tape records ops for automatic differentiation
            predictions = model(image_batch, training=True)
            loss = loss_object(y_true=label_batch, y_pred=predictions)
        gradients = tape.gradient(loss, model.trainable_variables) # gradients w.r.t. the trainable variables
        optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))

        train_loss.update_state(values=loss)
        train_accuracy.update_state(y_true=label_batch, y_pred=predictions)

    # @tf.function
    def valid_step(image_batch, label_batch):
        # Evaluation step on the validation split (no gradient update).
        predictions = model(image_batch, training=False)
        v_loss = loss_object(label_batch, predictions)

        valid_loss.update_state(values=v_loss)
        valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)

    # start training
    for epoch in range(EPOCHS):
        step = 0
        for features in train_dataset:
            step += 1
            images, labels = process_features(features, data_augmentation=True)
            train_step(images, labels) # run one training step
            print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}".format(epoch,
                                                                                     EPOCHS,
                                                                                     step,
                                                                                     math.ceil(train_count / BATCH_SIZE),
                                                                                     train_loss.result().numpy(),
                                                                                     train_accuracy.result().numpy()))

        # Full pass over the validation split, without augmentation.
        for features in valid_dataset:
            valid_images, valid_labels = process_features(features, data_augmentation=False)
            valid_step(valid_images, valid_labels)

        print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
              "valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch,
                                                                  EPOCHS,
                                                                  train_loss.result().numpy(),
                                                                  train_accuracy.result().numpy(),
                                                                  valid_loss.result().numpy(),
                                                                  valid_accuracy.result().numpy()))
        train_loss.reset_states() # metrics accumulate, so reset them after every epoch
        train_accuracy.reset_states()
        valid_loss.reset_states()
        valid_accuracy.reset_states()

        # Periodic checkpoint every save_every_n_epoch epochs.
        if epoch % save_every_n_epoch == 0:
            model.save_weights(filepath=save_model_dir+"epoch-{}".format(epoch), save_format='tf')


    # save weights
    model.save_weights(filepath=save_model_dir+"model", save_format='tf') # save the final trained weights

训练方式2:使用别人的预训练权值进行训练(迁移学习)

要在别人的预训练权值基础上继续训练(迁移学习),只需要先下载好别人的ckpt格式的预训练权值文件,然后在训练方式1中的model = mobilenet_v3_small.MobileNetV3Small()这一句后面加上下面这一句即可。

model.load_weights('权值文件的路径') 

Step5:如何利用训练好的MobilenetV3模型进行预测

相比于训练,预测起来就很简单啦,代码如下。

import tensorflow as tf
from configuration import test_image_dir #这个是从配置文件导入超参数配置,你可以自己写就行。
from train import get_model
import time

def load_and_preprocess_image(image_raw, data_augmentation=False):
    """Decode raw image bytes and either augment (training) or resize (eval)."""
    decoded = tf.io.decode_image(contents=image_raw, channels=CHANNELS, dtype=tf.dtypes.float32)

    if not data_augmentation:
        # Inference path: plain resize to the network's input resolution.
        return tf.image.resize(decoded, [IMAGE_HEIGHT, IMAGE_WIDTH])

    # Training path: random flip, pad/crop to 120%, random crop back, random brightness.
    augmented = tf.image.random_flip_left_right(image=decoded)
    augmented = tf.image.resize_with_crop_or_pad(image=augmented,
                                                 target_height=int(IMAGE_HEIGHT * 1.2),
                                                 target_width=int(IMAGE_WIDTH * 1.2))
    augmented = tf.image.random_crop(value=augmented, size=[IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS])
    return tf.image.random_brightness(image=augmented, max_delta=0.5)

def get_single_picture_prediction(model, picture_dir):
    """Classify one image file and return the predicted class index tensor."""
    # tf.io.read_file reads the raw bytes (like open().read()); they are then
    # decoded and resized to the model's input size.
    raw_bytes = tf.io.read_file(filename=picture_dir)
    image_tensor = load_and_preprocess_image(raw_bytes, data_augmentation=False)
    batched = tf.expand_dims(image_tensor, axis=0)  # add the batch dimension
    start = time.time()
    prediction = model(batched, training=False)
    end = time.time()
    print("用时:", end - start)
    # argmax over the class axis picks the highest-probability class
    # (this is argmax, not non-max suppression as the original comment said).
    pred_class = tf.math.argmax(prediction, axis=-1)
    return pred_class


if __name__ == '__main__':
    # GPU settings: enable memory growth so TensorFlow allocates GPU memory
    # on demand instead of grabbing it all at start-up.
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # Load the model and its trained weights (adjust the path to your file).
    # NOTE(review): prefer forward slashes or a raw string for Windows paths.
    model = get_model()
    model.load_weights(filepath="saved_model\epoch-30.ckpt")
    while True:
        pred_class = get_single_picture_prediction(model, test_image_dir)
        # Bug fix: the original print sat AFTER the infinite loop and was
        # unreachable; print each prediction inside the loop instead.
        print(pred_class.numpy())
  • 6
    点赞
  • 40
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 11
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 11
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Ai_Taoism

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值