一、迁移学习概念
迁移学习(Transfer learning) :
- 把已训练好的模型(预训练模型)参数迁移到新的模型来帮助新模型训练。
- 大部分数据或任务是存在相关性的,迁移学习可以将已经学到的模型参数通过某种方式(我主要用的是Fine-tuning)分享给新模型,从而加快并优化模型的学习效率,不用从零开始学习。
深度网络存在的问题:
- 网络越深,需要的训练样本数越多。若用监督则需大量标注样本,不然小规模样本容易造成过拟合。
- 多层神经网络参数优化,经常陷入局部最优。
- 梯度扩散(梯度消失)问题:BP算法计算出的梯度在反向传播时随着层数向前而显著减小,导致靠前的网络层参数更新速度慢、贡献很小。
解决方案:预训练(Pre-training)
- 训练网络模型的时候,需要随机初始化参数,并不断调整,直到网络的损失足够小。当训练结果令人满意时把参数保存下来,这样模型在下次执行类似任务时就能以这些参数为起点,获得较好的结果。(一般使用别人在权威数据集上已经训练好的模型参数,例如Keras官方提供的Xception,VGG16,VGG19,ResNet,ResNetV2,ResNeXt,InceptionV3,InceptionResNetV2,MobileNet,MobileNetV2,DenseNet,NASNet)
二、Fine-tuning 模型的三种方式:
- 方式1:只预测,不训练。
  特点:相对快、简单,针对那些已经训练好、现在要实际对未知数据进行标注的项目,非常高效。
- 方式2:训练,但只训练最后分类层。
  特点:fine-tuning的模型最终的分类已经符合要求,现在只是在它们的基础上进行类别降维,让模型更符合特定的图像分类,使用较多。
- 方式3:完全训练,分类层+之前卷积层都训练。
  特点:跟方式2的差异很小,当然方式3比较耗时,也需要更多的GPU资源,不过非常适合Fine-tuning成自己想要的模型,预测精度相比方式2也提高不少。
- 总结:网络框架一样的话,通常是训练前期使用方式2,然后使用方式3。学习率(LR)和BATCH_SIZE(BS)要调整,前期LR和BS大些,后期LR和BS小些。
代码
主干网络是仿照T3D搭的C3D,预训练模型是mobilenet。
由于T3D网络的预训练模型是densenet169,输入尺寸很难更改,为了更方便地调节输入尺寸来训练自己的数据,将densenet169改成了可调节尺寸的mobilenet。T3D改成C3D,没什么特殊含义,纯粹是因为T3D的参数量太大了,电脑跑不动。/(ㄒoㄒ)/~~
主干网络代码
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 16 14:52:58 2020
3D-CNN 模型
@author: Onion
"""
import tensorflow as tf
from keras import backend as K
import keras
from keras.models import Sequential, Model
from keras.layers import Input, BatchNormalization, Activation, Conv3D, Dropout, Concatenate, \
AveragePooling3D, MaxPooling3D, Dense, Flatten, GlobalAveragePooling2D, add, GlobalAveragePooling3D
from keras.activations import linear, softmax
from keras.optimizers import Adam
from keras.regularizers import l2
# from keras.applications import densenet
from mobilenet import MobileNet # 自己构建网络,这样就可以对输入尺寸调整
# C3D + transfer learning
def C3D(input_shape, num_classes, freeze_layers=81):
    """Build and compile a two-input classifier: a 3D-CNN branch plus a MobileNet branch.

    The 2D branch is a MobileNet backbone (alpha=0.25, fixed 112x112x3 input)
    initialised from pretrained weights; the 3D branch is a small Conv3D
    stack. Both branches end in a 1024-unit dense layer, are concatenated,
    and feed a dense classification head with a softmax output.

    Args:
        input_shape: Shape (without batch axis) of the 3D input tensor.
        num_classes: Number of units in the final softmax layer.
        freeze_layers: Number of leading MobileNet layers to mark as
            non-trainable. Pass 0 to fine-tune the whole backbone.

    Returns:
        A compiled Keras ``Model`` taking ``[2d_input, 3d_input]`` and
        producing one softmax output. The model summary is printed as a
        side effect.
    """
    # ---- Option 1: MobileNet backbone with pretrained weights ------------
    alpha = 0.25
    inp_2d = Input(shape=(112, 112, 3), name='2d_input')
    # inp_2d.get_shape().as_list()  # inspect the tensor shape
    mobilenet = MobileNet(inp_2d, classes=2, alpha=alpha)
    # The weight file has to match `alpha`: with skip_mismatch=True, layers
    # whose shapes differ are silently skipped, so a file for another width
    # would leave the backbone randomly initialised. The "2_5" file is the
    # width-0.25 release; it must be present under model_data/.
    model_path = "model_data/mobilenet_2_5_224_tf_no_top.h5"
    mobilenet.load_weights(model_path, by_name=True, skip_mismatch=True)
    # Freeze the leading backbone layers so only the remaining layers and the
    # new head are updated. Slicing tolerates a backbone with fewer layers.
    for layer in mobilenet.layers[:freeze_layers]:
        layer.trainable = False

    # ---- Option 2 (disabled): DenseNet169 shipped with Keras -------------
    # inp_2d = (Input(shape=(224,224,3), name='2d_input'))
    # pretrained_densenet = densenet.DenseNet169(include_top=False, input_shape=(224,224,3), input_tensor=inp_2d, weights='imagenet')
    # for layer in pretrained_densenet.layers:
    #     layer.trainable = True
    # pretrained_densenet.summary()
    # print(pretrained_densenet.layers[594].output)
    # pretrained_densenet.output

    # ---- 3D branch --------------------------------------------------------
    inp_3d = Input(shape=input_shape, name='3d_input')
    # need to check padding
    x = Conv3D(32, kernel_size=(3, 3, 3), padding='same')(inp_3d)
    x = Activation('relu')(x)
    x = Conv3D(32, kernel_size=(3, 3, 3), padding='same')(x)
    # NOTE(review): softmax as a hidden activation is unusual (the other
    # convolutions use ReLU) - confirm it is intentional; kept unchanged.
    x = Activation('softmax')(x)
    x = MaxPooling3D(pool_size=(3, 3, 3), padding='same')(x)
    x = Dropout(0.5)(x)
    x = Conv3D(64, kernel_size=(3, 3, 3), padding='same')(x)
    x = Activation('relu')(x)
    # NOTE(review): no activation follows this convolution - confirm intent.
    x = Conv3D(64, kernel_size=(3, 3, 3), padding='same')(x)
    x = MaxPooling3D(pool_size=(3, 3, 3), padding='same')(x)
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = Dense(1024, activation='relu')(x)

    # ---- Features from the 2D pretrained backbone --------------------------
    # The backbone has no top, so pool its feature map before the dense layer.
    y = GlobalAveragePooling2D(name='avg_pool_densnet2d')(mobilenet.output)
    y = Dense(1024, activation='relu')(y)

    # ---- Fusion and classification head ------------------------------------
    x = keras.layers.concatenate([x, y])
    x = Dropout(0.5)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.35)(x)
    out = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=[inp_2d, inp_3d], outputs=[out])
    opt = Adam(lr=1e-4)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['acc'])
    model.summary()
    return model
预训练网络代码
import warnings
from keras import backend as K
from keras.layers import (Activation, BatchNormalization, Conv2D,
DepthwiseConv2D, Dropout, GlobalAveragePooling2D,
GlobalMaxPooling2D, Input, Reshape)
from keras.models import Model
def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Apply the MobileNet stem: convolution, batch normalisation, ReLU6.

    Args:
        inputs: Tensor fed to the convolution.
        filters: Base filter count; scaled by ``alpha`` and truncated to int.
        alpha: Width multiplier applied to ``filters``.
        kernel: Convolution kernel size.
        strides: Convolution strides.

    Returns:
        The output tensor of the ``conv1_relu`` activation.
    """
    scaled_filters = int(filters * alpha)
    stem_conv = Conv2D(
        scaled_filters,
        kernel,
        padding='same',
        use_bias=False,  # bias omitted; the convolution feeds straight into batch norm
        strides=strides,
        name='conv1',
    )
    conv_out = stem_conv(inputs)
    normed = BatchNormalization(name='conv1_bn')(conv_out)
    activated = Activation(relu6, name='conv1_relu')(normed)
    return activated
def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
                          depth_multiplier=1, strides=(1, 1), block_id=1):
    """Apply one depthwise-separable block: 3x3 depthwise then 1x1 pointwise.

    Each convolution is followed by batch normalisation and ReLU6. Layer
    names embed ``block_id`` so weights can be matched by name.

    Args:
        inputs: Tensor fed to the depthwise convolution.
        pointwise_conv_filters: Base output channels of the 1x1 convolution;
            scaled by ``alpha`` and truncated to int.
        alpha: Width multiplier applied to ``pointwise_conv_filters``.
        depth_multiplier: Depth multiplier of the depthwise convolution.
        strides: Strides of the depthwise convolution (the pointwise
            convolution always uses stride 1).
        block_id: Integer used in the layer names.

    Returns:
        The output tensor of the pointwise ReLU6 activation.
    """
    out_channels = int(pointwise_conv_filters * alpha)
    # Layers listed in execution order; they are applied one after another.
    pipeline = (
        DepthwiseConv2D((3, 3),
                        padding='same',
                        depth_multiplier=depth_multiplier,
                        strides=strides,
                        use_bias=False,
                        name='conv_dw_%d' % block_id),
        BatchNormalization(name='conv_dw_%d_bn' % block_id),
        Activation(relu6, name='conv_dw_%d_relu' % block_id),
        Conv2D(out_channels, (1, 1),
               padding='same',
               use_bias=False,
               strides=(1, 1),
               name='conv_pw_%d' % block_id),
        BatchNormalization(name='conv_pw_%d_bn' % block_id),
        Activation(relu6, name='conv_pw_%d_relu' % block_id),
    )
    tensor = inputs
    for layer in pipeline:
        tensor = layer(tensor)
    return tensor
def MobileNet(input_shape,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              classes=2):
    """Build a MobileNet feature extractor without the classification head.

    Args:
        input_shape: Despite the name, this is used directly as the model's
            input tensor (it is passed to the first convolution and to
            ``Model`` as ``inputs``), so callers supply a Keras ``Input``
            rather than a shape tuple.
        alpha: Width multiplier forwarded to every block.
        depth_multiplier: Depth multiplier forwarded to the depthwise blocks.
        dropout: Unused here; it belonged to the disabled classification head.
        classes: Unused here; it belonged to the disabled classification head.

    Returns:
        A Keras ``Model`` named ``mobilenet_<alpha>`` whose output is the
        feature map of depthwise block 13.
    """
    # img_input = Input(shape=input_shape)
    img_input = input_shape

    unit, halve = (1, 1), (2, 2)
    # (base filters, strides) for depthwise blocks 1..13, in order. Each
    # stride-2 entry halves the spatial resolution.
    block_specs = (
        (64, unit),
        (128, halve),
        (128, unit),
        (256, halve),
        (256, unit),
        (512, halve),
        (512, unit),
        (512, unit),
        (512, unit),
        (512, unit),
        (512, unit),
        (1024, halve),
        (1024, unit),
    )

    # Stem convolution (stride 2), then the depthwise-separable stack.
    features = _conv_block(img_input, 32, alpha, strides=(2, 2))
    for block_id, (base_filters, block_strides) in enumerate(block_specs, start=1):
        features = _depthwise_conv_block(features, base_filters, alpha,
                                         depth_multiplier,
                                         strides=block_strides,
                                         block_id=block_id)

    # The original classification head (global average pooling, reshape,
    # dropout, 1x1 conv to `classes`, softmax) is intentionally left out so
    # the model ends at the last feature map.
    return Model(img_input, features, name='mobilenet_%0.2f' % (alpha))
def relu6(x):
    """Return ReLU of ``x`` with the output capped at 6, via the Keras backend."""
    capped = K.relu(x, max_value=6)
    return capped
参考资料:
预训练与微调:https://blog.csdn.net/qq_35290785/article/details/89949289
T3D-keras代码:https://github.com/rekon/T3D-keras
文章:https://arxiv.org/pdf/1711.08200.pdf
优化后预训练的视频来源:https://www.bilibili.com/video/BV1mJ411d7dG
相关博客和代码:
https://blog.csdn.net/weixin_44791964/article/details/102779878
https://github.com/bubbliiiing/classification-keras
安利B站UP主:Bubbliiiing
B站主页:https://space.bilibili.com/472467171
CSDN主页:https://blog.csdn.net/weixin_44791964?spm=1001.2014.3001.5509