To help you get hands-on quickly and see results first, I'll give a brief introduction to the model here and then go straight to the code; the theory will be analyzed step by step afterwards.
What is the difference between a face recognition model and a classification model?
- The flow for recognizing the faces in two images is: the model extracts the face features from each image, then compares the similarity of the two feature vectors; if the distance between them is below a threshold (equivalently, the similarity is above a threshold), we consider them the same person (see the sketch after this list)
- The flow of a classification model is: extract features from the image, then use the final (softmax) layer to compute, from those feature values, which class the image belongs to
- So we can reuse the feature-extraction part of a classification model, everything up to the second-to-last layer, to extract the face features from an image
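To make the comparison step concrete, here is a minimal sketch of how two face feature vectors could be compared; the random embeddings and the 0.5 threshold are illustrative placeholders, not values from this post:

import numpy as np

def cosine_similarity(a, b):
    # cos(a, b) = a . b / (||a|| * ||b||)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

feat_a = np.random.rand(512)  # placeholder embedding for face A
feat_b = np.random.rand(512)  # placeholder embedding for face B
THRESHOLD = 0.5               # illustrative; tune on validation data
same_person = cosine_similarity(feat_a, feat_b) > THRESHOLD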
Our next task, therefore, is to train a face classification model.
Creating and training the model
The steps are as follows:
- We use a ResNet18 classification model as the backbone
- Since AM-Softmax loss is better suited to face recognition than softmax loss (FaceNet uses triplet loss, but it takes too long to train, so we drop it), we use a wrap_cnn function to replace the softmax layer in ResNet18 with AM-Softmax (the loss formula is given after this list)
- Train the model on the data from the previous post
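For reference, the loss we are about to implement is the AM-Softmax loss from the paper (this formula is background, not code from this post):

$$
L_{\mathrm{AMS}} = -\frac{1}{n}\sum_{i=1}^{n}\log\frac{e^{s\,(\cos\theta_{y_i}-m)}}{e^{s\,(\cos\theta_{y_i}-m)}+\sum_{j\neq y_i}e^{s\,\cos\theta_j}}
$$

where $s$ is the scale and $m$ the additive margin, matching the `scale` and `margin` arguments in the code below.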
The code is as follows:
- First, create the AMSoftmax class
import tensorflow as tf
from keras import backend as K
from keras.layers import Dropout, Layer  # Layer moved out of keras.engine.topology in newer Keras versions
from keras.models import Model

class AMSoftmax(Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        self.kernel = None
        super(AMSoftmax, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) >= 2
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.units),
                                      initializer='uniform',
                                      trainable=True)
        super(AMSoftmax, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # get cosine similarity
        # cosine = x * w / (||x|| * ||w||)
        inputs = K.l2_normalize(inputs, axis=1)
        kernel = K.l2_normalize(self.kernel, axis=0)
        cosine = K.dot(inputs, kernel)
        return cosine

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.units

    def get_config(self):
        config = {'units': self.units}
        base_config = super(AMSoftmax, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
def amsoftmax_loss(y_true, y_pred, scale=30.0, margin=0.35):
    # make two constant tensors
    m = K.constant(margin, name='m')  # margin
    s = K.constant(scale, name='s')   # scale
    # turn the one-hot labels back into integer class indices
    label = K.reshape(K.argmax(y_true, axis=-1), shape=(-1, 1))
    label = K.cast(label, dtype=tf.int32)
    pred_batch = K.reshape(tf.range(K.shape(y_pred)[0]), shape=(-1, 1))
    # concat the two column vectors: one is the batch index, the other the label
    ground_truth_indices = tf.concat([pred_batch,
                                      K.reshape(label, shape=(-1, 1))], axis=1)
    # gather the cosine score of the ground-truth class for each sample
    ground_truth_scores = tf.gather_nd(y_pred, ground_truth_indices)
    # if ground_truth_score > m, subtract the margin m from it
    added_margin = K.cast(K.greater(ground_truth_scores, m),
                          dtype=tf.float32) * m
    added_margin = K.reshape(added_margin, shape=(-1, 1))
    added_embedding_feature = tf.subtract(y_pred, y_true * added_margin) * s
    cross_entropy = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(
        labels=y_true, logits=added_embedding_feature)
    loss = tf.reduce_mean(cross_entropy)
    return loss
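As a quick sanity check, the loss can be called directly on toy tensors (the class count and cosine scores below are arbitrary, and this assumes TensorFlow 2.x eager execution):

# toy batch: 2 samples, 3 classes, one-hot labels
y_true = tf.constant([[1., 0., 0.],
                      [0., 1., 0.]])
# fake cosine scores in [-1, 1], shaped like AMSoftmax's output
y_pred = tf.constant([[0.9, 0.1, -0.2],
                      [0.3, 0.8, 0.0]])
print(amsoftmax_loss(y_true, y_pred))  # a scalar loss tensor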
- Create the ResNet18 model
from keras.layers import Activation, Conv2D, Dense
from keras.layers import BatchNormalization
from keras.layers import GlobalAveragePooling2D
from keras.layers import Input
from keras.layers import MaxPooling2D
from keras.layers import ZeroPadding2D, Add
from keras.models import Model

def basic_block(filters, kernel_size=3, is_first_block=True):
    # the first block of each stage downsamples with stride 2
    stride = 1
    if is_first_block:
        stride = 2

    def f(x):
        # f(x) named y
        # 1st Conv
        y = ZeroPadding2D(padding=1)(x)
        y = Conv2D(filters, kernel_size, strides=stride, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation("relu")(y)
        # 2nd Conv
        y = ZeroPadding2D(padding=1)(y)
        y = Conv2D(filters, kernel_size, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        # f(x) + x: project the shortcut with a 1x1 conv when shapes change
        if is_first_block:
            shortcut = Conv2D(filters, kernel_size=1, strides=stride, kernel_initializer='he_normal')(x)
            shortcut = BatchNormalization()(shortcut)
        else:
            shortcut = x
        y = Add()([y, shortcut])
        y = Activation("relu")(y)
        return y
    return f

def ResNet18(input_shape, num_classes):
    input_layer = Input(shape=input_shape, name="input")
    # Conv1
    x = ZeroPadding2D(padding=(3, 3), name='conv1_pad')(input_layer)
    x = Conv2D(64, (7, 7),
               strides=(2, 2),
               padding='valid',
               kernel_initializer='he_normal',
               name='conv1')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
    # Conv2
    x = basic_block(filters=64)(x)
    x = basic_block(filters=64, is_first_block=False)(x)
    # Conv3
    x = basic_block(filters=128)(x)
    x = basic_block(filters=128, is_first_block=False)(x)
    # Conv4
    x = basic_block(filters=256)(x)
    x = basic_block(filters=256, is_first_block=False)(x)
    # Conv5
    x = basic_block(filters=512)(x)
    x = basic_block(filters=512, is_first_block=False)(x)
    # global average pooling yields the 512-d "feature" embedding
    x = GlobalAveragePooling2D(name="feature")(x)
    output_layer = Dense(num_classes, activation='softmax')(x)
    model = Model(input_layer, output_layer)
    return model
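To sanity-check the backbone before wrapping it, we can build it with the same shapes used in the training code later in this post and confirm the "feature" layer is a 512-d vector:

backbone = ResNet18(input_shape=(224, 224, 1), num_classes=40)
backbone.summary()
# the "feature" layer output is what wrap_cnn taps into
print(backbone.get_layer("feature").output_shape)  # (None, 512)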
- The wrap_cnn function attaches the AM-Softmax layer to the network in place of its softmax head
def wrap_cnn(model, feature_layer, input_shape, num_classes):
    assert isinstance(model, Model)
    # tap the embedding layer, add dropout, and replace the softmax head
    x = model.get_layer(name=feature_layer).output
    x = Dropout(.5)(x)
    output_layer = AMSoftmax(num_classes, name="predictions")(x)
    return Model(inputs=model.input, outputs=output_layer)
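For completeness, here is a sketch of how the trained, wrapped model could later be turned into a pure feature extractor for recognition (faces_batch is a hypothetical batch of preprocessed (224, 224, 1) images; the actual extraction is the subject of the next post):

feature_extractor = Model(inputs=model.input,
                          outputs=model.get_layer("feature").output)
# each row is a 512-d face embedding; compare pairs with cosine similarity
embeddings = feature_extractor.predict(faces_batch)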
- With all the pieces in place, we can run the main code
# initial variables and the model
input_shape = (224, 224, 1)
num_classes = 40  # 40 different people in the dataset
num_epochs = 1000
batch_size = 64
model = ResNet18(input_shape, num_classes)
model = wrap_cnn(model, feature_layer="feature", input_shape=input_shape, num_classes=num_classes)
# compile the model with the AM-Softmax loss
model.compile(optimizer='adam',
              loss=amsoftmax_loss,
              metrics=['accuracy'])
# train the model
model.fit(X_train, y_train,
          steps_per_epoch=10,
          epochs=num_epochs,
          verbose=1,
          validation_data=(X_test, y_test),
          validation_steps=len(y_test) // batch_size)  # must be an integer number of batches
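X_train, y_train, X_test and y_test come from the data prepared in the previous post and are not shown here. As a hedged reminder of what the loss above expects, the labels must be one-hot encoded; with integer class ids (labels_train is a placeholder name) that could look like:

from keras.utils import to_categorical
# X_train is expected to be (n_samples, 224, 224, 1); labels_train holds integer ids 0..39
y_train = to_categorical(labels_train, num_classes=40)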
Summary
The above trains the model. Training requires a GPU; I used the free GPU on Google Colab, where each step takes about 20 seconds, and in roughly half an hour you can train a decent model (over 99% accuracy). A CPU is too slow and not recommended.
The model above still has plenty of shortcomings; the goal was to keep the code as short as possible so you get an intuitive picture of the face recognition pipeline. In the next post, we will use the extracted face features to perform actual face recognition.