最近学习了基于 FaceNet 的 OpenFace 包。
在前面的文章里我们已经将图片中的人脸提取出来，并进行了对齐。接下来我们建立卷积网络，之后再使用 triplet loss 进行训练。
下面是卷积网络的代码。首先建立了Inception模块以供调用,接着建立了训练将调用的卷积网络。
# model: nn4.py
import torch
from torch import nn
from torch.nn import functional as F

from .BasicModule import BasicModule
class Inception(BasicModule):
    """Inception block: parallel branches over one input, joined by channel.

    Branches are created in this order:

    1. One ``1x1 reduce -> kxk convolution`` branch per entry of
       ``kernelSize``.
    2. A pooling branch: ``pool`` followed by a 1x1 projection when
       ``reduceSize`` supplies a width for it, otherwise ``pool`` alone.
    3. A standalone 1x1 convolution branch when ``reduceSize`` supplies a
       width for it.

    Every convolution is followed by batch normalisation and ReLU.

    Args:
        inputSize: Number of input channels.
        kernelSize: Kernel size of each kxk convolution branch (odd ints).
        kernelStride: Stride of each kxk convolution branch.
        outputSize: Output channels of each kxk convolution branch.
        reduceSize: Channel widths, in order: one 1x1 reduction per kxk
            branch, then optionally the pooling projection, then optionally
            the standalone 1x1 branch. A ``None`` entry means "absent".
        pool: Pooling module used by the pooling branch. Its output must
            have the same spatial size as the convolution branches,
            otherwise the concatenation in ``forward`` fails.

    Raises:
        ValueError: If the per-branch lists disagree in length or
            ``reduceSize`` has too few or too many entries.
    """

    def __init__(self, inputSize, kernelSize, kernelStride, outputSize,
                 reduceSize, pool):
        super(Inception, self).__init__()

        n_conv = len(kernelSize)
        if len(kernelStride) != n_conv or len(outputSize) != n_conv:
            raise ValueError(
                "kernelSize, kernelStride and outputSize must have the "
                "same length")
        if not n_conv <= len(reduceSize) <= n_conv + 2:
            raise ValueError(
                "reduceSize needs one entry per convolution branch plus at "
                "most two extra entries (pool projection, 1x1 branch)")

        # ModuleDict rather than a plain dict: only registered sub-modules
        # show up in parameters(), state_dict() and follow .to()/.cuda().
        self.layers = nn.ModuleDict()

        branches = zip(kernelSize, kernelStride, outputSize)
        for index, (kernel, stride, out_channels) in enumerate(branches):
            reduced = reduceSize[index]
            self.layers[str(index)] = nn.Sequential(
                # 1x1 convolution: channel reduction
                nn.Conv2d(inputSize, reduced, 1),
                nn.BatchNorm2d(reduced),
                nn.ReLU(),
                # kxk convolution; padding of kernel // 2 keeps every
                # branch on the same spatial grid for odd kernel sizes.
                nn.Conv2d(reduced, out_channels, kernel, stride,
                          padding=kernel // 2),
                nn.BatchNorm2d(out_channels),
                nn.ReLU())

        # Whatever is left in reduceSize configures the optional branches.
        extras = list(reduceSize[n_conv:]) + [None, None]
        pool_channels, conv1x1_channels = extras[0], extras[1]

        if pool_channels is None:
            # No projection: the pooled input is passed through unchanged.
            self.layers[str(n_conv)] = nn.Sequential(pool)
        else:
            self.layers[str(n_conv)] = nn.Sequential(
                pool,
                nn.Conv2d(inputSize, pool_channels, 1),
                nn.BatchNorm2d(pool_channels),
                nn.ReLU())

        if conv1x1_channels is not None:
            self.layers[str(n_conv + 1)] = nn.Sequential(
                nn.Conv2d(inputSize, conv1x1_channels, 1),
                nn.BatchNorm2d(conv1x1_channels),
                nn.ReLU())

    def forward(self, x):
        """Run every branch on ``x`` and concatenate along dim 1 (channels)."""
        outputs = [branch(x) for branch in self.layers.values()]
        return torch.cat(outputs, 1)
def _padded_lp_pool():
    """Return a size-preserving 3x3, stride-1 L2 pooling layer.

    ``nn.LPPool2d`` has no padding argument, so on its own a 3x3 window
    shrinks the feature map by two pixels per axis. Zero-padding by one
    pixel first keeps the output the same size as the input, matching the
    ``nn.MaxPool2d(3, 1, 1)`` pooling used by the other stride-1 blocks.
    """
    return nn.Sequential(nn.ZeroPad2d(1), nn.LPPool2d(2, 3, 1))


class FaceDetectModule(BasicModule):
    """Inception-style network mapping face images to unit-length embeddings.

    The layer arithmetic yields a 1x1x896 feature map for 96x96 inputs
    (48 -> 24 -> 12 -> 6 -> 3 -> 1); other input sizes do not match the
    final linear layer.

    Args:
        opt: Configuration object; ``opt.embSize`` is the embedding width.
    """

    def __init__(self, opt):
        super(FaceDetectModule, self).__init__()
        self.model_name = 'FaceDetectModule'
        self.features = nn.Sequential(
            # input: 3, output: 64, kernel size: 7, stride: 2, padding: 3
            nn.Conv2d(3, 64, 7, 2, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, 1),
            nn.LocalResponseNorm(5),
            # Inception (2)
            nn.Conv2d(64, 64, 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            # NOTE(review): the original author flagged this layer as
            # "somewhat problematic" without saying why -- confirm.
            nn.Conv2d(64, 192, 3, 1, 1),
            nn.BatchNorm2d(192),
            nn.ReLU(),
            nn.LocalResponseNorm(5),
            nn.MaxPool2d(3, 2, 1),
            # Inception (3a): 128 + 32 + 32 + 64 = 256 channels out
            Inception(
                inputSize=192,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[128, 32],
                reduceSize=[96, 16, 32, 64],
                pool=nn.MaxPool2d(3, 1, 1)),
            # Inception (3b): 128 + 64 + 64 + 64 = 320 channels out
            Inception(
                inputSize=256,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[128, 64],
                reduceSize=[96, 32, 64, 64],
                pool=_padded_lp_pool()),
            # Inception (3c): no standalone 1x1 branch; the max pool output
            # is concatenated directly: 256 + 64 + 320 = 640 channels out.
            # Padding is 1 (as in block 4e): PyTorch rejects padding larger
            # than half the kernel size, and 1 matches the stride-2 convs.
            Inception(
                inputSize=320,
                kernelSize=[3, 5],
                kernelStride=[2, 2],
                outputSize=[256, 64],
                reduceSize=[128, 32],
                pool=nn.MaxPool2d(3, 2, 1)),
            # Inception (4a)
            Inception(
                inputSize=640,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[192, 64],
                reduceSize=[96, 32, 128, 256],
                pool=_padded_lp_pool()),
            # Inception (4b)
            Inception(
                inputSize=640,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[224, 64],
                reduceSize=[112, 32, 128, 224],
                pool=_padded_lp_pool()),
            # Inception (4c)
            Inception(
                inputSize=640,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[256, 64],
                reduceSize=[128, 32, 128, 192],
                pool=_padded_lp_pool()),
            # Inception (4d)
            Inception(
                inputSize=640,
                kernelSize=[3, 5],
                kernelStride=[1, 1],
                outputSize=[288, 64],
                reduceSize=[144, 32, 128, 160],
                pool=_padded_lp_pool()),
            # Inception (4e): 256 + 128 + 640 = 1024 channels out
            Inception(
                inputSize=640,
                kernelSize=[3, 5],
                kernelStride=[2, 2],
                outputSize=[256, 128],
                reduceSize=[160, 64],
                pool=nn.MaxPool2d(3, 2, 1)),
            # Inception (5a): only a 3x3 convolution branch, no 5x5 branch;
            # 384 + 128 + 384 = 896 channels out
            Inception(
                inputSize=1024,
                kernelSize=[3],
                kernelStride=[1],
                outputSize=[384],
                reduceSize=[192, 128, 384],
                pool=_padded_lp_pool()),
            # Inception (5b)
            Inception(
                inputSize=896,
                kernelSize=[3],
                kernelStride=[1],
                outputSize=[384],
                reduceSize=[192, 128, 384],
                pool=nn.MaxPool2d(3, 1, 1)),
            nn.AvgPool2d(3))
        self.fc = nn.Linear(896, opt.embSize)

    def forward(self, x):
        """Return L2-normalised embeddings of shape ``(batch, opt.embSize)``."""
        out = self.features(x)
        # view() returns a new tensor, so the result must be kept. Flatten
        # per sample so a wrong input size fails in fc instead of silently
        # folding spatial positions into the batch dimension.
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return F.normalize(out, p=2, dim=1)