上篇博文从整体上描述了MTCNN人脸检测的技术实现流程,宏观上了解到MTCNN的模型由三个子网络构成,分别为P-Net、R-Net和O-Net。P-Net用于快速生成候选框,尽可能多地找到建议框;R-Net负责进一步精细化筛选建议框;O-Net对建议框做最后的筛选判定,并且进行人脸关键点的定位。依据个人想法,对MTCNN网络模型进行了简单的改进,如下图所示:
根据上述网络模型架构,利用PyTorch实现网络模型,代码如下:
import torch
import torch.nn as nn
class DepthwiseConvBlock(nn.Module):
    """Bottleneck block: 1x1 reduce -> depthwise conv -> 1x1 expand.

    The first 1x1 convolution halves the channel count
    (``in_channels // 2``), the middle convolution is depthwise
    (``groups`` equals its channel count) and uses no padding, and the
    last 1x1 convolution projects to ``out_channels``. Every convolution
    is followed by batch normalisation and a PReLU activation.

    Args:
        in_channels: Number of input channels; must be at least 2 so the
            halved bottleneck width is non-zero.
        out_channels: Number of output channels.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.

    Raises:
        ValueError: If ``in_channels // 2`` is smaller than 1.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        mid_channels = in_channels // 2
        # Fail early with a clear message: a zero-width bottleneck would
        # otherwise surface as an unrelated error from the grouped conv.
        if mid_channels < 1:
            raise ValueError(
                "DepthwiseConvBlock requires in_channels >= 2, "
                f"got in_channels={in_channels}"
            )
        self.net_layer = nn.Sequential(
            # 1x1 channel reduction.
            nn.Conv2d(in_channels, mid_channels, 1, 1, padding=0),
            nn.BatchNorm2d(mid_channels),
            nn.PReLU(),
            # Depthwise spatial convolution (one filter per channel).
            nn.Conv2d(mid_channels, mid_channels, kernel_size, stride,
                      groups=mid_channels),
            nn.BatchNorm2d(mid_channels),
            nn.PReLU(),
            # 1x1 channel expansion to the requested output width.
            nn.Conv2d(mid_channels, out_channels, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.PReLU(),
        )

    def forward(self, x):
        """Apply the reduce / depthwise / expand stack to ``x``."""
        return self.net_layer(x)
class PNet(nn.Module):
    """Fully convolutional first-stage network.

    A 3x3 convolution, PReLU and 2x2 max-pool are followed by two
    ``DepthwiseConvBlock`` stages; two 1x1 convolution heads then emit a
    one-channel sigmoid score map and a four-channel offset map. With a
    12x12 input the output maps are 1x1.
    """

    def __init__(self):
        super().__init__()
        backbone = [
            nn.Conv2d(3, 10, kernel_size=3, stride=1, padding=0),
            nn.PReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            DepthwiseConvBlock(10, 16, 3, 1),
            DepthwiseConvBlock(16, 32, 3, 1),
        ]
        self.pnet_layer = nn.Sequential(*backbone)
        # Heads are 1x1 convolutions, so outputs stay spatial maps.
        self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1)
        self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)

    def forward(self, x):
        """Return ``(classify, offset)`` maps for the input batch ``x``."""
        features = self.pnet_layer(x)
        score_map = self.conv4_1(features).sigmoid()
        offset_map = self.conv4_2(features)
        return score_map, offset_map
class RNet(nn.Module):
    """Second-stage network with fully connected heads.

    The convolutional trunk (3x3 conv, pooling and two
    ``DepthwiseConvBlock`` stages) must produce a 3x3x64 feature map,
    which is what a 24x24 input yields. The map is flattened, passed
    through a 128-unit linear layer with PReLU, and fed to two linear
    heads: a sigmoid score and four offset values.
    """

    def __init__(self):
        super().__init__()
        self.rnet_layer = nn.Sequential(
            nn.Conv2d(3, 28, 3, 1, padding=0),
            nn.PReLU(),
            nn.MaxPool2d(3, 2, padding=1),
            DepthwiseConvBlock(28, 48, 3, 1),
            nn.MaxPool2d(3, 2, padding=0),
            DepthwiseConvBlock(48, 64, 2, 1),
        )
        self.fcn_layer = nn.Linear(3 * 3 * 64, 128, bias=True)
        self.prelu = nn.PReLU()
        # Attribute names keep the "conv" prefix so state-dict keys are
        # unchanged, although both heads are linear layers.
        self.conv5_1 = nn.Linear(128, 1)
        self.conv5_2 = nn.Linear(128, 4)

    def forward(self, x):
        """Return ``(classify, offset)`` with shapes ``(N, 1)`` and ``(N, 4)``."""
        x = self.rnet_layer(x)
        # flatten(start_dim=1) instead of reshape(N, -1): reshape cannot
        # infer -1 for a zero-element tensor, so an empty batch would
        # raise here.
        x = torch.flatten(x, start_dim=1)
        x = self.prelu(self.fcn_layer(x))
        classify = torch.sigmoid(self.conv5_1(x))
        offset = self.conv5_2(x)
        return classify, offset
class ONet(nn.Module):
    """Third-stage network with fully connected heads.

    The convolutional trunk (3x3 conv, three pooling layers and three
    ``DepthwiseConvBlock`` stages) must produce a 3x3x128 feature map,
    which is what a 48x48 input yields. The map is flattened, passed
    through a 256-unit linear layer with PReLU, and fed to two linear
    heads: a sigmoid score and four offset values. No landmark head is
    defined in this class.
    """

    def __init__(self):
        super().__init__()
        self.onet_layer = nn.Sequential(
            nn.Conv2d(3, 32, 3, 1, padding=0),
            nn.PReLU(),
            nn.MaxPool2d(3, 2, padding=1),
            DepthwiseConvBlock(32, 64, 3, 1),
            nn.MaxPool2d(3, 2, padding=0),
            DepthwiseConvBlock(64, 64, 3, 1),
            nn.MaxPool2d(2, 2, padding=0),
            DepthwiseConvBlock(64, 128, 2, 1),
        )
        self.fcn_layer = nn.Linear(3 * 3 * 128, 256, bias=True)
        self.prelu = nn.PReLU()
        # Attribute names keep the "conv" prefix so state-dict keys are
        # unchanged, although both heads are linear layers.
        self.conv6_1 = nn.Linear(256, 1)
        self.conv6_2 = nn.Linear(256, 4)

    def forward(self, x):
        """Return ``(classify, offset)`` with shapes ``(N, 1)`` and ``(N, 4)``."""
        x = self.onet_layer(x)
        # flatten(start_dim=1) instead of reshape(N, -1): reshape cannot
        # infer -1 for a zero-element tensor, so an empty batch would
        # raise here.
        x = torch.flatten(x, start_dim=1)
        x = self.prelu(self.fcn_layer(x))
        classify = torch.sigmoid(self.conv6_1(x))
        offset = self.conv6_2(x)
        return classify, offset