Preface

The previous article introduced how MTCNN trains a classifier to score and refine candidate face windows. Here we implement the first stage, PNet, with NumPy, running it with the pretrained Caffe weights.

II. Implementation

1. Import the libraries
import numpy as np
import mtcnn.caffe_pb2 as pb
import cv2
from matplotlib import pyplot as plt
2. Read the input image
raw_image = cv2.imread("face.jpg")
input_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2YCrCb)  # color-space conversion
input_image = (input_image - 127.5) * 0.0078125  # normalize so values fall in [-1, 1]
origin_h, origin_w, _ = input_image.shape
Here COLOR_BGR2YCrCb is an enum value for image color-space conversion: it tells OpenCV to convert the image from BGR (its default channel order) to the YCrCb color space.
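As a quick sanity check on the normalization: 0.0078125 is exactly 1/128, so pixel values in [0, 255] land just inside [-1, 1]. A two-line check (illustrative only):

pixels = np.array([0.0, 127.5, 255.0])
print((pixels - 127.5) * 0.0078125)  # [-0.99609375  0.  0.99609375]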
Building the scales for the image pyramid
def calculateScales(img):
    pr_scale = 1.0
    h, w, _ = img.shape
    # Pre-scale so the image's extent is close to 500 pixels
    if min(w, h) > 500:
        pr_scale = 500.0 / min(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)
    elif max(w, h) < 500:
        pr_scale = 500.0 / max(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)

    scales = []
    factor = 0.709
    factor_count = 0
    minl = min(h, w)
    # Keep shrinking until the short side drops below PNet's 12x12 input
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
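The factor 0.709 is approximately the square root of 0.5, so every pyramid level halves the image area, and the loop stops once the shorter side would fall below 12 pixels, PNet's minimum input size. A quick sketch of how many levels that yields, assuming a 500-pixel short side after the pre-scaling above:

import math

short_side = 500
levels = math.floor(math.log(12 / short_side, 0.709)) + 1
print(levels)  # 11: 500 * 0.709**10 is about 16.0, still >= 12; the next step is not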
#%%
scales = calculateScales(input_image)
scales
The results are as follows (the printout is truncated here; eleven scales are produced in total):

[1.0,
 0.709,
 0.5026809999999999,
 0.3564008289999999,
 0.25268818776099994,
 0.17915592512254896,
 0.12702155091188722,
 0.09005827959652803,
 0.06385132023393836,
 0.045270586045862295,
 ...]
Encapsulating the network
net = pb.NetParameter()
with open(r"D:\BaiduNetdiskDownload\第四期资料\2020\12.14课件MTCNN-BP-Conv\mtcnn\det1.caffemodel", "rb") as f:
    net.ParseFromString(f.read())  # deserialize the pretrained Caffe PNet weights

layer_mapper = {item.name: item for item in net.layer}  # look layers up by name
image = cv2.imread("my.jpg")
image.shape
image = image.transpose(2, 0, 1)[None]  # HWC -> NCHW, add a batch dimension
image.shape
layer_mapper
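To see what each mapped layer holds you can inspect its blobs; in the standard Caffe proto layout, blobs[0] stores the flattened weights and blobs[1] the biases (a quick inspection sketch):

conv1 = layer_mapper["conv1"]
print(len(conv1.blobs[0].data))  # 270 = 10 out-channels * 3 in-channels * 3 * 3
print(len(conv1.blobs[1].data))  # 10, one bias per output channel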
class Initializer:
    def __init__(self, name):
        self.name = name

    def __call__(self, *args):
        return self.apply(*args)


class GaussInitializer(Initializer):
    # where :math:`\mu` is the mean and :math:`\sigma` the standard
    # deviation. The square of the standard deviation, :math:`\sigma^2`,
    # is called the variance.
    def __init__(self, mu, sigma):
        self.mu = mu
        self.sigma = sigma

    def apply(self, value):
        value[...] = np.random.normal(self.mu, self.sigma, value.shape)


class Parameter:
    def __init__(self, value):
        self.value = value
        self.delta = np.zeros(value.shape)

    def zero_grad(self):
        self.delta[...] = 0
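A minimal usage sketch of these helpers (the shapes are illustrative): Parameter couples a value with its gradient buffer, and GaussInitializer fills an array in place with Gaussian samples.

weight = Parameter(np.zeros((10, 3, 3, 3)))  # e.g. conv1's kernel shape
init = GaussInitializer(mu=0, sigma=0.01)
init(weight.value)   # fills the value with N(0, 0.01**2) samples
weight.zero_grad()   # resets the accumulated gradient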
def conv2d_forward(x, kernel, bias, in_feature, out_feature, kernel_size, padding=0, stride=1):
    ib, ic, ih, iw = x.shape
    oh = (ih + padding * 2 - kernel_size) // stride + 1
    ow = (iw + padding * 2 - kernel_size) // stride + 1

    # im2col: unfold every receptive field into one column of `column`
    col_w = oh * ow
    col_h = kernel_size * kernel_size * in_feature
    column = np.zeros((ib, col_h, col_w))
    output = np.zeros((ib, out_feature, oh, ow))
    kcol = np.array(kernel).reshape(out_feature, -1)
    for b in range(ib):
        for c in range(ic):
            for oy in range(oh):
                for ox in range(ow):
                    for ky in range(kernel_size):
                        for kx in range(kernel_size):
                            column_y = ky * kernel_size + kx + c * kernel_size * kernel_size
                            column_x = ox + oy * ow
                            ix = ox * stride + kx - padding
                            iy = oy * stride + ky - padding
                            if 0 <= ix < iw and 0 <= iy < ih:
                                column[b, column_y, column_x] = x[b, c, iy, ix]
        # the convolution is now a single matrix multiply per batch item
        output[b] = (kcol @ column[b]).reshape(out_feature, oh, ow) + np.array(bias).reshape(out_feature, 1, 1)
    return output
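A small sanity check for conv2d_forward with made-up data: a 3x3 kernel that is 1 only at its center simply copies pixels, so the valid convolution of a 4x4 image should equal its 2x2 interior.

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
k = np.zeros((1, 1, 3, 3))
k[0, 0, 1, 1] = 1.0  # "identity" kernel
y = conv2d_forward(x, k, bias=[0.0], in_feature=1, out_feature=1, kernel_size=3)
assert np.allclose(y, x[:, :, 1:3, 1:3])  # picks out the 2x2 interior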
def prelu(x, weight):
    # PReLU: positives pass through, negatives are scaled by a learned
    # per-channel slope
    x = x.copy()
    channels = x.shape[1]
    for c in range(channels):
        current_channel = x[:, c, :, :]
        select = current_channel < 0
        current_channel[select] *= weight[c]
    return x
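For example, with a slope of 0.25 a value of -2 becomes -0.5 while positive values pass through unchanged (a tiny illustration):

t = np.array([[[[-2.0, 3.0]]]])  # NCHW with a single channel
print(prelu(t, [0.25]))          # [[[[-0.5  3. ]]]]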
def max_pooling2d(x, kernel_size, stride):
    ib, ic, ih, iw = x.shape
    # ceil mode, as in Caffe: a partial window at the border still
    # produces an output cell
    output_height = int(np.ceil((ih - kernel_size) / stride) + 1)
    output_width = int(np.ceil((iw - kernel_size) / stride) + 1)
    output = np.zeros((ib, ic, output_height, output_width))
    minvalue = float("-inf")
    for b in range(ib):
        for c in range(ic):
            for oy in range(output_height):
                for ox in range(output_width):
                    value = minvalue
                    for kx in range(kernel_size):
                        for ky in range(kernel_size):
                            ix = ox * stride + kx
                            iy = oy * stride + ky
                            if ix < iw and iy < ih:
                                value = max(value, x[b, c, iy, ix])
                    output[b, c, oy, ox] = value
    return output
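The ceil in the output-size formula matches Caffe's pooling and keeps PNet's feature-map sizes in line with the reference model: a 5-pixel side with a 2x2 window at stride 2 gives ceil((5 - 2) / 2) + 1 = 3 outputs rather than 2. A quick check:

t = np.arange(25, dtype=float).reshape(1, 1, 5, 5)
print(max_pooling2d(t, kernel_size=2, stride=2).shape)  # (1, 1, 3, 3)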
class Conv2d:
    def __init__(self, kernel, bias, in_feature, out_feature, kernel_size, padding, stride):
        self.kernel = kernel
        self.bias = bias
        self.in_feature = in_feature
        self.out_feature = out_feature
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride

    def forward(self, x):
        return conv2d_forward(x, self.kernel, self.bias, self.in_feature, self.out_feature,
                              self.kernel_size, self.padding, self.stride)


class PReLU:
    def __init__(self, weight):
        self.weight = weight

    def forward(self, x):
        return prelu(x, self.weight)


class MaxPooling2d:
    def __init__(self, kernel_size, stride):
        self.kernel_size = kernel_size
        self.stride = stride

    def forward(self, x):
        return max_pooling2d(x, self.kernel_size, self.stride)
class PNet:
    def __init__(self, layer_mapper):
        conv1_weight = layer_mapper["conv1"]
        prelu1_weight = layer_mapper["PReLU1"]
        conv2_weight = layer_mapper["conv2"]
        prelu2_weight = layer_mapper["PReLU2"]
        conv3_weight = layer_mapper["conv3"]
        prelu3_weight = layer_mapper["PReLU3"]
        self.layers = [
            Conv2d(
                kernel=conv1_weight.blobs[0].data,
                bias=conv1_weight.blobs[1].data,
                in_feature=3,
                out_feature=10,
                kernel_size=3,
                padding=0,
                stride=1
            ),
            PReLU(prelu1_weight.blobs[0].data),
            MaxPooling2d(2, 2),
            Conv2d(
                kernel=conv2_weight.blobs[0].data,
                bias=conv2_weight.blobs[1].data,
                in_feature=10,
                out_feature=16,
                kernel_size=3,
                padding=0,
                stride=1
            ),
            PReLU(prelu2_weight.blobs[0].data),
            Conv2d(
                kernel=conv3_weight.blobs[0].data,
                bias=conv3_weight.blobs[1].data,
                in_feature=16,
                out_feature=32,
                kernel_size=3,
                padding=0,
                stride=1
            ),
            PReLU(prelu3_weight.blobs[0].data)
        ]

        # Two 1x1 heads: conv4-1 scores face / non-face,
        # conv4-2 regresses the bounding-box offsets
        conv41_weight = layer_mapper["conv4-1"]
        self.conv41 = Conv2d(
            kernel=conv41_weight.blobs[0].data,
            bias=conv41_weight.blobs[1].data,
            in_feature=32,
            out_feature=2,
            kernel_size=1,
            padding=0,
            stride=1
        )
        conv42_weight = layer_mapper["conv4-2"]
        self.conv42 = Conv2d(
            kernel=conv42_weight.blobs[0].data,
            bias=conv42_weight.blobs[1].data,
            in_feature=32,
            out_feature=4,
            kernel_size=1,
            padding=0,
            stride=1
        )

    def forward(self, image):
        x = image
        for layer in self.layers:
            x = layer.forward(x)
        return self.conv41.forward(x), self.conv42.forward(x)
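Tracing the shapes explains the pyramid's 12-pixel stopping size: a 12x12 input becomes 10x10 after conv1, 5x5 after the ceil-mode pool, 3x3 after conv2 and 1x1 after conv3, so every output cell scores one 12x12 window of its input. A shape check on random data (illustrative only):

pnet = PNet(layer_mapper)
probe = np.random.randn(1, 3, 12, 12)
score, box = pnet.forward(probe)
print(score.shape, box.shape)  # (1, 2, 1, 1) (1, 4, 1, 1)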
Printing the results
pnet = PNet(layer_mapper)  # build PNet from the loaded weights
out1 = []
out2 = []
for scale in scales:
    dif_w = int(origin_w * scale)
    dif_h = int(origin_h * scale)
    dif_img = cv2.resize(input_image, (dif_w, dif_h))
    input_image_tf = dif_img.transpose(2, 1, 0)[None]  # to channel-first, add batch dim
    classify, reg = pnet.forward(input_image_tf)
    out1.append(classify)  # face / non-face score maps
    out2.append(reg)       # bounding-box regressions
print("out1:", out1)
(The full printout is lengthy and is truncated here; each entry of out1 is the raw two-channel face / non-face score map for one pyramid scale, and each entry of out2 the matching four-channel regression map. The last element, from the smallest scale, is shown below.)
out1[10]
array([[[[ 1.98634708, 2.07212636, 2.64673702],
[ 0.82790078, 0.73264951, 2.55249553],
[ 1.76121664, 1.01266622, 1.9616819 ]],
[[-1.41277386, -0.87433834, -1.85184214],
[-1.38557532, -0.8822668 , -3.65898619],
[-2.251872 , -2.31059625, -3.10679517]]]])
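These values are raw logits rather than probabilities; to read a score map as face probabilities, apply a softmax across the two channels (a sketch, not part of the original listing; by the usual MTCNN convention channel 1 is the face class):

def softmax_channels(logits):
    e = np.exp(logits - logits.max(axis=1, keepdims=True))  # numerically stable softmax
    return e / e.sum(axis=1, keepdims=True)

face_prob = softmax_channels(out1[10])[:, 1]
print(face_prob)  # 3x3 map of face probabilities for the smallest scale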