这是原先的代码
import numpy as np
import mtcnn.caffe_pb2 as pb
import cv2
from matplotlib import pyplot as plt
# Load the test image and build a normalised, fixed-size network input.
# NOTE(review): cv2.imread returns None when the file cannot be read; there is no check here.
raw_image = cv2.imread("face.jpg")
# Convert from OpenCV's BGR order to YCrCb.
# NOTE(review): the later preprocessing of the same file uses COLOR_BGR2RGB instead —
# confirm which colour space the model expects.
input_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2YCrCb)
# Shift and scale the pixel values: (v - 127.5) * 0.0078125, where 0.0078125 == 1 / 128.
input_image = (input_image - 127.5) * 0.0078125
# Remember the height and width before the image is resized.
origin_h, origin_w, _ = input_image.shape
# Resize to 150 x 150.
input_image = cv2.resize(input_image,(150,150))
#input_image.shape
def calculateScales(img, *, target_size=500.0, factor=0.709, min_size=12):
    """Return the list of image-pyramid scale factors for ``img``.

    The image is first notionally rescaled so that it fits ``target_size``:
    shrunk when its shorter side is larger than ``target_size``, enlarged when
    its longer side is smaller. Starting from that base scale, the scale is
    multiplied by ``factor`` repeatedly for as long as the shorter side of the
    rescaled image is still at least ``min_size``.

    Args:
        img: Array-like object with a ``shape`` whose first two entries are
            height and width. Extra axes (e.g. channels) are ignored, so both
            2-D and 3-D images are accepted.
        target_size: Side length the image is normalised to before the
            pyramid is built.
        factor: Ratio between two consecutive pyramid levels; must lie
            strictly between 0 and 1 so the loop terminates.
        min_size: Smallest shorter-side length for which a level is emitted;
            must be positive so the loop terminates.

    Returns:
        A list of floats, largest scale first. Empty when even the first
        level is smaller than ``min_size``.

    Raises:
        ValueError: If ``factor`` is not in (0, 1) or ``min_size`` is not
            positive.
    """
    if not 0 < factor < 1:
        raise ValueError("factor must be strictly between 0 and 1")
    if min_size <= 0:
        raise ValueError("min_size must be positive")

    # Only height and width matter; slicing keeps 2-D (grayscale) input working.
    h, w = img.shape[:2]

    pr_scale = 1.0
    if min(w, h) > target_size:
        # Too large: shrink so the shorter side equals target_size.
        pr_scale = target_size / min(h, w)
    elif max(w, h) < target_size:
        # Too small: enlarge so the longer side equals target_size.
        pr_scale = target_size / max(h, w)
    if pr_scale != 1.0:
        w = int(w * pr_scale)
        h = int(h * pr_scale)

    scales = []
    factor_count = 0
    minl = min(h, w)
    while minl >= min_size:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
# Pyramid scales for the preprocessed input; the bare name displays the value in a notebook.
scales = calculateScales(input_image)
scales

# Parse the serialized det1.caffemodel file into a Caffe NetParameter message.
net = pb.NetParameter()
with open(r"D:\BaiduNetdiskDownload\\2020\12.14MTCNN-BP-Conv\mtcnn\det1.caffemodel", "rb") as f:
    net.ParseFromString(f.read())

# Index every layer message by its name for lookup when the network is built.
layer_mapper = {layer.name: layer for layer in net.layer}

# Load a second image and reorder it from (h, w, c) to (1, c, h, w).
image = cv2.imread("my.jpg")
image.shape
image = np.transpose(image, (2, 0, 1))[np.newaxis]
image.shape
layer_mapper
class Initializer:
    """Base class for callable initializers.

    Calling an instance forwards all positional arguments to ``apply``, which
    concrete subclasses must provide.
    """

    def __init__(self, name):
        self.name = name

    def __call__(self, *args):
        return self.apply(*args)

    def apply(self, *args):
        """Initialise the given value(s); must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement apply()")


class GaussInitializer(Initializer):
    """Fill an array in place with samples from a normal distribution."""

    # where :math:`\mu` is the mean and :math:`\sigma` the standard
    # deviation. The square of the standard deviation, :math:`\sigma^2`,
    # is called the variance.
    def __init__(self, mu, sigma):
        # Run the base constructor so that ``name`` is always set.
        super().__init__("gauss")
        self.mu = mu
        self.sigma = sigma

    def apply(self, value):
        """Overwrite every element of ``value`` with a draw from N(mu, sigma).

        Args:
            value: NumPy array to fill; it is modified in place and keeps its
                shape.

        Returns:
            None.
        """
        value[...] = np.random.normal(self.mu, self.sigma, value.shape)
class Parameter:
    """Bundle a value array with ``delta``, a zero-filled array of the same shape."""

    def __init__(self, value):
        # ``delta`` is created by np.zeros, so it is float64 regardless of value's dtype.
        self.delta = np.zeros(value.shape)
        self.value = value

    def zero_grad(self):
        """Reset every element of ``delta`` to zero, reusing the same array."""
        self.delta.fill(0)
def conv2d_forward(x, kernel, bias, in_feature, out_feature, kernel_size, padding=0, stride=1):
    """Compute a 2-D convolution (cross-correlation) via im2col and a matrix product.

    Args:
        x: Input array of shape (batch, channels, height, width).
        kernel: Weights convertible by ``np.array`` to ``out_feature`` rows of
            ``in_feature * kernel_size * kernel_size`` values, ordered by
            channel, then kernel row, then kernel column.
        bias: ``out_feature`` bias values, one per output channel.
        in_feature: Number of input channels; must equal ``x.shape[1]``.
        out_feature: Number of output channels.
        kernel_size: Side length of the square kernel.
        padding: Number of zero rows/columns added on every border.
        stride: Step between neighbouring kernel positions.

    Returns:
        float64 array of shape (batch, out_feature, out_height, out_width),
        where ``out = (size + 2 * padding - kernel_size) // stride + 1``.

    Raises:
        ValueError: If the channel count of ``x`` differs from ``in_feature``
            or the kernel does not fit into the padded input.
    """
    ib, ic, ih, iw = x.shape
    if ic != in_feature:
        raise ValueError(f"input has {ic} channels but in_feature is {in_feature}")

    oh = (ih + padding * 2 - kernel_size) // stride + 1
    ow = (iw + padding * 2 - kernel_size) // stride + 1
    if oh < 0 or ow < 0:
        raise ValueError("kernel does not fit into the padded input")

    taps = kernel_size * kernel_size
    col_w = oh * ow

    # Zero-pad the two spatial axes once so that no bounds checks are needed below.
    padded = np.pad(x, ((0, 0), (0, 0), (padding, padding), (padding, padding)))

    # column[b, c, ky * kernel_size + kx, oy * ow + ox] holds the input value
    # under kernel tap (ky, kx) at output position (oy, ox).
    column = np.zeros((ib, ic, taps, col_w))
    for ky in range(kernel_size):
        for kx in range(kernel_size):
            patch = padded[:, :, ky:ky + stride * oh:stride, kx:kx + stride * ow:stride]
            column[:, :, ky * kernel_size + kx, :] = patch.reshape(ib, ic, col_w)
    # Flatten (channel, tap) into one axis to match the kernel's row layout.
    column = column.reshape(ib, ic * taps, col_w)

    kcol = np.array(kernel).reshape(out_feature, -1)
    # (out_feature, K) @ (batch, K, col_w) broadcasts to (batch, out_feature, col_w).
    output = (kcol @ column).reshape(ib, out_feature, oh, ow)
    return output + np.array(bias).reshape(1, out_feature, 1, 1)
def prelu(x, weigth):
    """Apply a per-channel PReLU and return the result as a new array.

    Negative entries of channel ``c`` (axis 1 of the 4-D input) are multiplied
    by ``weigth[c]``; all other entries are left unchanged. The input array
    itself is not modified.

    Args:
        x: 4-D array indexed as (batch, channel, row, column).
        weigth: Indexable sequence with one slope per channel.

    Returns:
        A copy of ``x`` with the scaling applied.
    """
    activated = x.copy()
    for channel in range(activated.shape[1]):
        # Basic slicing yields a view, so writing to it updates ``activated``.
        plane = activated[:, channel, :, :]
        plane[plane < 0] *= weigth[channel]
    return activated
def max_pooling2d(x, kernel_size, stride):
    """Max-pool a 4-D array over its last two axes, rounding the output size up.

    The output size per spatial axis is ``ceil((size - kernel_size) / stride) + 1``,
    so the last window may extend past the input border; only the elements
    that lie inside the input take part in the maximum. NaN entries are
    ignored, and a window without any usable element yields ``-inf``.

    Args:
        x: Array of shape (batch, channels, height, width).
        kernel_size: Side length of the square pooling window.
        stride: Step between neighbouring windows.

    Returns:
        float64 array of shape (batch, channels, out_height, out_width).
    """
    ib, ic, ih, iw = x.shape
    output_height = int(np.ceil((ih - kernel_size) / stride) + 1)
    output_width = int(np.ceil((iw - kernel_size) / stride) + 1)
    output = np.full((ib, ic, output_height, output_width), float("-inf"))

    # Visit each kernel tap once and fold it into the running maximum for all
    # windows at the same time.
    for ky in range(kernel_size):
        for kx in range(kernel_size):
            # Slicing clamps at the border, so ``taps`` may cover fewer
            # windows than the output has; those are the leading ones.
            taps = x[:, :, ky:ky + stride * output_height:stride, kx:kx + stride * output_width:stride]
            rows, cols = taps.shape[2:]
            running = output[:, :, :rows, :cols]
            # fmax skips NaN operands, matching a comparison-based maximum.
            np.fmax(running, taps, out=running)
    return output
class Conv2d:
    """Convolution layer that stores its weights and hyper-parameters.

    ``forward`` passes the stored settings unchanged to ``conv2d_forward``.
    """

    def __init__(self, kernel, bias, in_feature, out_feature, kernel_size, padding, stride):
        self.kernel, self.bias = kernel, bias
        self.in_feature, self.out_feature = in_feature, out_feature
        self.kernel_size, self.padding, self.stride = kernel_size, padding, stride

    def forward(self, x):
        """Return the convolution of ``x`` with the stored kernel and bias."""
        return conv2d_forward(
            x,
            kernel=self.kernel,
            bias=self.bias,
            in_feature=self.in_feature,
            out_feature=self.out_feature,
            kernel_size=self.kernel_size,
            padding=self.padding,
            stride=self.stride,
        )
class PReLU:
    """Activation layer that stores per-channel slopes and applies ``prelu``."""

    def __init__(self, weight):
        self.weight = weight

    def forward(self, x):
        """Return ``prelu`` of ``x`` using the stored slopes."""
        slopes = self.weight
        return prelu(x, slopes)
class MaxPooling2d:
    """Pooling layer that stores a window size and stride for ``max_pooling2d``."""

    def __init__(self, kernel_size, stride):
        self.kernel_size, self.stride = kernel_size, stride

    def forward(self, x):
        """Return ``max_pooling2d`` of ``x`` with the stored window size and stride."""
        return max_pooling2d(x, kernel_size=self.kernel_size, stride=self.stride)
class PNet:
    """P-Net assembled from named Caffe layers.

    The shared trunk is conv1 -> PReLU1 -> 2x2 max-pool (stride 2) -> conv2 ->
    PReLU2 -> conv3 -> PReLU3, followed by two 1x1 convolution heads:
    ``conv4-1`` with 2 output channels and ``conv4-2`` with 4 output channels.
    """

    def __init__(self, layer_mapper):
        """Build all layers from ``layer_mapper``.

        Args:
            layer_mapper: Mapping from layer name to a layer object whose
                ``blobs[i].data`` holds the weights (index 0) and, for
                convolutions, the bias (index 1).
        """

        def conv(layer_name, in_feature, out_feature, kernel_size):
            # Every convolution in this network uses no padding and stride 1.
            blobs = layer_mapper[layer_name].blobs
            return Conv2d(
                kernel=blobs[0].data,
                bias=blobs[1].data,
                in_feature=in_feature,
                out_feature=out_feature,
                kernel_size=kernel_size,
                padding=0,
                stride=1,
            )

        def activation(layer_name):
            return PReLU(layer_mapper[layer_name].blobs[0].data)

        self.layers = [
            conv("conv1", 3, 10, 3),
            activation("PReLU1"),
            MaxPooling2d(2, 2),
            conv("conv2", 10, 16, 3),
            activation("PReLU2"),
            conv("conv3", 16, 32, 3),
            activation("PReLU3"),
        ]
        self.conv41 = conv("conv4-1", 32, 2, 1)
        self.conv42 = conv("conv4-2", 32, 4, 1)

    def forward(self, image):
        """Run the trunk on ``image`` and return both head outputs.

        Returns:
            Tuple ``(conv4-1 output, conv4-2 output)``. Both are the raw
            convolution results; no softmax is applied here.
        """
        features = image
        for layer in self.layers:
            features = layer.forward(features)
        return self.conv41.forward(features), self.conv42.forward(features)
# Build the network from the parsed layers and run one forward pass on `image`.
# The returned pair (conv4-1 output, conv4-2 output) is not stored here.
pnet = PNet(layer_mapper)
pnet.forward(image)
我们换一张图片，对图像进行预处理和归一化
# NOTE(review): cv2.imread returns None when the file cannot be read; there is no check here.
raw_image = cv2.imread("face.jpg")
# Convert the colour space to RGB,
# then normalise the data.
input_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB)
input_image = (input_image - 127.5) * 0.0078125
input_image = cv2.resize(input_image, (150, 150))
# Ratio of the resized height to the original height; the detection code further
# down divides by it to map boxes back onto raw_image.
# NOTE(review): only the height ratio is used — this assumes the width ratio is the same.
scale = input_image.shape[0] / raw_image.shape[0]
# h, w, c
# c, w, h
# MATLAB lays its data out column by column (column-major)
input_image = input_image.transpose(2, 1, 0)[None]
# Reverse the channel axis (BGR -> RGB) so matplotlib shows the right colours.
plt.imshow(raw_image[..., ::-1])
input_image.shape
建立 BBox 类，为计算 IoU 做准备
class BBox:
    """Axis-aligned box given by its corners, plus a score.

    ``x``/``y`` are the left/top corner and ``r``/``b`` the right/bottom
    corner. Width and height count both end coordinates (``r - x + 1``).

    Operators:
        ``a & b``: intersection area (0 when the boxes do not overlap).
        ``a | b``: union area.
        ``a ^ b``: intersection over union (IoU).
    """

    def __init__(self, x, y, r, b, score=0):
        self.x, self.y, self.r, self.b, self.score = x, y, r, b, score

    def __xor__(self, other):
        """Return the IoU of this box and ``other``."""
        # Compute the intersection once and reuse it for the union.
        cross = self & other
        union = self.area + other.area - cross
        # The small constant avoids a division by zero for empty boxes.
        return cross / (union + 1e-6)

    def __or__(self, other):
        """Return the area of the union of this box and ``other``."""
        return self.area + other.area - (self & other)

    def __and__(self, other):
        """Return the area of the intersection of this box and ``other``."""
        width = min(self.r, other.r) - max(self.x, other.x) + 1
        height = min(self.b, other.b) - max(self.y, other.y) + 1
        if width <= 0 or height <= 0:
            return 0
        return width * height

    def locations(self):
        """Return the corners as the tuple ``(x, y, r, b)``."""
        return self.x, self.y, self.r, self.b

    @property
    def center(self):
        """Centre point as ``(cx, cy)``."""
        return (self.x + self.r) / 2, (self.y + self.b) / 2

    @property
    def area(self):
        """Width times height."""
        return self.width * self.height

    @property
    def width(self):
        """Horizontal extent, counting both end coordinates."""
        return self.r - self.x + 1

    @property
    def height(self):
        """Vertical extent, counting both end coordinates."""
        return self.b - self.y + 1

    def __repr__(self):
        return f"{{{self.x:.2f}, {self.y:.2f}, {self.r:.2f}, {self.b:.2f}, {self.score:.2f}}}"
构建 NMS（非极大值抑制）
def nms(objs, iou_threshold):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest ``score``. Each visited box is
    kept, and every remaining box whose IoU with it (``kept ^ other``) is
    greater than ``iou_threshold`` is discarded.

    Args:
        objs: Iterable of boxes exposing ``score`` and the ``^`` IoU operator.
            The input is not modified.
        iou_threshold: Overlap above which a lower-scored box is suppressed.

    Returns:
        List of the surviving boxes, ordered by descending score.
    """
    candidates = sorted(objs, key=lambda box: box.score, reverse=True)
    keeps = []
    while candidates:
        best, *rest = candidates
        keeps.append(best)
        # "not (iou > threshold)" rather than "iou <= threshold", so a box
        # whose IoU is NaN is still kept.
        candidates = [box for box in rest if not ((best ^ box) > iou_threshold)]
    return keeps
# NOTE(review): `conf` and `reg` are not assigned anywhere in the visible code;
# presumably they are the two outputs of pnet.forward(input_image) — confirm before running.
conf.shape, reg.shape
#%%
# Grid cells whose channel-1 value exceeds 0.7.
ys, xs = np.where(conf[0, 1] > 0.7)
stride = 2
cellsize = 12
# input_image = 150 x 150
# raw_image = 500 x 500
# scale = input_image / raw_image
show = raw_image.copy()
objs = []
for y, x in zip(ys, xs):
    score = conf[0, 1, y, x]
    # Position of the sliding window, mapped back by dividing by `scale`.
    bx = (x * stride + 1) / scale
    by = (y * stride + 1) / scale
    br = (x * stride + cellsize) / scale
    bb = (y * stride + cellsize) / scale
    # reg: 1x4x70x70
    regx, regy, regr, regb = (reg[0, k, y, x] for k in range(4))
    bw = br - bx + 1
    bh = bb - by + 1
    # Move every edge by its regression offset times the window size.
    bx, by, br, bb = bx + regx * bw, by + regy * bh, br + regr * bw, bb + regb * bh
    objs.append(BBox(bx, by, br, bb, score))
    #bx, by, br, bb = np.round([bx, by, br, bb]).astype(np.int32)
    #cv2.rectangle(show, (bx, by), (br, bb), (0, 255, 0), 1)
# Number of candidate boxes before and after suppression.
print(len(objs))
objs = nms(objs, 0.7)
print(len(objs))
for obj in objs:
    bx, by, br, bb = np.round(obj.locations()).astype(np.int32)
    cv2.rectangle(show, (bx, by), (br, bb), (0, 255, 0), 1)
# NMS (Non-Maximum Suppression)
# Soft-NMS
plt.figure(figsize=(20, 20))
plt.imshow(show[..., ::-1])