pnet网络完善

归一码字

已于 2024-06-17 14:22:44 修改

阅读量218

点赞数 1

文章标签： python opencv 开发语言

于 2024-05-13 13:54:20 首次发布

本文链接：https://blog.csdn.net/m0_46221545/article/details/138799993

版权

下面是原先的代码：

import numpy as np
import mtcnn.caffe_pb2 as pb
import cv2
from matplotlib import pyplot as plt

# Load the test image from disk (OpenCV decodes images in BGR channel order).
raw_image = cv2.imread("face.jpg")
# NOTE(review): this first version converts to YCrCb, while the later
# preprocessing in this file converts to RGB -- confirm which colour space
# the model expects.
input_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2YCrCb)
# Shift by 127.5 and multiply by 0.0078125 (= 1/128); for 8-bit pixel values
# this maps 0..255 to roughly [-1, 1).
input_image = (input_image - 127.5) * 0.0078125
# Height and width of the image before the resize below.
origin_h, origin_w, _ = input_image.shape


# Resize to a fixed 150x150 input; the aspect ratio is not preserved.
input_image = cv2.resize(input_image,(150,150))
#input_image.shape
def calculateScales(img, target_size=500.0, factor=0.709, min_size=12):
    """Return the image-pyramid scale factors for ``img``.

    The image size is first normalised: when the shorter side is larger than
    ``target_size`` the image is shrunk so that the shorter side matches it,
    and when the longer side is smaller than ``target_size`` the image is
    enlarged so that the longer side matches it. Starting from that base
    scale, every further level multiplies the scale by ``factor`` for as long
    as the shorter side of the scaled image is still at least ``min_size``.

    Args:
        img: Object with a ``shape`` whose first two entries are
            (height, width); any further axes (e.g. channels) are ignored.
        target_size: Side length the image is normalised to before the
            pyramid is built.
        factor: Multiplicative step between two pyramid levels; must lie
            strictly between 0 and 1 so the loop terminates.
        min_size: Smallest allowed shorter side of a pyramid level.

    Returns:
        List of scale factors relative to the original image, largest first.
        Empty when the image has a zero-sized side.

    Raises:
        ValueError: If ``factor`` is not in (0, 1) or ``target_size`` /
            ``min_size`` is not positive.
    """
    if not 0 < factor < 1:
        raise ValueError("factor must be strictly between 0 and 1")
    if target_size <= 0 or min_size <= 0:
        raise ValueError("target_size and min_size must be positive")

    # Only the two leading axes matter, so grayscale (2-D) input works too.
    h, w = img.shape[:2]
    if h <= 0 or w <= 0:
        # Nothing to scan, and this avoids dividing by a zero side below.
        return []

    pr_scale = 1.0
    if min(w, h) > target_size:
        pr_scale = target_size / min(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)
    elif max(w, h) < target_size:
        pr_scale = target_size / max(h, w)
        w = int(w * pr_scale)
        h = int(h * pr_scale)

    scales = []
    minl = min(h, w)
    while minl >= min_size:
        # len(scales) is the index of the pyramid level being added.
        scales.append(pr_scale * pow(factor, len(scales)))
        minl *= factor
    return scales


# Build the pyramid scale factors for the resized input prepared above.
scales = calculateScales(input_image)
# Bare expression: only displays the value when run in a notebook/REPL cell.
scales
# Parse the serialized Caffe model into a NetParameter protobuf message.
net = pb.NetParameter()
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
with open(r"D:\BaiduNetdiskDownload\\2020\12.14MTCNN-BP-Conv\mtcnn\det1.caffemodel", "rb") as f:
    net.ParseFromString(f.read())

# Index the layers by name so their weight blobs can be looked up directly.
layer_mapper = {item.name: item for item in net.layer}
image = cv2.imread("my.jpg")
image.shape
# Reorder (h, w, c) -> (c, h, w) and prepend a batch axis of size 1.
image = image.transpose(2, 0, 1)[None]
image.shape
layer_mapper


class Initializer:
    """Base class for named, callable initializers.

    Calling an instance forwards all positional arguments to ``apply``, which
    concrete subclasses are expected to override.
    """

    def __init__(self, name):
        self.name = name

    def __call__(self, *args):
        return self.apply(*args)

    def apply(self, *args):
        """Perform the initialization; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        # Declared explicitly so the contract relied on by __call__ is visible
        # and a missing override fails with a descriptive error.
        raise NotImplementedError(f"{type(self).__name__} must implement apply()")


class GaussInitializer(Initializer):
    """Initializer that overwrites an array with normally distributed samples.

    ``mu`` is the mean and ``sigma`` the standard deviation of the
    distribution; the square of the standard deviation, ``sigma ** 2``, is
    called the variance.
    """

    def __init__(self, mu, sigma):
        # Run the base constructor so the instance also carries the ``name``
        # attribute that Initializer.__init__ sets.
        super().__init__("gauss")
        self.mu = mu
        self.sigma = sigma

    def apply(self, value):
        """Fill ``value`` in place with samples drawn from N(mu, sigma)."""
        value[...] = np.random.normal(self.mu, self.sigma, value.shape)


class Parameter:
    """Pair an array ``value`` with a zero-initialised ``delta`` of equal shape."""

    def __init__(self, value):
        # ``delta`` is a fresh float array with the same shape as ``value``.
        self.value, self.delta = value, np.zeros(value.shape)

    def zero_grad(self):
        """Reset every entry of ``delta`` to zero, in place."""
        self.delta.fill(0)


def conv2d_forward(x, kernel, bias, in_feature, out_feature, kernel_size, padding=0, stride=1):
    """Compute a 2-D convolution (cross-correlation) via im2col and a matmul.

    Args:
        x: Input array of shape (batch, channels, height, width).
        kernel: Flat or nested sequence holding
            ``out_feature * in_feature * kernel_size * kernel_size`` weights,
            ordered as (out_feature, in_feature, ky, kx).
        bias: Sequence of ``out_feature`` bias values.
        in_feature: Number of input channels the kernel was built for.
        out_feature: Number of output channels.
        kernel_size: Side length of the square kernel.
        padding: Number of zero rows/columns added on every side.
        stride: Step between two neighbouring windows.

    Returns:
        Float array of shape (batch, out_feature, out_height, out_width).

    Raises:
        ValueError: If the computed output size is negative, i.e. the kernel
            is larger than the padded input by more than one stride.
    """
    ib, ic, ih, iw = x.shape
    oh = (ih + padding * 2 - kernel_size) // stride + 1
    ow = (iw + padding * 2 - kernel_size) // stride + 1
    if oh < 0 or ow < 0:
        raise ValueError("kernel does not fit into the padded input")
    if oh == 0 or ow == 0:
        # No window position exists, so the result is an empty feature map.
        return np.zeros((ib, out_feature, oh, ow))

    # Zero-pad once up front so window extraction needs no bounds checks.
    if padding:
        pad_spec = ((0, 0), (0, 0), (padding, padding), (padding, padding))
        padded = np.pad(x, pad_spec, mode="constant")
    else:
        padded = x

    window = kernel_size * kernel_size
    column = np.zeros((ib, window * in_feature, oh * ow))
    for c in range(ic):
        for ky in range(kernel_size):
            y_stop = ky + stride * (oh - 1) + 1
            for kx in range(kernel_size):
                x_stop = kx + stride * (ow - 1) + 1
                # All output positions for this kernel tap in one strided
                # slice, instead of one Python iteration per output pixel.
                patch = padded[:, c, ky:y_stop:stride, kx:x_stop:stride]
                row = c * window + ky * kernel_size + kx
                column[:, row, :] = patch.reshape(ib, oh * ow)

    kcol = np.array(kernel).reshape(out_feature, -1)
    # (out_feature, K) @ (batch, K, oh*ow) broadcasts over the batch axis.
    output = (kcol @ column).reshape(ib, out_feature, oh, ow)
    return output + np.array(bias).reshape(1, out_feature, 1, 1)


def prelu(x, weigth):
    """Apply a per-channel PReLU to a (batch, channels, height, width) array.

    Negative entries of channel ``c`` are multiplied by ``weigth[c]``; all
    other entries are kept. The input is not modified; a new array is
    returned.
    """
    activated = x.copy()
    for c in range(activated.shape[1]):
        # View into the copy, so the masked in-place multiply updates it.
        plane = activated[:, c, :, :]
        np.multiply(plane, weigth[c], out=plane, where=plane < 0)
    return activated


def max_pooling2d(x, kernel_size, stride):
    """Max-pool a (batch, channels, height, width) array.

    The output size is rounded up, so windows that hang over the bottom or
    right edge are evaluated on the part that lies inside the input. A window
    lying entirely outside the input yields ``-inf``.
    """
    batch, channels, in_h, in_w = x.shape
    out_h = int(np.ceil((in_h - kernel_size) / stride) + 1)
    out_w = int(np.ceil((in_w - kernel_size) / stride) + 1)
    pooled = np.zeros((batch, channels, out_h, out_w))

    for b, c, oy, ox in np.ndindex(batch, channels, out_h, out_w):
        top = oy * stride
        left = ox * stride
        # Slicing clips the window at the input border automatically.
        window = x[b, c, top:top + kernel_size, left:left + kernel_size]
        best = float("-inf")
        for candidate in window.flat:
            best = max(best, candidate)
        pooled[b, c, oy, ox] = best
    return pooled


class Conv2d:
    """Convolution layer holding its weights and hyper-parameters.

    ``forward`` hands the stored settings to ``conv2d_forward``.
    """

    def __init__(self, kernel, bias, in_feature, out_feature, kernel_size, padding, stride):
        self.kernel, self.bias = kernel, bias
        self.in_feature, self.out_feature = in_feature, out_feature
        self.kernel_size, self.padding, self.stride = kernel_size, padding, stride

    def forward(self, x):
        """Convolve ``x`` with the stored kernel and bias."""
        return conv2d_forward(
            x,
            kernel=self.kernel,
            bias=self.bias,
            in_feature=self.in_feature,
            out_feature=self.out_feature,
            kernel_size=self.kernel_size,
            padding=self.padding,
            stride=self.stride,
        )


class PReLU:
    """PReLU layer that keeps the per-channel slopes passed at construction."""

    def __init__(self, weight):
        self.weight = weight

    def forward(self, x):
        """Apply ``prelu`` to ``x`` using the stored slopes."""
        slopes = self.weight
        return prelu(x, slopes)


class MaxPooling2d:
    """Max-pooling layer configured by a window size and a stride."""

    def __init__(self, kernel_size, stride):
        self.kernel_size, self.stride = kernel_size, stride

    def forward(self, x):
        """Pool ``x`` with ``max_pooling2d`` using the stored settings."""
        size, step = self.kernel_size, self.stride
        return max_pooling2d(x, size, step)


class PNet:
    """P-Net forward pass assembled from the weight blobs of a parsed model.

    The shared trunk is conv1 -> PReLU1 -> 2x2 max-pool -> conv2 -> PReLU2 ->
    conv3 -> PReLU3. Two 1x1 convolution heads run on the trunk output:
    ``conv4-1`` with 2 output channels and ``conv4-2`` with 4 output channels.
    """

    def __init__(self, layer_mapper):
        """Build all layers from ``layer_mapper`` (layer name -> layer message).

        Each layer message must expose ``blobs[i].data``; convolution layers
        provide weights in blob 0 and biases in blob 1, PReLU layers provide
        their slopes in blob 0.
        """
        # Resolve every trunk layer first so a missing name fails before any
        # blob is touched.
        trunk_names = ("conv1", "PReLU1", "conv2", "PReLU2", "conv3", "PReLU3")
        conv1, act1, conv2, act2, conv3, act3 = [layer_mapper[name] for name in trunk_names]

        self.layers = [
            self._conv(conv1, 3, 10, 3),
            PReLU(act1.blobs[0].data),
            MaxPooling2d(2, 2),
            self._conv(conv2, 10, 16, 3),
            PReLU(act2.blobs[0].data),
            self._conv(conv3, 16, 32, 3),
            PReLU(act3.blobs[0].data),
        ]

        self.conv41 = self._conv(layer_mapper["conv4-1"], 32, 2, 1)
        self.conv42 = self._conv(layer_mapper["conv4-2"], 32, 4, 1)

    @staticmethod
    def _conv(layer, in_feature, out_feature, kernel_size):
        """Create an unpadded, stride-1 Conv2d from a layer's weight/bias blobs."""
        return Conv2d(
            kernel=layer.blobs[0].data,
            bias=layer.blobs[1].data,
            in_feature=in_feature,
            out_feature=out_feature,
            kernel_size=kernel_size,
            padding=0,
            stride=1,
        )

    def forward(self, image):
        """Run the trunk on ``image`` and return ``(conv4-1 out, conv4-2 out)``."""
        features = image
        for layer in self.layers:
            features = layer.forward(features)

        return self.conv41.forward(features), self.conv42.forward(features)


# Build the network from the parsed Caffe layers.
pnet = PNet(layer_mapper)
# Run one forward pass on ``image``; the result is not stored, so it is only
# visible when this is the last expression of a notebook/REPL cell.
pnet.forward(image)

接下来换一张图片，对图像进行预处理和归一化：

raw_image = cv2.imread("face.jpg")
# Convert the colour space to RGB.
# Normalise the data.
input_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB)
input_image = (input_image - 127.5) * 0.0078125
input_image = cv2.resize(input_image, (150, 150))
# Ratio of the resized height to the original height.
# NOTE(review): only the height is used, so the width is assumed to shrink
# by the same factor -- confirm for non-square images.
scale = input_image.shape[0] / raw_image.shape[0]

# h, w, c
# c, w, h
# MATLAB lays data out column by column (column-major order).
input_image = input_image.transpose(2, 1, 0)[None]
# Reverse the channel axis (BGR -> RGB) so matplotlib shows correct colours.
plt.imshow(raw_image[..., ::-1])
input_image.shape

建立 BBox 类，为计算 IoU 做准备：

class BBox:
    """Axis-aligned box with left/top ``(x, y)``, right/bottom ``(r, b)`` and a score.

    Width and height count both border coordinates (``r - x + 1``). The
    operators work on areas: ``a & b`` is the intersection area, ``a | b``
    the union area and ``a ^ b`` the IoU.
    """

    def __init__(self, x, y, r, b, score=0):
        self.x = x
        self.y = y
        self.r = r
        self.b = b
        self.score = score

    @property
    def width(self):
        return self.r - self.x + 1

    @property
    def height(self):
        return self.b - self.y + 1

    @property
    def area(self):
        return self.width * self.height

    @property
    def center(self):
        return (self.x + self.r) / 2, (self.y + self.b) / 2

    def locations(self):
        """Return the corner coordinates as ``(x, y, r, b)``."""
        return self.x, self.y, self.r, self.b

    def __and__(self, other):
        """Return the intersection area of this box and ``other`` (0 if disjoint)."""
        left = max(self.x, other.x)
        top = max(self.y, other.y)
        right = min(self.r, other.r)
        bottom = min(self.b, other.b)
        overlap_w = right - left + 1
        overlap_h = bottom - top + 1
        if overlap_w <= 0 or overlap_h <= 0:
            return 0

        return overlap_w * overlap_h

    def __or__(self, other):
        """Return the union area of this box and ``other``."""
        return self.area + other.area - (self & other)

    def __xor__(self, other):
        """Return the IoU of this box and ``other``."""
        cross = self & other
        # The small constant keeps the division defined for a zero union.
        return cross / ((self | other) + 1e-6)

    def __repr__(self):
        return f"{{{self.x:.2f}, {self.y:.2f}, {self.r:.2f}, {self.b:.2f}, {self.score:.2f}}}"

构建 NMS（非极大值抑制）：

def nms(objs, iou_threshold):
    """Greedy non-maximum suppression.

    Boxes are visited from highest to lowest ``score``. Each visited box is
    kept, and every remaining box whose IoU with it (``kept ^ other``) is
    greater than ``iou_threshold`` is discarded.

    Returns:
        The kept boxes, ordered by descending score.
    """
    remaining = sorted(objs, key=lambda box: box.score, reverse=True)
    keeps = []

    while remaining:
        best, *rest = remaining
        keeps.append(best)
        # Only boxes that do not overlap ``best`` too strongly stay in play.
        remaining = [box for box in rest if not (best ^ box) > iou_threshold]
    return keeps

# Run P-Net on the preprocessed input. ``conf`` and ``reg`` are read by
# everything below but were not assigned anywhere in this file.
conf, reg = pnet.forward(input_image)
conf.shape, reg.shape
#%%
# NOTE(review): PNet.forward in this file returns the raw conv4-1 output with
# no softmax applied -- confirm that 0.7 is a suitable threshold for it.
# NOTE(review): input_image was transposed to (c, w, h), so the first spatial
# axis of ``conf`` presumably runs along the image width -- confirm that
# ``ys``/``xs`` are not swapped for non-symmetric inputs.
ys, xs = np.where(conf[0, 1] > 0.7)
stride = 2
cellsize = 12

# input_image = 150 x 150
# raw_image = 500 x 500
# scale = input_image / raw_image
show = raw_image.copy()
objs = []
for y, x in zip(ys, xs):
    score = conf[0, 1, y, x]

    # Position of the sliding window for this output cell, mapped back to
    # raw_image coordinates by dividing by ``scale``.
    bx = (x * stride + 1) / scale
    by = (y * stride + 1) / scale
    br = (x * stride + cellsize) / scale
    bb = (y * stride + cellsize) / scale

    # reg: 1x4x70x70
    regx = reg[0, 0, y, x]
    regy = reg[0, 1, y, x]
    regr = reg[0, 2, y, x]
    regb = reg[0, 3, y, x]

    # The regression outputs are applied as fractions of the window size.
    bw = br - bx + 1
    bh = bb - by + 1
    bx = bx + regx * bw
    by = by + regy * bh
    br = br + regr * bw
    bb = bb + regb * bh

    objs.append(BBox(bx, by, br, bb, score))

# Number of candidate boxes before and after suppression.
print(len(objs))
objs = nms(objs, 0.7)
print(len(objs))

for obj in objs:
    # cv2.rectangle needs integer pixel coordinates.
    bx, by, br, bb = np.round(obj.locations()).astype(np.int32)
    cv2.rectangle(show, (bx, by), (br, bb), (0, 255, 0), 1)

# NMS (Non-Maximum Suppression)
# Soft-NMS
plt.figure(figsize=(20, 20))
# Reverse the channel axis (BGR -> RGB) for matplotlib.
plt.imshow(show[..., ::-1])