MTCNN实践——人脸检测功能应用

 

  本人对MTCNN的具体算法实现没有进行过深入的了解,这里只是综合现有的资源使用MTCNN这套开源的库来实现人脸检测等功能。关于MTCNN的详细内容可以参考下面资料

    Multi task Cascaded Convolutional Networks

    MTCNN-将多任务级联卷积神经网络用于人脸检测和对齐

    MTCNN(Multi-task convolutional neural networks)人脸对齐

    MTCNN人脸检测 附完整C++代码 

  《Multi task Cascaded Convolutional Networks》是官方文档,提供了原始论文和MTCNN源代码。《MTCNN-将多任务级联卷积神经网络用于人脸检测和对齐》是对该论文的一个翻译,英文不够好的可以参考一下。《MTCNN(Multi-task convolutional neural networks)人脸对齐》是对MTCNN的一个解析。《MTCNN人脸检测 附完整C++代码》是对MTCNN进行的一个裁剪,使这个库更加精简。

    下面代码是在《MTCNN人脸检测 附完整C++代码》提供的代码基础上进行的移植。该文作者的代码应该是在Windows上测试的,实际在linux下还需要做一些其他设置。另外,他的代码如果交叉编译成arm平台应用程序,会出现检测不到人脸的问题。本文主要做的就是把他的代码在linux系统中跑起来,并且能够移植到嵌入式设备中去。

直接上代码:

#include "mtcnn.h"
#include "browse.h"
#define USE_SHELL_OPEN
#ifndef  nullptr
#define nullptr 0
#endif
#if defined(_MSC_VER)
#define _CRT_SECURE_NO_WARNINGS
#include <windows.h> 
#else
#include <unistd.h>
#endif
#define STB_IMAGE_STATIC
#define STB_IMAGE_IMPLEMENTATION

#include "stb_image.h"
//ref:https://github.com/nothings/stb/blob/master/stb_image.h
#define TJE_IMPLEMENTATION

#include "tiny_jpeg.h"
//ref:https://github.com/serge-rgb/TinyJPEG/blob/master/tiny_jpeg.h

#include <stdint.h>
#include <time.h>
#include "timing.h"

// Fallback sizes (in bytes, including the terminating NUL) for the individual
// path components. Each one is defined only when the toolchain's headers have
// not already provided it.
#ifndef _MAX_DRIVE
#define _MAX_DRIVE 3
#endif
#ifndef _MAX_FNAME
#define _MAX_FNAME 256
#endif
#ifndef _MAX_EXT
#define _MAX_EXT 256
#endif
#ifndef _MAX_DIR
#define _MAX_DIR 256
#endif

// Output path buffer shared by main() and saveImage(): main() fills in the
// prefix through getCurrentFilePath(), and saveImage() appends its filename
// argument to whatever string is already stored here.
char saveFile[1024];

// Decode the image file `filename` through stb_image.
// On return *Width, *Height and *Channels hold whatever stbi_load reported.
// The returned pixel buffer is whatever stbi_load returned (main() treats a
// null result as failure and releases the buffer with free()).
unsigned char *loadImage(const char *filename, int *Width, int *Height, int *Channels) {
	// Last argument 0: no specific channel count is requested from the decoder.
	unsigned char *pixels = stbi_load(filename, Width, Height, Channels, 0);
	return pixels;
}

// Append `filename` to the path already stored in the global `saveFile`
// buffer, encode `Output` (Width x Height pixels, `Channels` bytes per pixel)
// as a JPEG at that path and, when USE_SHELL_OPEN is defined, hand the file to
// browse().
//
// Note that the global buffer is modified in place, so a second call appends
// to the result of the first one.
void saveImage(const char *filename, int Width, int Height, int Channels, unsigned char *Output) {
	size_t prefixLen = strlen(saveFile);
	size_t nameLen = strlen(filename);
	// Refuse to write past the end of the fixed-size buffer.
	if (prefixLen + nameLen >= sizeof(saveFile)) {
		fprintf(stderr, "save JPEG fail.\n");
		return;
	}
	// Copy the suffix together with its terminating NUL so the result is a
	// valid string regardless of what the buffer held after the old terminator.
	memcpy(saveFile + prefixLen, filename, nameLen + 1);
	// save as JPEG
	if (!tje_encode_to_file(saveFile, Width, Height, Channels, true, Output)) {
		fprintf(stderr, "save JPEG fail.\n");
		return;
	}

#ifdef USE_SHELL_OPEN
	browse(saveFile);
#endif
}

// Split `path` into drive, directory, base name and extension.
//
//   drv  - receives "X:" when the path starts with a drive prefix, else "".
//   dir  - receives everything after the drive up to and including the last
//          '\\' or '/' separator.
//   name - receives the last component without its extension.
//   ext  - receives the extension including the leading '.', or "" if none.
//
// Any output pointer may be NULL, in which case that part is skipped. The
// caller must supply buffers large enough for the respective parts; no sizes
// are passed in, so nothing is bounds-checked here.
void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) {
	const char *end;
	const char *p;
	const char *s;
	if (path[0] && path[1] == ':') {
		if (drv) {
			drv[0] = path[0];
			drv[1] = path[1];
			drv[2] = '\0';
		}
		// Always step over the drive prefix, even when the caller does not want
		// it; otherwise the scan below would stop at the drive's ':'.
		path += 2;
	}
	else if (drv) {
		*drv = '\0';
	}
	// `end` = end of string, or the first ':' after the drive prefix.
	for (end = path; *end && *end != ':';) {
		end++;
	}
	// Walk back through the last component looking for the extension dot.
	for (p = end; p > path && *--p != '\\' && *p != '/';) {
		if (*p == '.') {
			end = p;
			break;
		}
	}
	if (ext) {
		// Copies from `end` through the terminating NUL.
		s = end;
		while ((*ext = *s++) != '\0') {
			ext++;
		}
	}
	// Walk back to just after the last separator (or to the start of the path).
	for (p = end; p > path;) {
		if (*--p == '\\' || *p == '/') {
			p++;
			break;
		}
	}
	if (name) {
		for (s = p; s < end;) {
			*name++ = *s++;
		}
		*name = '\0';
	}
	if (dir) {
		for (s = path; s < p;) {
			*dir++ = *s++;
		}
		*dir = '\0';
	}
}

// Write `filePath` with its extension replaced by a single '_' into
// `saveFile`, e.g. "../sample.jpg" -> "../sample_".
//
// `saveFile` here is the parameter (it shadows the global of the same name)
// and must have room for strlen(filePath) + 2 bytes.
//
// The extension is located directly in `filePath` using the same rules as
// splitpath(): skip an optional "X:" drive prefix, stop at end of string or the
// next ':', then look for a '.' in the last path component. Doing it in place
// avoids copying path components into fixed-size stack buffers, which would
// overflow for long paths.
void getCurrentFilePath(const char *filePath, char *saveFile) {
	const char *start = filePath;
	if (start[0] && start[1] == ':') {
		start += 2;
	}
	const char *end = start;
	while (*end && *end != ':') {
		end++;
	}
	for (const char *p = end; p > start && *--p != '\\' && *p != '/';) {
		if (*p == '.') {
			end = p;
			break;
		}
	}
	// Everything before `end` is kept; the extension (if any) is dropped.
	size_t stemLen = (size_t) (end - filePath);
	memcpy(saveFile, filePath, stemLen);
	saveFile[stemLen] = '_';
	saveFile[stemLen + 1] = 0;
}

// Set the pixel at (x, y) of an interleaved image to `color`.
//
//   bits  - pixel buffer, `width` pixels per row, `depth` bytes per pixel
//   color - at least min(depth, 3) bytes; only the first min(depth, 3) bytes
//           of the pixel are written, so a 4th channel is left untouched
//
// Points with x outside [0, width) or with negative y are ignored instead of
// being written to a neighbouring row or in front of the buffer. The image
// height is not passed in, so the caller is still responsible for keeping y
// below the number of rows.
void drawPoint(unsigned char *bits, int width, int depth, int x, int y, const uint8_t *color) {
	if (depth <= 0 || x < 0 || x >= width || y < 0) {
		return;
	}
	int channels = depth < 3 ? depth : 3;
	unsigned char *pixel = bits + ((size_t) y * (size_t) width + (size_t) x) * (size_t) depth;
	for (int i = 0; i < channels; ++i) {
		pixel[i] = color[i];
	}
}

// Draw a straight line from (startX, startY) to (endX, endY), both endpoints
// included, by calling drawPoint() for every pixel along it.
//
// The line is stepped along its longer axis so steep lines have no gaps, and
// the two endpoints are swapped as a pair when the stepping direction would
// otherwise be negative (swapping only one coordinate would mirror the line).
void drawLine(unsigned char *bits, int width, int depth, int startX, int startY, int endX, int endY,
	const uint8_t *col) {
	if (endX == startX) {
		// Vertical line (or a single point): no slope to compute.
		int top = startY < endY ? startY : endY;
		int bottom = startY < endY ? endY : startY;
		for (int y = top; y <= bottom; y++) {
			drawPoint(bits, width, depth, startX, y, col);
		}
		return;
	}

	int spanX = endX > startX ? endX - startX : startX - endX;
	int spanY = endY > startY ? endY - startY : startY - endY;

	if (spanY <= spanX) {
		// Shallow line: one pixel per column.
		if (startX > endX) {
			int t = startX; startX = endX; endX = t;
			t = startY; startY = endY; endY = t;
		}
		float m = 1.0f * (endY - startY) / (endX - startX);
		for (int x = startX; x <= endX; x++) {
			int y = (int) (m * (x - startX) + startY);
			drawPoint(bits, width, depth, x, y, col);
		}
	}
	else {
		// Steep line: one pixel per row.
		if (startY > endY) {
			int t = startX; startX = endX; endX = t;
			t = startY; startY = endY; endY = t;
		}
		float inv = 1.0f * (endX - startX) / (endY - startY);
		for (int y = startY; y <= endY; y++) {
			int x = (int) (inv * (y - startY) + startX);
			drawPoint(bits, width, depth, x, y, col);
		}
	}
}

// Outline the axis-aligned rectangle with opposite corners (x1, y1) and
// (x2, y2) by drawing its four edges with drawLine().
void drawRectangle(unsigned char *bits, int width, int depth, int x1, int y1, int x2, int y2, const uint8_t *col) {
	// Corner sequence walked edge by edge; the 5th entry closes the loop.
	const int cornerX[5] = {x1, x2, x2, x1, x1};
	const int cornerY[5] = {y1, y1, y2, y2, y1};
	for (int edge = 0; edge < 4; ++edge) {
		drawLine(bits, width, depth, cornerX[edge], cornerY[edge], cornerX[edge + 1], cornerY[edge + 1], col);
	}
}

// Two-argument maximum / minimum, defined only if not already provided.
// These are plain macros: the selected argument expression is evaluated twice,
// so avoid passing expressions with side effects.
#ifndef MAX
#define MAX(a, b) (((a) > (b)) ? (a): (b))
#endif
#ifndef MIN
#define MIN(a, b) (((a) > (b)) ? (b): (a))
#endif

// Saturate `Value` to the range [0, 255] and return it as a byte.
// Written with plain comparisons so the result does not depend on how the
// compiler shifts negative integers and cannot overflow for extreme inputs.
unsigned char ClampToByte(int Value) {
    if (Value < 0)
        return 0;
    if (Value > 255)
        return 255;
    return (unsigned char) Value;
}

// Limit `Value` to the closed interval [Min, Max].
// The lower bound is tested first, so Min wins if the bounds are inverted and
// Value lies below Min.
int Clamp(int Value, int Min, int Max) {
    if (Value < Min) {
        return Min;
    }
    return (Value > Max) ? Max : Value;
}

// Tone down strongly red pixels inside a disc of radius `Radius` centred on
// (CenterX, CenterY).
//
//   input / output - interleaved images of `width` x `height` pixels with
//                    `depth` bytes per pixel; the first three bytes of a pixel
//                    are read as red, green, blue. They may be the same buffer.
//
// Only pixels inside the disc whose red ratio exceeds the threshold are
// written to `output`; every other output pixel is left as it was. Images with
// fewer than 3 channels, null buffers or non-positive dimensions are ignored.
void RemoveRedEyes(unsigned char *input, unsigned char *output, int width, int height, int depth, int CenterX, int CenterY,
              int Radius) {
    if (depth < 3 || input == nullptr || output == nullptr || width <= 0 || height <= 0) {
        return;
    }

    // Bounding box of the disc, clipped to the image.
    const int Left = Clamp(CenterX - Radius, 0, width);
    const int Top = Clamp(CenterY - Radius, 0, height);
    const int Right = Clamp(CenterX + Radius, 0, width);
    const int Bottom = Clamp(CenterY + Radius, 0, height);
    const int RadiusSquared = Radius * Radius;

    for (int Y = Top; Y < Bottom; Y++) {
        const int rowOffset = Y * width * depth;
        const int OffsetY = Y - CenterY;
        for (int X = Left; X < Right; X++) {
            const int OffsetX = X - CenterX;
            // Skip the corners of the bounding box that lie outside the disc.
            if (OffsetX * OffsetX + OffsetY * OffsetY > RadiusSquared) {
                continue;
            }

            const int pixelOffset = rowOffset + X * depth;
            const unsigned char *src = input + pixelOffset;
            unsigned char *dst = output + pixelOffset;

            // Read all three channels first so in-place operation is safe.
            int Red = src[0];
            int Green = src[1];
            int Blue = src[2];

            int nrv = Blue + Green;
            if (nrv < 1) nrv = 1;

            float bluf = (Green > 1) ? (float) Blue / Green : (float) Blue;
            bluf = MAX(0.5f, MIN(1.5f, sqrt(bluf)));

            // Red relative to green+blue, weighted by the blue/green balance.
            float redq = (float) Red / nrv * bluf;
            if (!(redq > 0.7f)) {
                continue;
            }

            float powr = 1.775f - (redq * 0.75f +
                                   0.25f);
            if (powr < 0) powr = 0;
            powr = powr * powr;
            float powb = 0.5f + powr * 0.5f;
            float powg = 0.75f + powr * 0.25f;
            dst[0] = ClampToByte(powr * Red + 0.5f);
            dst[1] = ClampToByte(powg * Green + 0.5f);
            dst[2] = ClampToByte(powb * Blue + 0.5f);
        }
    }
}

// Rotate an interleaved 8-bit image by `angle` degrees about its centre using
// bilinear interpolation and write the result to `destinationData`.
//
//   sourceData      - source pixels, `width` x `height`, `Channels` bytes per
//                     pixel, `RowBytes` bytes from one row to the next
//   destinationData - tightly packed output, `newWidth` x `newHeight` pixels
//                     with the same channel count
//   angle           - rotation in degrees
//   keepSize        - accepted for interface compatibility; not used
//   fillColorR/G/B  - colour for output pixels that map outside the source.
//                     Single-channel images use fillColorG.
//
// Supported channel counts are 1, 3 and 4; for 4 channels the output alpha is
// always set to 255. For any other channel count, or if either buffer is NULL,
// the function returns without touching the destination.
void RotateBilinear(unsigned char *sourceData, int width, int height, int Channels, int RowBytes,
                    unsigned char *destinationData, int newWidth, int newHeight, float angle, bool keepSize = true,
                    int fillColorR = 255, int fillColorG = 255, int fillColorB = 255) {
    if (sourceData == NULL || destinationData == NULL) return;
    if (Channels != 1 && Channels != 3 && Channels != 4) return;
    (void) keepSize;

    // Centres of the source and destination images.
    float oldXradius = (float) (width - 1) / 2;
    float oldYradius = (float) (height - 1) / 2;
    float newXradius = (float) (newWidth - 1) / 2;
    float newYradius = (float) (newHeight - 1) / 2;

    double MPI = 3.14159265358979323846;
    double angleRad = -angle * MPI / 180.0;
    float angleCos = (float) cos(angleRad);
    float angleSin = (float) sin(angleRad);

    int srcStride = RowBytes;
    int ymax = height - 1;
    int xmax = width - 1;

    // Number of interpolated channels and the fill value for each of them.
    int colorChannels;
    unsigned char fill[3];
    if (Channels == 1) {
        colorChannels = 1;
        fill[0] = (unsigned char) fillColorG;
        fill[1] = fill[0];
        fill[2] = fill[0];
    } else {
        colorChannels = 3;
        fill[0] = (unsigned char) fillColorR;
        fill[1] = (unsigned char) fillColorG;
        fill[2] = (unsigned char) fillColorB;
    }

    unsigned char *dst = destinationData;

    // (cx, cy) is the destination pixel relative to the destination centre;
    // (ox, oy) is the matching position in source coordinates.
    float cy = -newYradius;
    for (int y = 0; y < newHeight; y++, cy++) {
        float tx = angleSin * cy + oldXradius;
        float ty = angleCos * cy + oldYradius;

        float cx = -newXradius;
        for (int x = 0; x < newWidth; x++, cx++, dst += Channels) {
            float ox = tx + angleCos * cx;
            float oy = ty - angleSin * cx;

            int ox1 = (int) ox;
            int oy1 = (int) oy;

            if ((ox1 < 0) || (oy1 < 0) || (ox1 >= width) || (oy1 >= height)) {
                for (int c = 0; c < colorChannels; c++) {
                    dst[c] = fill[c];
                }
            } else {
                // Right/bottom neighbours, clamped at the image border.
                int ox2 = (ox1 == xmax) ? ox1 : ox1 + 1;
                int oy2 = (oy1 == ymax) ? oy1 : oy1 + 1;

                // Interpolation weights.
                float dx1 = ox - (float) ox1;
                if (dx1 < 0)
                    dx1 = 0;
                float dx2 = 1.0f - dx1;
                float dy1 = oy - (float) oy1;
                if (dy1 < 0)
                    dy1 = 0;
                float dy2 = 1.0f - dy1;

                // The four source pixels surrounding (ox, oy).
                const unsigned char *row1 = sourceData + oy1 * srcStride;
                const unsigned char *row2 = sourceData + oy2 * srcStride;
                const unsigned char *p1 = row1 + ox1 * Channels;
                const unsigned char *p2 = row1 + ox2 * Channels;
                const unsigned char *p3 = row2 + ox1 * Channels;
                const unsigned char *p4 = row2 + ox2 * Channels;

                for (int c = 0; c < colorChannels; c++) {
                    dst[c] = (unsigned char) (
                            dy2 * (dx2 * p1[c] + dx1 * p2[c]) +
                            dy1 * (dx2 * p3[c] + dx1 * p4[c]));
                }
            }
            if (Channels == 4) {
                dst[3] = 255;
            }
        }
    }
}

// Rotate `inputImage` in place so that the line through the two eye positions
// becomes horizontal.
//
//   inputImage      - tightly packed Width x Height image, `Channels` bytes per
//                     pixel; overwritten with the rotated result
//   left_eye_*      - pixel position of the left eye
//   right_eye_*     - pixel position of the right eye
//
// The rotation angle is atan(dy / dx) of the eye offset; when the eyes share
// the same x coordinate the angle is taken as 0. The image is left unchanged
// if it is null, has non-positive dimensions, has a channel count other than
// 1, 3 or 4 (the only ones RotateBilinear writes output for), or if the
// temporary buffer cannot be allocated.
void facialPoseCorrection(unsigned char *inputImage, int Width, int Height, int Channels, int left_eye_x,
                          int left_eye_y,
                          int right_eye_x, int right_eye_y) {
    if (inputImage == nullptr || Width <= 0 || Height <= 0) return;
    // Without this guard an uninitialised buffer would be copied over the image.
    if (Channels != 1 && Channels != 3 && Channels != 4) return;

    float diffEyeX = (float) (right_eye_x - left_eye_x);
    float diffEyeY = (float) (right_eye_y - left_eye_y);

    float fAngle;
    float pi = 3.1415926535897932384626433832795f;
    if (fabs(diffEyeX) < 0.0000001f)
        fAngle = 0.f;
    else
        fAngle = atanf(diffEyeY / diffEyeX) * 180.0f / pi;

    // Multiply in size_t so large images do not overflow int arithmetic.
    size_t numberOfBytes = (size_t) Width * (size_t) Height * (size_t) Channels * sizeof(unsigned char);
    unsigned char *outputImage = (unsigned char *) malloc(numberOfBytes);
    if (outputImage != nullptr) {
        RotateBilinear(inputImage, Width, Height, Channels, Width * Channels, outputImage, Width, Height, fAngle);
        memcpy(inputImage, outputImage, numberOfBytes);
        free(outputImage);
    }
}

int main(int argc, char **argv) {
	printf("mtcnn face detection\n");
	printf("blog:http://cpuimage.cnblogs.com/\n");

	if (argc < 2) {
		printf("usage: %s  model_path image_file \n ", argv[0]);
		printf("eg: %s  ../models ../sample.jpg \n ", argv[0]);
		printf("press any key to exit. \n");
		getchar();
		return 0;
	}
	const char *model_path = argv[1];
	char *szfile = argv[2];
	getCurrentFilePath(szfile, saveFile);
	int Width = 0;
	int Height = 0;
	int Channels = 0;
	unsigned char *inputImage = loadImage(szfile, &Width, &Height, &Channels);
	if (inputImage == nullptr || Channels != 3) return -1;
	ncnn::Mat ncnn_img = ncnn::Mat::from_pixels(inputImage, ncnn::Mat::PIXEL_RGB, Width, Height);
	std::vector<Bbox> finalBbox;
	MTCNN mtcnn(model_path);
	double startTime = now();
	mtcnn.detect(ncnn_img, finalBbox);
	double nDetectTime = calcElapsed(startTime, now());
	printf("time: %d ms.\n ", (int)(nDetectTime * 1000));
	int num_box = finalBbox.size();
	printf("face num: %u \n", num_box);
    //bool draw_face_feat = false;
    bool draw_face_feat = true;
    int left_eye_x = 0;
    int left_eye_y = 0;
    int right_eye_x = 0;
    int right_eye_y = 0;
    for (int i = 0; i < num_box; i++) {
        if (draw_face_feat) {
            const uint8_t red[3] = {255, 0, 0};

            drawRectangle(inputImage, Width, Channels, finalBbox[i].x1, finalBbox[i].y1,
                          finalBbox[i].x2,
                          finalBbox[i].y2, red);
            const uint8_t blue[3] = {0, 0, 255};

            for (int num = 0; num < 5; num++) {
                drawPoint(inputImage, Width, Channels, (int) (finalBbox[i].ppoint[num] + 0.5f),
                          (int) (finalBbox[i].ppoint[num + 5] + 0.5f), blue);
            }
        }
        left_eye_x = (int) (finalBbox[i].ppoint[0] + 0.5f);
        left_eye_y = (int) (finalBbox[i].ppoint[5] + 0.5f);
        right_eye_x = (int) (finalBbox[i].ppoint[1] + 0.5f);
        right_eye_y = (int) (finalBbox[i].ppoint[6] + 0.5f);
        int dis_eye = (int) sqrtf((right_eye_x - left_eye_x) * (right_eye_x - left_eye_x) +
                                  (right_eye_y - left_eye_y) * (right_eye_y - left_eye_y));
        int radius = MAX(1, dis_eye / 9);
        RemoveRedEyes(inputImage, inputImage, Width, Height, Channels, left_eye_x, left_eye_y, radius);
        RemoveRedEyes(inputImage, inputImage, Width, Height, Channels, right_eye_x, right_eye_y, radius);
    }
    facialPoseCorrection(inputImage, Width, Height, Channels, left_eye_x, left_eye_y, right_eye_x, right_eye_y);
    saveImage("_done.jpg", Width, Height, Channels, inputImage);
    free(inputImage);
    printf("press any key to exit. \n");
    getchar();
    return 0;
}

完整工程文件如下:

licaibiao@ubuntu:~/MTCNN/MTCNN$ ls
imag  Makefile  models  readme  src
licaibiao@ubuntu:~/MTCNN/MTCNN$ tree
.
├── imag
│   ├── 001.jpg
│   └── 002.jpg
├── Makefile
├── models
│   ├── det1.bin
│   ├── det1.param
│   ├── det2.bin
│   ├── det2.param
│   ├── det3.bin
│   └── det3.param
├── readme
└── src
    ├── browse.h
    ├── main.cpp
    ├── mtcnn.cpp
    ├── mtcnn.h
    ├── ncnn
    │   ├── blob.cpp
    │   ├── blob.h
    │   ├── layer
    │   │   ├── arm
    │   │   │   ├── convolution_1x1.h
    │   │   │   ├── convolution_2x2.h
    │   │   │   ├── convolution_3x3.h
    │   │   │   ├── convolution_4x4.h
    │   │   │   ├── convolution_5x5.h
    │   │   │   ├── convolution_7x7.h
    │   │   │   ├── convolution_arm.cpp
    │   │   │   ├── convolution_arm.h
    │   │   │   ├── innerproduct_arm.cpp
    │   │   │   ├── innerproduct_arm.h
    │   │   │   ├── neon_mathfun.h
    │   │   │   ├── pooling_2x2.h
    │   │   │   ├── pooling_3x3.h
    │   │   │   ├── pooling_arm.cpp
    │   │   │   ├── pooling_arm.h
    │   │   │   ├── prelu_arm.cpp
    │   │   │   ├── prelu_arm.h
    │   │   │   ├── softmax_arm.cpp
    │   │   │   └── softmax_arm.h
    │   │   ├── convolution.cpp
    │   │   ├── convolution.h
    │   │   ├── dropout.cpp
    │   │   ├── dropout.h
    │   │   ├── innerproduct.cpp
    │   │   ├── innerproduct.h
    │   │   ├── input.cpp
    │   │   ├── input.h
    │   │   ├── pooling.cpp
    │   │   ├── pooling.h
    │   │   ├── prelu.cpp
    │   │   ├── prelu.h
    │   │   ├── softmax.cpp
    │   │   ├── softmax.h
    │   │   ├── split.cpp
    │   │   ├── split.h
    │   │   └── x86
    │   │       ├── avx_mathfun.h
    │   │       ├── convolution_3x3.h
    │   │       ├── convolution_5x5.h
    │   │       ├── convolution_x86.cpp
    │   │       ├── convolution_x86.h
    │   │       └── sse_mathfun.h
    │   ├── layer.cpp
    │   ├── layer.h
    │   ├── layer_type.h
    │   ├── mat.cpp
    │   ├── mat.h
    │   ├── mat_pixel.cpp
    │   ├── net.cpp
    │   ├── net.h
    │   ├── paramdict.cpp
    │   ├── paramdict.h
    │   └── platform.h
    ├── stb_image.h
    ├── timing.h
    └── tiny_jpeg.h

7 directories, 71 files
licaibiao@ubuntu:~/MTCNN/MTCNN$ 

我的运行环境是Ubuntu16.04,要编译运行上面的代码需要注意:

    1.使用的gcc 必须支持C++11标准,否则编译不过。

    2.有些系统需要链接librt.so,链接时使用 -lrt;如果这样还链接不到,可以直接指定库文件的路径,比如我Makefile中的/usr/lib/x86_64-linux-gnu/librt.so

    3.如果要在Ubuntu浏览器上显示运行结果,需要设置环境变量:export DISPLAY=:0

运行结果如下:

    上面的工程是我从cpuimage的《MTCNN人脸检测 附完整C++代码》中移植到linux的,实际它的检测效果比OpenCV还是要好很多的。这里面还有很多参数可以调节。下面的图片是MTCNN官方提供的检测结果。我没有去搭他的环境,直接上一个他的结果吧,从结果上看还是非常厉害的。

    从这里可以下载我使用到的测试程序:MTCNN人脸检测应用实例代码

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

li_wen01

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值