为了验证我们训练的MTCNN的O-Net在训练集上的表现,我们写了一个测试代码,来测试训练图片的landmark的mean error。
landmark标签格式如下所示:
48/landmark/0.jpg -1 -1 -1 -1 -1 0.224199 0.505338 0.334520 0.327402 0.583630 0.364769 0.336299 0.596085 0.674377 0.635231
48/landmark/1.jpg -1 -1 -1 -1 -1 0.494662 0.775801 0.665480 0.416370 0.672598 0.336299 0.364769 0.596085 0.635231 0.674377
48/landmark/2.jpg -1 -1 -1 -1 -1 0.283217 0.559441 0.391608 0.384615 0.636364 0.435315 0.407343 0.662587 0.739510 0.701049
48/landmark/3.jpg -1 -1 -1 -1 -1 0.440559 0.716783 0.608392 0.363636 0.615385 0.407343 0.435315 0.662587 0.701049 0.739510
48/landmark/4.jpg -1 -1 -1 -1 -1 0.153846 0.457692 0.273077 0.265385 0.542308 0.505769 0.475000 0.755769 0.840385 0.798077
48/landmark/5.jpg -1 -1 -1 -1 -1 0.542308 0.846154 0.726923 0.457692 0.734615 0.475000 0.505769 0.755769 0.798077 0.840385
48/landmark/6.jpg -1 -1 -1 -1 -1 0.110092 0.472477 0.252294 0.243119 0.573394 0.392202 0.355505 0.690367 0.791284 0.740826
48/landmark/7.jpg -1 -1 -1 -1 -1 0.527523 0.889908 0.747706 0.426606 0.756881 0.355505 0.392202 0.690367 0.740826 0.791284
第1列表示:图片名;
第2列表示:是不是人脸,是人脸为1,不是人脸为0, ignore为-1;
第3-6列表示: 人脸框的标签,即人脸框相对于ground truth人脸框左上角的偏移量;
第7-16列表示: 人脸landmark标签,即5个关键点相对于crop区域的归一化坐标(前5列为x坐标,后5列为y坐标)。
训练样本如下图所示:
将48*48的训练样本输入到O-Net中,预测landmark位置, 测试代码如下所示:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
sys.path.append('D:\\Anaconda2\\libs')
import _init_paths
import caffe
import cv2
import math
import numpy as np
# from python_wrapper import *
import os
# Compute the normalised per-landmark error between pts_gt and pts_pre
def computer_meanerror(pts_gt, pts_pre):
    """Return the five per-landmark errors between ground truth and prediction.

    Both arguments are indexed as flat sequences of ten numbers laid out as
    ``[x0, y0, x1, y1, ..., x4, y4]``.  Each error is the Euclidean distance
    between a ground-truth point and the matching predicted point, divided by
    the distance between ground-truth points 0 and 1 (the two eyes in the
    landmark order this script reports).

    Returns:
        A list of five floats, one per landmark.

    Raises:
        ZeroDivisionError: if ground-truth points 0 and 1 coincide.
    """
    eye_dx = pts_gt[2] - pts_gt[0]
    eye_dy = pts_gt[3] - pts_gt[1]
    # Normalisation factor: distance between the first two ground-truth points.
    d_outer = math.sqrt(eye_dx * eye_dx + eye_dy * eye_dy)

    def _normalised_distance(k):
        dx = pts_gt[2 * k] - pts_pre[2 * k]
        dy = pts_gt[2 * k + 1] - pts_pre[2 * k + 1]
        return math.sqrt(dx * dx + dy * dy) / d_outer

    return [_normalised_distance(k) for k in range(5)]
# Draw landmark points
def drawlandmark(im, points):
    """Draw five filled circles per row of ``points`` onto ``im`` and return it.

    ``points`` is read through ``points.shape[0]`` and two-level indexing:
    in each row, entries 0-4 are used as x coordinates and entries 5-9 as the
    matching y coordinates.  Coordinates are truncated to ``int`` before
    drawing.  ``im`` is drawn on in place by ``cv2.circle`` and returned.
    """
    for face_idx in range(points.shape[0]):
        coords = points[face_idx]
        for k in range(5):
            centre = (int(coords[k]), int(coords[k + 5]))
            # radius 2, colour (255, 0, 0), thickness -1 (filled)
            cv2.circle(im, centre, 2, (255, 0, 0), -1)
    return im
# Landmark detection
def detect_face(img, ONet):
    """Run the network on one 48x48 crop and return its five landmarks.

    Args:
        img: image array in height x width x channel order (the layout
            ``cv2.imread`` returns), assignable into a (48, 48, 3) buffer.
        ONet: network object exposing ``blobs['data']`` (with ``reshape`` and
            ``data``) and ``forward()``.  The ``'conv6-3'`` entry of the
            forward result is read as ten normalised values per sample:
            indices 0-4 are x coordinates, indices 5-9 the matching y
            coordinates.

    Returns:
        numpy array of ten values ``[x0, y0, x1, y1, ..., x4, y4]``, each
        normalised output multiplied by 48.

    Raises:
        ValueError: if ``img`` is None (e.g. the file could not be read), or
            if numpy cannot fit ``img`` into the (48, 48, 3) buffer.
    """
    size = 48
    if img is None:
        # Fail with a clear message here; otherwise the failure would only
        # surface later, with no mention of the image.
        raise ValueError("detect_face: img is None (image could not be read)")

    # Batch of one image, still in height x width x channel order.
    batch = np.zeros((1, size, size, 3))
    batch[0, :, :, :] = img
    # Normalise: [0, 255] -> approximately [-1, 1] (0.0078125 == 1 / 128).
    batch = (batch - 127.5) * 0.0078125
    # Reorder to the blob layout: num x channel x height x width.
    batch = batch.transpose(0, 3, 1, 2)

    ONet.blobs['data'].reshape(1, 3, size, size)
    ONet.blobs['data'].data[...] = batch
    out = ONet.forward()

    # Normalised landmark predictions for the single sample in the batch.
    landmarks = out['conv6-3'][0]
    # Interleave x_k (index k) and y_k (index k + 5), scaled by the crop size.
    return np.array([landmarks[k + offset] * size
                     for k in range(5) for offset in (0, 5)])
# Run the evaluation only when this file is executed as a script, not on import.
# It reads a landmark label list, runs the network on every listed 48x48 crop
# and prints the mean normalised error of each of the five landmarks.
if __name__ == '__main__':
    img_dir = "C:/mtcnn/"
    caffe_model_path = "E:/mtcnn_DuinoDu/model"
    caffe.set_mode_gpu()
    # ONet = caffe.Net(caffe_model_path+"/det3.prototxt", caffe_model_path+"/48net_v5_7.caffemodel", caffe.TEST)
    ONet = caffe.Net(caffe_model_path + "/det3.prototxt", "C:/mtcnn/train_V11_64/models_48_31" + "/_iter_140000.caffemodel", caffe.TEST)

    landmark_num = 0  # label lines read so far
    landmark_pos = 0  # samples that were actually evaluated
    mean_error = np.zeros(5)  # running sum of the per-landmark errors

    # `with` closes the label file even if an image or the network fails.
    with open('C:/mtcnn/48/landmark_48_1000.txt', 'r') as infile_list:
        for name_list in infile_list:
            landmark_num = landmark_num + 1
            print(landmark_num)

            # Columns: name, class label, 4 box offsets, 5 x coords, 5 y coords.
            fields = name_list.split()
            if len(fields) < 16:
                print("skip line %d: expected 16 columns, got %d" % (landmark_num, len(fields)))
                continue

            image_name = img_dir + fields[0]
            img = cv2.imread(image_name)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print("skip line %d: cannot read image %s" % (landmark_num, image_name))
                continue

            # Ground truth is stored as x0..x4 then y0..y4, normalised to the
            # 48x48 crop; interleave to [x0, y0, ...] and convert to pixels.
            align_gt = []
            for i in range(5):
                align_gt.append(float(fields[i + 6]))
                align_gt.append(float(fields[i + 11]))
            align_gt = np.array(align_gt) * 48

            # Predicted landmark positions, [x0, y0, ..., x4, y4] in pixels.
            align_pre = detect_face(img, ONet)

            # Mark the predictions on the crop; only visible when the two
            # display lines below are enabled for debugging.
            for k in range(5):
                cv2.circle(img, (int(align_pre[2 * k]), int(align_pre[2 * k + 1])), 2, (255, 0, 0), -1)
            #cv2.imshow("image", img)
            #cv2.waitKey(1000)

            # Accumulate the normalised error of each landmark.
            mean_error = mean_error + np.array(computer_meanerror(align_gt, align_pre))
            landmark_pos = landmark_pos + 1

    if landmark_pos == 0:
        # Avoid dividing by zero and printing NaN for an empty/unusable list.
        raise SystemExit("no landmark samples were evaluated")

    mean_error = mean_error / float(landmark_pos)
    print("left eye mean error: %s" % mean_error[0])
    print("right eye mean error: %s" % mean_error[1])
    print("nose mean error: %s" % mean_error[2])
    print("left mouth mean error: %s" % mean_error[3])
    print("right mouth mean error: %s" % mean_error[4])
    print("eye and mouth mean error: %s" % ((mean_error[0] + mean_error[1] + mean_error[3] + mean_error[4]) / 4.0))
顺便说一句:linux上pycaffe的测试代码,可以直接移植到windows上,只要将对应平台依赖的库替换即可,代码不需要修改。