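# Original paper: "A Convolutional Neural Network Cascade for Face Detection" (CascadeCNN)
# Description: loads the caffemodels, detects every face in an image, and writes the detected face regions and their scores to a txt file
# Purpose: evaluate the performance of this algorithm on FDDB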
import numpy as np #numpy: library providing array/matrix operations
import cv2 #cv2: the OpenCV library
import time #time: timing utilities
from operator import itemgetter #itemgetter builds a callable that fetches the given item(s) from an object; used below as a sort key
from load_model_functions import * #import every public name from 'load_model_functions' (load_face_models is used below)
from face_detection_functions import * #presumably provides detect_face_12c, localNMS and globalNMS used later in this file -- TODO confirm
# ================== caffe ======================================
caffe_root = '/home/xiao/caffe/' #caffe root directory (machine-specific path)
import sys #needed to edit the module search path
#Prepend caffe's python directory to the module search path so that 'import caffe' below can be resolved;
#without it the import only searches the default locations and would not find caffe
sys.path.insert(0, caffe_root + 'python')
import caffe
# ================== load models ======================================
#Load the six networks of the cascade; see 'load_face_models' in 'load_model_functions' for the implementation
net_12c_full_conv, net_12_cal, net_24c, net_24_cal, net_48c, net_48_cal = \
load_face_models(softQuantize=True)
# 12-net calibration stage: adjusts the position and size of candidate face boxes.
def cal_face_12c(net_12_cal, caffe_img, rectangles):
    '''
    Calibrate candidate face boxes with the 12x12 calibration network.

    Every box is cropped from the image, resized to 12x12 and run through
    ``net_12_cal``. Each calibration class whose probability is above 0.1
    votes for a scale factor and an x/y shift; the votes are averaged and
    applied to the box. A box with no class above the threshold keeps its
    original coordinates.

    :param net_12_cal: calibration network, used through blobs['data'], blobs['prob'] and forward()
    :param caffe_img: image in caffe style to detect faces, indexed as [row, column, channel]
    :param rectangles: rectangles in form [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of rectangles after calibration, sorted by confidence
        from large to small; the input rectangles are left unmodified and
        boxes whose crop is empty are dropped
    '''
    height, width, channels = caffe_img.shape
    # Class label layout as decoded below: label = 9 * scale_index + 3 * x_index + y_index.
    scale_votes = (0.83, 0.91, 1.0, 1.10, 1.21)
    shift_votes = (-0.17, 0.0, 0.17)
    threshold = 0.1
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        cropped_caffe_img = caffe_img[original_y1:original_y2, original_x1:original_x2]
        if cropped_caffe_img.size == 0:
            # Zero-area or out-of-image window: cv2.resize raises on an empty
            # crop and such a box cannot contain a face, so it is discarded.
            continue
        # Resize to the network input size and reorder HWC -> CHW.
        caffe_img_resized_CHW = cv2.resize(cropped_caffe_img, (12, 12)).transpose((2, 0, 1))
        net_12_cal.blobs['data'].reshape(1, *caffe_img_resized_CHW.shape)
        net_12_cal.blobs['data'].data[...] = caffe_img_resized_CHW
        net_12_cal.forward()
        prediction = net_12_cal.blobs['prob'].data[0]
        # Labels of every calibration class whose probability beats the threshold.
        labels = [int(label) for label in np.nonzero(prediction > threshold)[0]]
        # Work on a copy so the caller's rectangle is never modified in place;
        # the copy inherits the format and the last two attributes.
        cur_result = list(cur_rectangle)
        if not labels:  # no calibration is needed, keep the box as it is
            result.append(cur_result)
            continue
        number_of_cals = len(labels)
        # Average the votes; labels of 36 and above all map to the last scale.
        s_change = sum(scale_votes[min(label // 9, 4)] for label in labels) / number_of_cals
        x_change = sum(shift_votes[(label % 9) // 3] for label in labels) / number_of_cals
        y_change = sum(shift_votes[label % 3] for label in labels) / number_of_cals
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1
        # Shift the top-left corner, rescale the box and clamp it to the image.
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + original_h / s_change))
        result.append(cur_result)
    # Sort by confidence (item 4); reverse=True ranks from large to small.
    result = sorted(result, key=itemgetter(4), reverse=True)
    return result
def detect_face_24c(net_24c, caffe_img, rectangles):
    '''
    Re-score candidate face boxes with the 24x24 detection network.

    Every box is cropped from the image, resized to 24x24 and run through
    ``net_24c``. Entry [0][1] of the 'prob' output is taken as the face
    confidence; boxes whose confidence is not above 0.05 are rejected.

    :param net_24c: detection network, used through blobs['data'], blobs['prob'] and forward()
    :param caffe_img: image in caffe style to detect faces, indexed as [row, column, channel]
    :param rectangles: rectangles in form [x11, y11, x12, y12, confidence, current_scale]
    :return: new list holding a copy of every accepted rectangle with its
        confidence replaced by the 24-net score, in input order; the input
        rectangles are left unmodified
    '''
    result = []
    for cur_rectangle in rectangles:
        x1, y1, x2, y2 = cur_rectangle[:4]
        cropped_caffe_img = caffe_img[y1:y2, x1:x2]  # crop image
        if cropped_caffe_img.size == 0:
            # A box clamped to zero or negative size at the image border (for
            # example by an earlier calibration step) yields an empty crop,
            # on which cv2.resize raises; such a box cannot be a face.
            continue
        # Resize to the network input size and reorder HWC -> CHW.
        caffe_img_resized_CHW = cv2.resize(cropped_caffe_img, (24, 24)).transpose((2, 0, 1))
        net_24c.blobs['data'].reshape(1, *caffe_img_resized_CHW.shape)
        net_24c.blobs['data'].data[...] = caffe_img_resized_CHW
        net_24c.forward()
        confidence = net_24c.blobs['prob'].data[0][1]
        if confidence > 0.05:
            # Copy before updating so the caller's rectangle is not mutated.
            scored_rectangle = list(cur_rectangle)
            scored_rectangle[4] = confidence
            result.append(scored_rectangle)
    return result
def cal_face_24c(net_24_cal, caffe_img, rectangles):
    '''
    Calibrate candidate face boxes with the 24x24 calibration network.

    Every box is cropped from the image, resized to 24x24 and run through
    ``net_24_cal``. Each calibration class whose probability is above 0.1
    votes for a scale factor and an x/y shift; the votes are averaged and
    applied to the box. A box with no class above the threshold keeps its
    original coordinates.

    :param net_24_cal: calibration network, used through blobs['data'], blobs['prob'] and forward()
    :param caffe_img: image in caffe style to detect faces, indexed as [row, column, channel]
    :param rectangles: rectangles in form [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of rectangles after calibration, in input order; the
        input rectangles are left unmodified and boxes whose crop is empty
        are dropped
    '''
    height, width, channels = caffe_img.shape
    # Class label layout as decoded below: label = 9 * scale_index + 3 * x_index + y_index.
    scale_votes = (0.83, 0.91, 1.0, 1.10, 1.21)
    shift_votes = (-0.17, 0.0, 0.17)
    threshold = 0.1
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        cropped_caffe_img = caffe_img[original_y1:original_y2, original_x1:original_x2]  # crop image
        if cropped_caffe_img.size == 0:
            # Zero-area or out-of-image window: cv2.resize raises on an empty
            # crop and such a box cannot contain a face, so it is discarded.
            continue
        # Resize to the network input size and reorder HWC -> CHW.
        caffe_img_resized_CHW = cv2.resize(cropped_caffe_img, (24, 24)).transpose((2, 0, 1))
        net_24_cal.blobs['data'].reshape(1, *caffe_img_resized_CHW.shape)
        net_24_cal.blobs['data'].data[...] = caffe_img_resized_CHW
        net_24_cal.forward()
        prediction = net_24_cal.blobs['prob'].data[0]
        # Labels of every calibration class whose probability beats the threshold.
        labels = [int(label) for label in np.nonzero(prediction > threshold)[0]]
        # Work on a copy so the caller's rectangle is never modified in place;
        # the copy inherits the format and the last two attributes.
        cur_result = list(cur_rectangle)
        if not labels:  # no calibration is needed, keep the box as it is
            result.append(cur_result)
            continue
        number_of_cals = len(labels)
        # Average the votes; labels of 36 and above all map to the last scale.
        s_change = sum(scale_votes[min(label // 9, 4)] for label in labels) / number_of_cals
        x_change = sum(shift_votes[(label % 9) // 3] for label in labels) / number_of_cals
        y_change = sum(shift_votes[label % 3] for label in labels) / number_of_cals
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1
        # Shift the top-left corner, rescale the box and clamp it to the image.
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + original_h / s_change))
        result.append(cur_result)
    return result
def detect_face_48c(net_48c, caffe_img, rectangles):
    '''
    Re-score candidate face boxes with the 48x48 detection network.

    Every box is cropped from the image, resized to 48x48 and run through
    ``net_48c``. Entry [0][1] of the 'prob' output is taken as the face
    confidence; boxes whose confidence is not above 0.1 are rejected.

    :param net_48c: detection network, used through blobs['data'], blobs['prob'] and forward()
    :param caffe_img: image in caffe style to detect faces, indexed as [row, column, channel]
    :param rectangles: rectangles in form [x11, y11, x12, y12, confidence, current_scale]
    :return: new list holding a copy of every accepted rectangle with its
        confidence replaced by the 48-net score, sorted by confidence from
        large to small; the input rectangles are left unmodified
    '''
    result = []
    for cur_rectangle in rectangles:
        x1, y1, x2, y2 = cur_rectangle[:4]
        cropped_caffe_img = caffe_img[y1:y2, x1:x2]  # crop image
        if cropped_caffe_img.size == 0:
            # A box clamped to zero or negative size at the image border (for
            # example by an earlier calibration step) yields an empty crop,
            # on which cv2.resize raises; such a box cannot be a face.
            continue
        # Resize to the network input size and reorder HWC -> CHW.
        caffe_img_resized_CHW = cv2.resize(cropped_caffe_img, (48, 48)).transpose((2, 0, 1))
        net_48c.blobs['data'].reshape(1, *caffe_img_resized_CHW.shape)
        net_48c.blobs['data'].data[...] = caffe_img_resized_CHW
        net_48c.forward()
        confidence = net_48c.blobs['prob'].data[0][1]
        if confidence > 0.1:
            # Copy before updating so the caller's rectangle is not mutated.
            scored_rectangle = list(cur_rectangle)
            scored_rectangle[4] = confidence
            result.append(scored_rectangle)
    # Sort by confidence (item 4); reverse=True ranks from large to small.
    result = sorted(result, key=itemgetter(4), reverse=True)
    return result
def cal_face_48c(net_48_cal, caffe_img, rectangles):
    '''
    Calibrate candidate face boxes with the 48x48 calibration network.

    Every box is cropped from the image, resized to 48x48 and run through
    ``net_48_cal``. Each calibration class whose probability is above 0.1
    votes for a scale factor and an x/y shift; the votes are averaged and
    applied to the box. Unlike the 12 and 24 stages, the vertical shift and
    the resulting height are additionally multiplied by 1.1. A box with no
    class above the threshold keeps its original coordinates.

    :param net_48_cal: calibration network, used through blobs['data'], blobs['prob'] and forward()
    :param caffe_img: image in caffe style to detect faces, indexed as [row, column, channel]
    :param rectangles: rectangles in form [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of rectangles after calibration, in input order; the
        input rectangles are left unmodified and boxes whose crop is empty
        are dropped
    '''
    height, width, channels = caffe_img.shape
    # Class label layout as decoded below: label = 9 * scale_index + 3 * x_index + y_index.
    scale_votes = (0.83, 0.91, 1.0, 1.10, 1.21)
    shift_votes = (-0.17, 0.0, 0.17)
    threshold = 0.1
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        cropped_caffe_img = caffe_img[original_y1:original_y2, original_x1:original_x2]  # crop image
        if cropped_caffe_img.size == 0:
            # Zero-area or out-of-image window: cv2.resize raises on an empty
            # crop and such a box cannot contain a face, so it is discarded.
            continue
        # Resize to the network input size and reorder HWC -> CHW.
        caffe_img_resized_CHW = cv2.resize(cropped_caffe_img, (48, 48)).transpose((2, 0, 1))
        net_48_cal.blobs['data'].reshape(1, *caffe_img_resized_CHW.shape)
        net_48_cal.blobs['data'].data[...] = caffe_img_resized_CHW
        net_48_cal.forward()
        prediction = net_48_cal.blobs['prob'].data[0]
        # Labels of every calibration class whose probability beats the threshold.
        labels = [int(label) for label in np.nonzero(prediction > threshold)[0]]
        # Work on a copy so the caller's rectangle is never modified in place;
        # the copy inherits the format and the last two attributes.
        cur_result = list(cur_rectangle)
        if not labels:  # no calibration is needed, keep the box as it is
            result.append(cur_result)
            continue
        number_of_cals = len(labels)
        # Average the votes; labels of 36 and above all map to the last scale.
        s_change = sum(scale_votes[min(label // 9, 4)] for label in labels) / number_of_cals
        x_change = sum(shift_votes[(label % 9) // 3] for label in labels) / number_of_cals
        y_change = sum(shift_votes[label % 3] for label in labels) / number_of_cals
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1
        # Shift the top-left corner, rescale the box and clamp it to the image;
        # the vertical terms carry the extra 1.1 gain of this stage.
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - 1.1 * original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + 1.1 * original_h / s_change))
        result.append(cur_result)
    return result
# ========================================================
# Note: the image paths come from the list files in ./FDDB-fold. Each of the
# ten txt files is read in turn, and every image path listed in it is processed.
total_time = 0
total_images = 0
min_face_size = 40
stride = 5
# Per-channel mean subtracted from every image before it enters the cascade.
mean_pixel = np.array((104.00698793, 116.66876762, 122.67891434))
# time.clock() was removed in Python 3.8; use perf_counter where it exists and
# fall back to time.clock on interpreters that predate it (Python 2).
timer = getattr(time, 'perf_counter', None) or time.clock
for current_file in range(1, 11):
    print('Processing file ' + str(current_file) + ' ...')
    fold_id = str(current_file).zfill(2)
    read_file_name = './FDDB-fold/FDDB-fold-' + fold_id + '.txt'  # list of image paths
    write_file_name = './detections/fold-' + fold_id + '-out.txt'  # detection results (box positions)
    # Read the list first so that a missing list file does not leave a
    # truncated output file behind.
    with open(read_file_name, "r") as ins:
        array = [line.rstrip() for line in ins]  # list of image names without line endings
    # The context manager closes the output file even if detection raises.
    with open(write_file_name, "w") as write_file:
        for current_image, image_name in enumerate(array):
            if current_image % 10 == 0:  # progress message every 10 entries
                print('Processing image : ' + str(current_image))
            if not image_name:  # skip blank lines in the list file
                continue
            # load image
            read_img_name = './FDDB/originalPics/' + image_name + '.jpg'  # location of the FDDB test images
            img = cv2.imread(read_img_name)
            if img is None:
                # cv2.imread signals failure by returning None; fail here with
                # a clear message instead of an obscure numpy error later on.
                raise IOError('Could not read image: ' + read_img_name)
            start = timer()
            img_forward = np.array(img, dtype=np.float32)
            img_forward -= mean_pixel
            # Core of the cascade: propose, calibrate and filter candidate boxes.
            rectangles = detect_face_12c(net_12c_full_conv, img_forward, min_face_size, stride, True)  # detect faces
            rectangles = cal_face_12c(net_12_cal, img_forward, rectangles)  # calibration
            rectangles = localNMS(rectangles)  # apply local NMS
            rectangles = detect_face_24c(net_24c, img_forward, rectangles)
            rectangles = cal_face_24c(net_24_cal, img_forward, rectangles)  # calibration
            rectangles = localNMS(rectangles)  # apply local NMS
            rectangles = detect_face_48c(net_48c, img_forward, rectangles)
            rectangles = globalNMS(rectangles)  # apply global NMS
            rectangles = cal_face_48c(net_48_cal, img_forward, rectangles)  # calibration
            end = timer()
            total_time += (end - start)
            total_images += 1
            # write to file: image name, number of faces, then one line per
            # face holding "x y width height confidence"
            write_file.write(image_name + "\n")
            write_file.write("{}\n".format(str(len(rectangles))))
            for rectangle in rectangles:
                write_file.write("{} {} {} {} {}\n".format(
                    str(rectangle[0]), str(rectangle[1]),
                    str(rectangle[2] - rectangle[0]),
                    str(rectangle[3] - rectangle[1]),
                    str(rectangle[4])))
            # Example:
            # 2002/08/26/big/img_256     <- relative path and image name
            # 1                          <- number of faces
            # 275 78 120 132 0.999986
if total_images:
    print('Average time spent on one image : ' + str(total_time / total_images) + ' s')
else:
    # Avoid a ZeroDivisionError when every list file was empty.
    print('No images were processed.')