CascadeCNN Python测试程序详解

英文原文:A Convolutional Neural Network Cascade for Face Detection (CascadeCNN)

#功能描述:调用caffemodel,检测一幅图片上所有人脸,将检测到的人脸区域及得分输出到txt文件中
import numpy as np   #numpy:提供矩阵运算功能的库
import cv2       #cv2:opencv库
import time      #time:时间模块
from operator import itemgetter  #operator提供一系列函数,itemgetter函数用于获取对象哪些维的数据
from load_model_functions import *  #从‘load_model_functions’文件中导入所有函数、类.....
from face_detection_functions import *

# ==================  caffe  ======================================
caffe_root = '/home/xiao/caffe/'  # Caffe root directory (machine-specific absolute path)
import sys   # needed to extend the module search path below
sys.path.insert(0, caffe_root + 'python')  # put <caffe_root>/python first on the import path
import caffe
# ==================  load models  ======================================
# NOTE(review): the right-hand side of the assignment below is missing from
# this copy of the file, so the statement is incomplete as it stands.
# Presumably it calls a loader imported from load_model_functions that
# returns the six nets in this order -- confirm against the original script.
net_12c_full_conv, net_12_cal, net_24c, net_24_cal, net_48c, net_48_cal = \
def cal_face_12c(net_12_cal, caffe_img, rectangles):
    """Calibrate candidate face rectangles with the 12x12 calibration net.

    Every rectangle is cropped from the image, resized to 12x12 and run
    through ``net_12_cal``.  Each calibration label whose probability exceeds
    0.1 votes for a scale change and an x/y shift; the votes are averaged and
    applied to the rectangle, clamped to the image bounds.

    :param net_12_cal: loaded calibration net; must expose ``blobs['data']``,
        ``blobs['prob']`` and ``forward()``
    :param caffe_img: image in caffe style to detect faces, indexed as
        (height, width, channels)
    :param rectangles: rectangles in form
        [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of calibrated rectangles (confidence and scale are
        carried over), sorted by confidence from large to small; the input
        rectangles are not modified
    """
    # A calibration label encodes 9 * scale_index + 3 * x_index + y_index.
    scale_factors = (0.83, 0.91, 1.0, 1.10, 1.21)
    offsets = (-0.17, 0.0, 0.17)
    threshold = 0.1

    height, width, channels = caffe_img.shape
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1

        # Crop the candidate window and feed it to the net as a 1 x C x 12 x 12 blob.
        cropped_img = caffe_img[original_y1:original_y2, original_x1:original_x2]
        resized_chw = cv2.resize(cropped_img, (12, 12)).transpose((2, 0, 1))
        net_12_cal.blobs['data'].reshape(1, *resized_chw.shape)
        net_12_cal.blobs['data'].data[...] = resized_chw
        net_12_cal.forward()
        prediction = net_12_cal.blobs['prob'].data[0]

        # Labels whose probability is larger than the threshold.
        indices = np.nonzero(prediction > threshold)[0]
        number_of_cals = len(indices)

        if number_of_cals == 0:
            # No calibration is needed: keep the rectangle as it is.
            # NOTE(review): the previous code dropped such rectangles --
            # confirm that keeping them is the intended behaviour.
            result.append(list(cur_rectangle))
            continue

        total_s_change = 0.0
        total_x_change = 0.0
        total_y_change = 0.0
        for cal_label in indices:
            cal_label = int(cal_label)
            # Labels 0-8, 9-17, 18-26, 27-35 and 36+ select the five scales.
            total_s_change += scale_factors[min(cal_label // 9, 4)]
            total_x_change += offsets[(cal_label % 9) // 3]
            total_y_change += offsets[cal_label % 3]

        # Average the votes of all selected calibration patterns.
        s_change = total_s_change / number_of_cals
        x_change = total_x_change / number_of_cals
        y_change = total_y_change / number_of_cals

        # Copy so that confidence and scale are inherited without touching the input.
        cur_result = list(cur_rectangle)
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + original_h / s_change))
        result.append(cur_result)

    # Rank by confidence, from large to small.
    return sorted(result, key=itemgetter(4), reverse=True)
def detect_face_24c(net_24c, caffe_img, rectangles):
    """Re-score candidate rectangles with the 24x24 detection net.

    Every rectangle is cropped from the image, resized to 24x24 and run
    through ``net_24c``.  Rectangles whose score (``prob`` output, index 1)
    is larger than 0.05 are kept, with that score as their new confidence.

    :param net_24c: loaded detection net; must expose ``blobs['data']``,
        ``blobs['prob']`` and ``forward()``
    :param caffe_img: image in caffe style to detect faces
    :param rectangles: rectangles in form
        [x11, y11, x12, y12, confidence, current_scale]
    :return: new list with the accepted rectangles, in input order, each
        carrying the updated confidence; the input rectangles are not modified
    """
    result = []
    for cur_rectangle in rectangles:
        x1, y1, x2, y2 = cur_rectangle[:4]

        # Crop the candidate window and feed it to the net as a 1 x C x 24 x 24 blob.
        cropped_img = caffe_img[y1:y2, x1:x2]
        resized_chw = cv2.resize(cropped_img, (24, 24)).transpose((2, 0, 1))
        net_24c.blobs['data'].reshape(1, *resized_chw.shape)
        net_24c.blobs['data'].data[...] = resized_chw
        # Without a forward pass the 'prob' blob would not reflect this crop.
        net_24c.forward()
        prediction = net_24c.blobs['prob'].data

        confidence = prediction[0][1]

        if confidence > 0.05:
            accepted = list(cur_rectangle)
            accepted[4] = confidence
            result.append(accepted)

    return result
def cal_face_24c(net_24_cal, caffe_img, rectangles):
    """Calibrate candidate face rectangles with the 24x24 calibration net.

    Every rectangle is cropped from the image, resized to 24x24 and run
    through ``net_24_cal``.  Each calibration label whose probability exceeds
    0.1 votes for a scale change and an x/y shift; the votes are averaged and
    applied to the rectangle, clamped to the image bounds.

    :param net_24_cal: loaded calibration net; must expose ``blobs['data']``,
        ``blobs['prob']`` and ``forward()``
    :param caffe_img: image in caffe style to detect faces, indexed as
        (height, width, channels)
    :param rectangles: rectangles in form
        [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of calibrated rectangles (confidence and scale are
        carried over), sorted by confidence from large to small; the input
        rectangles are not modified
    """
    # A calibration label encodes 9 * scale_index + 3 * x_index + y_index.
    scale_factors = (0.83, 0.91, 1.0, 1.10, 1.21)
    offsets = (-0.17, 0.0, 0.17)
    threshold = 0.1

    height, width, channels = caffe_img.shape
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1

        # Crop the candidate window and feed it to the net as a 1 x C x 24 x 24 blob.
        cropped_img = caffe_img[original_y1:original_y2, original_x1:original_x2]
        resized_chw = cv2.resize(cropped_img, (24, 24)).transpose((2, 0, 1))
        net_24_cal.blobs['data'].reshape(1, *resized_chw.shape)
        net_24_cal.blobs['data'].data[...] = resized_chw
        # Without a forward pass the 'prob' blob would not reflect this crop.
        net_24_cal.forward()
        prediction = net_24_cal.blobs['prob'].data[0]

        # Labels whose probability is larger than the threshold.
        indices = np.nonzero(prediction > threshold)[0]
        number_of_cals = len(indices)

        if number_of_cals == 0:
            # No calibration is needed: keep the rectangle as it is (this also
            # avoids dividing by zero below).
            # NOTE(review): confirm that keeping, rather than dropping, such
            # rectangles is the intended behaviour.
            result.append(list(cur_rectangle))
            continue

        total_s_change = 0.0
        total_x_change = 0.0
        total_y_change = 0.0
        for cal_label in indices:
            cal_label = int(cal_label)
            # Labels 0-8, 9-17, 18-26, 27-35 and 36+ select the five scales.
            total_s_change += scale_factors[min(cal_label // 9, 4)]
            total_x_change += offsets[(cal_label % 9) // 3]
            total_y_change += offsets[cal_label % 3]

        # Average the votes of all selected calibration patterns.
        s_change = total_s_change / number_of_cals
        x_change = total_x_change / number_of_cals
        y_change = total_y_change / number_of_cals

        # Copy so that confidence and scale are inherited without touching the input.
        cur_result = list(cur_rectangle)
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + original_h / s_change))
        result.append(cur_result)

    # Rank by confidence, from large to small.
    return sorted(result, key=itemgetter(4), reverse=True)
def detect_face_48c(net_48c, caffe_img, rectangles):
    """Re-score candidate rectangles with the 48x48 detection net.

    Every rectangle is cropped from the image, resized to 48x48 and run
    through ``net_48c``.  Rectangles whose score (``prob`` output, index 1)
    is larger than 0.1 are kept, with that score as their new confidence.

    :param net_48c: loaded detection net; must expose ``blobs['data']``,
        ``blobs['prob']`` and ``forward()``
    :param caffe_img: image in caffe style to detect faces
    :param rectangles: rectangles in form
        [x11, y11, x12, y12, confidence, current_scale]
    :return: new list with the accepted rectangles, each carrying the updated
        confidence, sorted by confidence from large to small; the input
        rectangles are not modified
    """
    result = []
    for cur_rectangle in rectangles:
        x1, y1, x2, y2 = cur_rectangle[:4]

        # Crop the candidate window and feed it to the net as a 1 x C x 48 x 48 blob.
        cropped_img = caffe_img[y1:y2, x1:x2]
        resized_chw = cv2.resize(cropped_img, (48, 48)).transpose((2, 0, 1))
        net_48c.blobs['data'].reshape(1, *resized_chw.shape)
        net_48c.blobs['data'].data[...] = resized_chw
        # Without a forward pass the 'prob' blob would not reflect this crop.
        net_48c.forward()
        prediction = net_48c.blobs['prob'].data

        confidence = prediction[0][1]

        if confidence > 0.1:
            accepted = list(cur_rectangle)
            accepted[4] = confidence
            result.append(accepted)

    # Rank by confidence, from large to small.
    return sorted(result, key=itemgetter(4), reverse=True)
def cal_face_48c(net_48_cal, caffe_img, rectangles):
    """Calibrate candidate face rectangles with the 48x48 calibration net.

    Every rectangle is cropped from the image, resized to 48x48 and run
    through ``net_48_cal``.  Each calibration label whose probability exceeds
    0.1 votes for a scale change and an x/y shift; the votes are averaged and
    applied to the rectangle, clamped to the image bounds.  Unlike the
    horizontal direction, the vertical shift and the resulting height are
    additionally multiplied by 1.1.

    :param net_48_cal: loaded calibration net; must expose ``blobs['data']``,
        ``blobs['prob']`` and ``forward()``
    :param caffe_img: image in caffe style to detect faces, indexed as
        (height, width, channels)
    :param rectangles: rectangles in form
        [x11, y11, x12, y12, confidence, current_scale]
    :return: new list of calibrated rectangles (confidence and scale are
        carried over), sorted by confidence from large to small; the input
        rectangles are not modified
    """
    # A calibration label encodes 9 * scale_index + 3 * x_index + y_index.
    scale_factors = (0.83, 0.91, 1.0, 1.10, 1.21)
    offsets = (-0.17, 0.0, 0.17)
    threshold = 0.1
    vertical_stretch = 1.1

    height, width, channels = caffe_img.shape
    result = []
    for cur_rectangle in rectangles:
        original_x1, original_y1, original_x2, original_y2 = cur_rectangle[:4]
        original_w = original_x2 - original_x1
        original_h = original_y2 - original_y1

        # Crop the candidate window and feed it to the net as a 1 x C x 48 x 48 blob.
        cropped_img = caffe_img[original_y1:original_y2, original_x1:original_x2]
        resized_chw = cv2.resize(cropped_img, (48, 48)).transpose((2, 0, 1))
        net_48_cal.blobs['data'].reshape(1, *resized_chw.shape)
        net_48_cal.blobs['data'].data[...] = resized_chw
        # Without a forward pass the 'prob' blob would not reflect this crop.
        net_48_cal.forward()
        prediction = net_48_cal.blobs['prob'].data[0]

        # Labels whose probability is larger than the threshold.
        indices = np.nonzero(prediction > threshold)[0]
        number_of_cals = len(indices)

        if number_of_cals == 0:
            # No calibration is needed: keep the rectangle as it is (this also
            # avoids dividing by zero below).
            # NOTE(review): confirm that keeping, rather than dropping, such
            # rectangles is the intended behaviour.
            result.append(list(cur_rectangle))
            continue

        total_s_change = 0.0
        total_x_change = 0.0
        total_y_change = 0.0
        for cal_label in indices:
            cal_label = int(cal_label)
            # Labels 0-8, 9-17, 18-26, 27-35 and 36+ select the five scales.
            total_s_change += scale_factors[min(cal_label // 9, 4)]
            total_x_change += offsets[(cal_label % 9) // 3]
            total_y_change += offsets[cal_label % 3]

        # Average the votes of all selected calibration patterns.
        s_change = total_s_change / number_of_cals
        x_change = total_x_change / number_of_cals
        y_change = total_y_change / number_of_cals

        # Copy so that confidence and scale are inherited without touching the input.
        cur_result = list(cur_rectangle)
        cur_result[0] = int(max(0, original_x1 - original_w * x_change / s_change))
        cur_result[1] = int(max(0, original_y1 - vertical_stretch * original_h * y_change / s_change))
        cur_result[2] = int(min(width, cur_result[0] + original_w / s_change))
        cur_result[3] = int(min(height, cur_result[1] + vertical_stretch * original_h / s_change))
        result.append(cur_result)

    # Rank by confidence, from large to small.
    return sorted(result, key=itemgetter(4), reverse=True)

# ========================================================
# Run the detection cascade over FDDB folds 1-10.  For every fold the image
# list is read from ./FDDB-fold/ and the detections are written to
# ./detections/.  Only the detection pipeline itself is timed.
total_time = 0
total_images = 0

for current_file in range(1, 11):

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('Processing file ' + str(current_file) + ' ...')

    fold_tag = str(current_file).zfill(2)   # two-digit fold number, e.g. '01'
    read_file_name = './FDDB-fold/FDDB-fold-' + fold_tag + '.txt'   # list of image paths
    write_file_name = './detections/fold-' + fold_tag + '-out.txt'  # detection results

    # One image path (without extension) per line; blank lines are ignored.
    with open(read_file_name, "r") as ins:
        image_names = [line.rstrip() for line in ins if line.strip()]

    # The with-block guarantees the result file is flushed and closed.
    with open(write_file_name, "w") as write_file:
        for current_image, image_name in enumerate(image_names):
            if current_image % 10 == 0:     # progress message every 10 images
                print('Processing image : ' + str(current_image))

            # load image
            read_img_name = './FDDB/originalPics/' + image_name + '.jpg'
            img = cv2.imread(read_img_name)
            if img is None:
                # cv2.imread reports a missing or unreadable file by returning None.
                raise IOError('Could not read image: ' + read_img_name)

            min_face_size = 40
            stride = 5

            # NOTE(review): time.clock() is a Python 2 era API (removed in
            # Python 3.8); kept because this script targets Python 2.
            start = time.clock()

            img_forward = np.array(img, dtype=np.float32)
            # Subtract fixed per-channel values (presumably the training-set
            # channel means -- confirm against the model's preprocessing).
            img_forward -= np.array((104.00698793, 116.66876762, 122.67891434))

            rectangles = detect_face_12c(net_12c_full_conv, img_forward, min_face_size, stride, True)     # detect faces
            rectangles = cal_face_12c(net_12_cal, img_forward, rectangles)      # calibration
            rectangles = localNMS(rectangles)      # apply local NMS
            rectangles = detect_face_24c(net_24c, img_forward, rectangles)
            rectangles = cal_face_24c(net_24_cal, img_forward, rectangles)      # calibration
            rectangles = localNMS(rectangles)      # apply local NMS
            rectangles = detect_face_48c(net_48c, img_forward, rectangles)
            rectangles = globalNMS(rectangles)      # apply global NMS
            rectangles = cal_face_48c(net_48_cal, img_forward, rectangles)      # calibration

            end = time.clock()
            total_time += (end - start)
            total_images += 1

            number_of_faces = len(rectangles)
            # Write one record per image:
            #   2002/08/26/big/img_256      <- image path and name
            #   1                           <- number of faces
            #   275 78 120 132 0.999986     <- x y width height confidence
            write_file.write(image_name + '\n')
            write_file.write("{}\n".format(str(number_of_faces)))
            for rect in rectangles:
                write_file.write("{} {} {} {} {}\n".format(str(rect[0]), str(rect[1]),
                                                           str(rect[2] - rect[0]),
                                                           str(rect[3] - rect[1]),
                                                           str(rect[4])))

    # Running average over all images processed so far.
    if total_images:
        print('Average time spent on one image : ' + str(total_time / total_images) + ' s')

