dlunion caffe加入hard examples 训练 R-Net,O-Net

dlunion caffe加入hard examples 训练 R-Net,O-Net

上一篇提到,dlunion作者的caffe训练MTCNN并没有加入hard examples,所以这里将加入hard examples来训练R-Net和O-Net。

生成R-Net的训练数据

执行

python create_hard1.py

create_hard1.py的代码如下:

import sys
import tools
import caffe
import cv2
import numpy as np
import os
from utils import *

# Network definition and trained weights of the "det1" model stored under
# ../12 (presumably the 12x12 first-stage net; paths are relative to the
# directory the script is started from).
deploy = '../12/det1.prototxt'
caffemodel = '../12/det1.caffemodel'
# Build the network once at import time in TEST phase; detectFace() reuses
# this single module-level instance for every image.
net_12 = caffe.Net(deploy, caffemodel, caffe.TEST)

def view_bar(num, total):
    """Redraw a one-line, 100-column text progress bar on stdout.

    Args:
        num: number of items processed so far.
        total: total number of items (must be non-zero).

    The line starts with a carriage return and has no trailing newline, so
    successive calls overwrite each other in a terminal.
    """
    percent = int(float(num) / total * 100)
    filled = "#" * percent
    padding = " " * (100 - percent)
    line = '\r[%s%s]%d%%  (%d/%d)' % (filled, padding, percent, num, total)
    sys.stdout.write(line)
    # Flush explicitly: without a newline the text could sit in the buffer.
    sys.stdout.flush()

def detectFace(img_path, threshold):
    """Run the module-level ``net_12`` over an image pyramid.

    Args:
        img_path: path of the image, read with ``cv2.imread``.
        threshold: sequence of score thresholds; only ``threshold[0]`` is
            used by this function.

    Returns:
        The concatenation, over all pyramid scales, of the rectangle lists
        returned by ``tools.detect_face_12net``. An empty list is returned
        when the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable/missing file by returning None
        # instead of raising; treat that as "no detections".
        return []

    # NOTE(review): imread normally yields uint8 data, in which case this
    # subtraction wraps around instead of going negative -- confirm this is
    # the same preprocessing the model was trained with.
    caffe_img = img.copy()-128
    origin_h, origin_w = caffe_img.shape[:2]
    scales = tools.calculateScales(img)

    # Device/mode selection does not depend on the scale, so do it once per
    # call rather than once per pyramid level.
    caffe.set_device(0)
    caffe.set_mode_gpu()

    rectangles = []
    for scale in scales:
        hs = int(origin_h*scale)
        ws = int(origin_w*scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        # (hs, ws, 3) -> (3, ws, hs): channels first, spatial axes swapped,
        # matching the blob shape requested below.
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        net_out = net_12.forward()

        # Consume the outputs immediately, so the result never depends on
        # what the output blobs contain after a later reshape/forward.
        cls_prob = net_out['prob1'][0][1]
        roi = net_out['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangles.extend(
            tools.detect_face_12net(cls_prob, roi, out_side, 1/scale,
                                    origin_w, origin_h, threshold[0]))
    return rectangles
# Input annotation list / image root and output folders for the 24x24 crops.
anno_file = './wider_face_train.txt'
im_dir = "./WIDER_train/images/"
neg_save_dir = "./24/negative"
pos_save_dir = "./24/positive"
part_save_dir = "./24/part"

image_size = 24
threshold = [0.6, 0.6, 0.7]
with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # dont care
image_idx = 0

# The label files are opened in append mode; the ``with`` block guarantees
# they are flushed and closed even if processing stops with an exception.
with open('./24/pos_24.txt', 'a') as f1, \
        open('./24/neg_24.txt', 'a') as f2, \
        open('./24/part_24.txt', 'a') as f3:
    for annotation in annotations:
        # Each line: "<image path without extension> x1 y1 x2 y2 [x1 y1 x2 y2 ...]"
        annotation = annotation.strip().split(' ')
        bbox = list(map(float, annotation[1:]))
        gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        img_path = im_dir + annotation[0] + '.jpg'

        img = cv2.imread(img_path)
        # cv2.imread returns None for an unreadable file; skip detection for
        # such images instead of crashing in the middle of the run.
        rectangles = detectFace(img_path, threshold) if img is not None else []
        image_idx += 1
        view_bar(image_idx, num)

        for box in rectangles:
            x_left, y_top, x_right, y_bottom, _ = box
            crop_w = x_right - x_left + 1
            crop_h = y_bottom - y_top + 1
            # ignore box that is too small
            if crop_w < image_size or crop_h < image_size:
                continue

            # IoU between the current box and all gt boxes.
            # NOTE(review): IoU is presumably the box-vs-many-boxes helper
            # pulled in by ``from utils import *`` -- confirm.
            Iou = IoU(box, gts)
            max_iou = np.max(Iou)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
            resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

            # save negative images and write label
            if max_iou < 0.3:
                # Iou with all gts must be below 0.3
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                continue

            # find gt_box with the highest iou
            idx = np.argmax(Iou)
            assigned_gt = gts[idx]
            x1, y1, x2, y2 = assigned_gt

            # bbox regression label: gt corner offsets relative to the
            # detected box, normalised by the box size
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)

            # save positive and part-face images and write labels;
            # boxes with 0.3 <= IoU < 0.4 are intentionally dropped
            if max_iou >= 0.65:
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write("%s/positive/%s" % (image_size, p_idx) + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                p_idx += 1
            elif max_iou >= 0.4:
                save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                f3.write("%s/part/%s" % (image_size, d_idx) + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                d_idx += 1

其中会导入tools.py,这个脚本的代码如下:

import sys
from operator import itemgetter
import numpy as np
import cv2
def IoU(rect_1, rect_2):
    """Return the intersection-over-union of two axis-aligned rectangles.

    Args:
        rect_1: first rectangle; indices 0..3 are read as
            (left x, top y, right x, bottom y). Extra items are ignored.
        rect_2: second rectangle, same layout.

    Returns:
        ``intersection / union`` as a float, or ``0`` when the union area
        is zero.
    """
    left_a, top_a, right_a, bottom_a = rect_1[0], rect_1[1], rect_1[2], rect_1[3]
    left_b, top_b, right_b, bottom_b = rect_2[0], rect_2[1], rect_2[2], rect_2[3]

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    overlap_w = max(0, min(right_a, right_b) - max(left_a, left_b))
    overlap_h = max(0, min(bottom_a, bottom_b) - max(top_a, top_b))
    intersection = overlap_w * overlap_h

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    union = area_a + area_b - intersection
    if union == 0:
        return 0
    return float(intersection) / union
def IoM(rect_1, rect_2):
    """Return intersection-over-minimum-area of two axis-aligned rectangles.

    Args:
        rect_1: first rectangle; indices 0..3 are read as
            (left x, top y, right x, bottom y). Extra items are ignored.
        rect_2: second rectangle, same layout.

    Returns:
        ``intersection / min(area_1, area_2)`` as a float, or ``0`` when the
        smaller area is zero (same convention ``IoU`` uses for an empty
        union, instead of dividing by zero).
    """
    x11 = rect_1[0]    # first rectangle top left x
    y11 = rect_1[1]    # first rectangle top left y
    x12 = rect_1[2]    # first rectangle bottom right x
    y12 = rect_1[3]    # first rectangle bottom right y
    x21 = rect_2[0]    # second rectangle top left x
    y21 = rect_2[1]    # second rectangle top left y
    x22 = rect_2[2]    # second rectangle bottom right x
    y22 = rect_2[3]    # second rectangle bottom right y
    x_overlap = max(0, min(x12, x22) - max(x11, x21))
    y_overlap = max(0, min(y12, y22) - max(y11, y21))
    intersection = x_overlap * y_overlap
    rect1_area = (y12 - y11) * (x12 - x11)
    rect2_area = (y22 - y21) * (x22 - x21)
    min_area = min(rect1_area, rect2_area)
    if min_area == 0:
        # A degenerate (zero-area) rectangle cannot overlap anything.
        return 0
    return float(intersection) / min_area
def NMS(rectangles, threshold, type):
    """Greedy non-maximum suppression.

    Args:
        rectangles: list of rectangles; indices 0..3 are the box corners
            (x1, y1, x2, y2) and index 4 is the score. Extra items are
            carried along untouched.
        threshold: a rectangle is suppressed when its overlap with an
            already kept, higher-scoring rectangle is >= this value.
        type: ``'iou'`` to measure overlap with ``IoU``; any other value
            uses ``IoM``.

    Returns:
        A new list with the surviving rectangles, ordered by descending
        score. The input list is not modified.
    """
    # sorted() returns a new list -- its result has to be kept, otherwise
    # suppression runs in arbitrary input order and may drop the
    # best-scoring box of a cluster.
    remaining = sorted(rectangles, key=itemgetter(4), reverse=True)
    kept = []
    while remaining:
        best = remaining[0]
        kept.append(best)
        survivors = []
        for candidate in remaining[1:]:
            if type == 'iou':
                overlap = IoU(best, candidate)
            else:
                overlap = IoM(best, candidate)
            # Written as "not >=" so the comparison is exactly the negation
            # of the suppression rule.
            if not overlap >= threshold:
                survivors.append(candidate)
        remaining = survivors
    return kept

def detect_face_12net(cls_prob,roi,out_side,scale,width,height,threshold):
    """Turn a 12-net score map into candidate boxes in original-image pixels.

    Args:
        cls_prob: 2-D score map; every cell with a score >= ``threshold``
            produces one candidate.
        roi: box-regression output, indexed as ``roi[k][x][y]`` for
            k = 0..3 with the same (x, y) indices as ``cls_prob``.
        out_side: side length of the score map, used to derive the stride.
        scale: factor that maps network-input coordinates back to the
            original image.
        width, height: upper bounds used to clip x2 / y2.
        threshold: minimum score for a cell to be considered.

    Returns:
        The result of ``NMS(..., 0.5, 'iou')`` applied to the candidate
        ``[x1, y1, x2, y2, score]`` rectangles.
    """
    in_side = 2*out_side+11
    # With a single output cell there is no step between cells.
    stride = float(in_side-12)/(out_side-1) if out_side != 1 else 0

    # Every window has the same square extent once mapped back.
    window = int((12.0 - 1)*scale)

    candidates = []
    for (x, y), prob in np.ndenumerate(cls_prob):
        if not (prob >= threshold):
            continue
        left = int((stride*x + 1)*scale)
        top = int((stride*y + 1)*scale)
        right = left + window
        bottom = top + window
        # Apply the regression offsets, then clip to the image.
        x1 = int(round(max(0, left + window * roi[0][x][y])))
        y1 = int(round(max(0, top + window * roi[1][x][y])))
        x2 = int(round(min(width, right + window * roi[2][x][y])))
        y2 = int(round(min(height, bottom + window * roi[3][x][y])))
        if x2 > x1 and y2 > y1:
            candidates.append([x1, y1, x2, y2, prob])
    return NMS(candidates, 0.5, 'iou')

def filter_face_24net(cls_prob,roi,rectangles,width,height,threshold):
    """Refine candidate boxes with the 24-net outputs.

    Args:
        cls_prob: per-candidate scores; ``cls_prob[i][1]`` is compared
            against ``threshold``.
        roi: per-candidate regression offsets ``roi[i][0..3]``.
        rectangles: candidate boxes; indices 0..3 are x1, y1, x2, y2.
        width, height: upper bounds used to clip x2 / y2.
        threshold: a candidate is kept only if its score is strictly
            greater than this value.

    Returns:
        The result of ``NMS(..., 0.7, 'iou')`` applied to the refined
        ``[x1, y1, x2, y2, score]`` rectangles.
    """
    refined = []
    for i, box in enumerate(rectangles):
        score = cls_prob[i][1]
        if not (score > threshold):
            continue
        left, top, right, bottom = box[0], box[1], box[2], box[3]
        box_w = right - left + 1
        box_h = bottom - top + 1
        offsets = roi[i]
        # Shift each corner by its offset scaled by the box size, then clip.
        x1 = int(round(max(0, left + box_w * offsets[0])))
        y1 = int(round(max(0, top + box_h * offsets[1])))
        x2 = int(round(min(width, right + box_w * offsets[2])))
        y2 = int(round(min(height, bottom + box_h * offsets[3])))
        if x2 > x1 and y2 > y1:
            refined.append([x1, y1, x2, y2, score])
    return NMS(refined,0.7,'iou')

def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold):
    """Produce final boxes and landmarks from the 48-net outputs.

    Args:
        cls_prob: per-candidate scores; ``cls_prob[i][1]`` is compared
            against ``threshold``.
        roi: per-candidate regression offsets ``roi[i][0..3]``.
        pts: per-candidate landmark outputs; entries 0..4 are combined with
            the box width and entries 5..9 with the box height.
        rectangles: candidate boxes; indices 0..3 are x1, y1, x2, y2.
        width, height: upper bounds used to clip x2 / y2.
        threshold: a candidate is kept only if its score is strictly
            greater than this value.

    Returns:
        A list (possibly empty) with one 1-D numpy array per face that
        survives ``NMS(..., 0.7, 'iom')``: four rounded box coordinates,
        ten rounded landmark coordinates interleaved as
        (x, y, x, y, ...), and finally the score.
    """
    boundingBox = []
    for i in range(len(rectangles)):
        if cls_prob[i][1] > threshold:
            rect = [rectangles[i][0], rectangles[i][1], rectangles[i][2], rectangles[i][3], cls_prob[i][1],
                    roi[i][0], roi[i][1], roi[i][2], roi[i][3]]
            # Interleave the landmark outputs as x0, y0, x1, y1, ...
            for k in range(5):
                rect.append(pts[i][k])
                rect.append(pts[i][k + 5])
            boundingBox.append(rect)

    # NMS must run once, after ALL candidates have been collected; running
    # it inside the loop above would overwrite ``rectangles`` while it is
    # still being indexed.
    survivors = NMS(boundingBox, 0.7, 'iom')

    faces = []
    for rectangle in survivors:
        roi_w = rectangle[2]-rectangle[0]+1
        roi_h = rectangle[3]-rectangle[1]+1

        x1 = round(max(0, rectangle[0]+rectangle[5]*roi_w))
        y1 = round(max(0, rectangle[1]+rectangle[6]*roi_h))
        x2 = round(min(width, rectangle[2]+rectangle[7]*roi_w))
        y2 = round(min(height, rectangle[3]+rectangle[8]*roi_h))

        # Landmarks are stored relative to the box; map them back to
        # image coordinates.
        points = []
        for k in range(5):
            points.append(rectangle[9 + 2*k]*roi_w + rectangle[0] - 1)
            points.append(rectangle[10 + 2*k]*roi_h + rectangle[1] - 1)

        score = rectangle[4]
        rect_ = np.round([x1, y1, x2, y2] + points).astype(int)
        rect_ = np.append(rect_, score)
        faces.append(rect_)
    # Return after the loop so every surviving face is reported, not just
    # the first one.
    return faces

def calculateScales(img):
    """Compute the image-pyramid scale factors for an image.

    The image is first normalised so its longer side is 1000 px, then
    repeatedly shrunk by a factor of 0.7937 for as long as the shorter
    side stays at least 12 px.

    Args:
        img: array-like with a ``shape`` whose first two entries are
            (height, width); further axes (e.g. channels) are ignored.

    Returns:
        List of scale factors relative to the original image, largest
        first. Empty if the image has no pixels.
    """
    # Only the shape is needed, so there is no reason to copy the pixels.
    h, w = img.shape[:2]
    if max(h, w) <= 0:
        return []
    pr_scale = 1000.0/max(h, w)
    w = int(w*pr_scale)
    h = int(h*pr_scale)

    scales = []
    factor = 0.7937
    factor_count = 0
    minl = min(h, w)
    while minl >= 12:
        scales.append(pr_scale*pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales

生成O-Net的hard-examples的代码类似,只需要把create_hard1.py中的12修改成24。
生成难样本之后的数据预处理和训练步骤与训练P-Net的步骤类似,具体参见上一篇

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值