dlunion caffe加入hard examples 训练 R-Net,O-Net
上一篇提到,dlunion作者的caffe训练MTCNN并没有加入hard examples,所以这里将加入hard examples来训练R-Net和O-Net。
生成R-Net的训练数据
执行
python create_hard1.py
create_hard1.py的代码如下:
import sys
import tools
import caffe
import cv2
import numpy as np
import os
from utils import *
# Network definition (deploy prototxt) and trained weights of the 12-net,
# given relative to the current working directory.
# NOTE(review): presumably this is the MTCNN P-Net trained in the previous
# step -- confirm against the training setup.
deploy = '../12/det1.prototxt'
caffemodel = '../12/det1.caffemodel'
# Load the network once, in TEST phase; detectFace() reuses this module-level
# instance for every image.
net_12 = caffe.Net(deploy, caffemodel, caffe.TEST)
def view_bar(num, total):
    """Draw a one-line text progress bar on stdout.

    The line starts with a carriage return, so successive calls overwrite
    each other in a terminal instead of scrolling.

    Args:
        num: Number of items processed so far.
        total: Total number of items to process.
    """
    # An empty job has no meaningful ratio; render it as 0% instead of
    # raising ZeroDivisionError.
    rate = float(num) / total if total else 0.0
    rate_num = int(rate * 100)
    r = '\r[%s%s]%d%% (%d/%d)' % ("#" * rate_num, " " * (100 - rate_num), rate_num, num, total)
    sys.stdout.write(r)
    sys.stdout.flush()
def detectFace(img_path, threshold):
    """Run the 12-net over an image pyramid and collect candidate face boxes.

    Args:
        img_path: Path of the image, read with ``cv2.imread``.
        threshold: Sequence of score thresholds; only ``threshold[0]`` is
            used here.

    Returns:
        A list with the rectangles returned by ``tools.detect_face_12net``
        for every pyramid scale, concatenated in scale order.

    Raises:
        IOError: If the image cannot be read.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail with a clear
        # message instead of an AttributeError further down.
        raise IOError("cannot read image: %s" % img_path)

    # Convert to float before subtracting: imread yields uint8, and
    # ``uint8 - 128`` wraps around (e.g. 0 -> 128) instead of going negative.
    # NOTE(review): this assumes the network expects plain mean-subtracted
    # pixels in roughly [-128, 127] -- confirm against the training pipeline.
    caffe_img = img.astype(np.float32) - 128
    origin_h, origin_w = caffe_img.shape[:2]
    scales = tools.calculateScales(img)

    # Device / mode selection does not depend on the scale, so do it once per
    # call rather than once per pyramid level.
    caffe.set_device(0)
    caffe.set_mode_gpu()

    rectangles = []
    for scale in scales:
        hs = int(origin_h * scale)
        ws = int(origin_w * scale)
        scale_img = cv2.resize(caffe_img, (ws, hs))
        # (H, W, C) -> (C, W, H): the blob is fed with x as the first spatial
        # axis, which matches the (x, y) indexing in tools.detect_face_12net.
        scale_img = np.swapaxes(scale_img, 0, 2)
        net_12.blobs['data'].reshape(1, 3, ws, hs)
        net_12.blobs['data'].data[...] = scale_img
        out_ = net_12.forward()

        # Consume the outputs right away instead of keeping them until all
        # scales are done: nothing else needs them, and the returned arrays
        # may alias the network's blobs, which the next forward pass would
        # overwrite (TODO confirm pycaffe memory semantics).
        cls_prob = out_['prob1'][0][1]
        roi = out_['conv4-2'][0]
        out_h, out_w = cls_prob.shape
        out_side = max(out_h, out_w)
        rectangle = tools.detect_face_12net(cls_prob, roi, out_side, 1 / scale,
                                            origin_w, origin_h, threshold[0])
        rectangles.extend(rectangle)
    return rectangles
# Script body: run the 12-net on every annotated image and save its detections
# as 24x24 negative / positive / part-face training crops, labelled by their
# overlap with the ground-truth boxes.
anno_file = './wider_face_train.txt'
im_dir = "./WIDER_train/images/"
neg_save_dir = "./24/negative"
pos_save_dir = "./24/positive"
part_save_dir = "./24/part"
image_size = 24
threshold = [0.6, 0.6, 0.7]

# Make sure the output directories exist before anything is written to them.
for save_dir in (neg_save_dir, pos_save_dir, part_save_dir):
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

with open(anno_file, 'r') as f:
    annotations = f.readlines()
num = len(annotations)
print("%d pics in total" % num)

p_idx = 0  # positive
n_idx = 0  # negative
d_idx = 0  # dont care
image_idx = 0

# NOTE(review): the label files are opened in append mode while the counters
# above restart at 0, so running this on top of existing data overwrites
# images with the same index and appends duplicate label lines for them.
# The context manager guarantees the files are closed even if an image fails.
with open('./24/pos_24.txt', 'a') as f1, \
        open('./24/neg_24.txt', 'a') as f2, \
        open('./24/part_24.txt', 'a') as f3:
    for annotation in annotations:
        # Each line: "<image name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]"
        annotation = annotation.strip().split(' ')
        bbox = list(map(float, annotation[1:]))
        gts = np.array(bbox, dtype=np.float32).reshape(-1, 4)
        img_path = im_dir + annotation[0] + '.jpg'
        image_idx += 1

        img = cv2.imread(img_path)
        if img is None:
            # Unreadable / missing image: skip it rather than abort the run.
            view_bar(image_idx, num)
            continue

        rectangles = detectFace(img_path, threshold)
        view_bar(image_idx, num)

        for box in rectangles:
            x_left, y_top, x_right, y_bottom, _ = box
            crop_w = x_right - x_left + 1
            crop_h = y_bottom - y_top + 1

            # ignore box that is too small
            if crop_w < image_size or crop_h < image_size:
                continue

            # IoU between the current box and all gt boxes. IoU here comes
            # from ``from utils import *`` and is called with one box and an
            # array of boxes (presumably one value per gt box -- TODO confirm).
            # With no gt boxes every detection is a false positive.
            if len(gts):
                Iou = IoU(box, gts)
                max_iou = np.max(Iou)
            else:
                Iou = None
                max_iou = 0.0

            # Boxes with 0.3 <= IoU < 0.4 are not used by any class; skip
            # them before paying for the crop and resize.
            if 0.3 <= max_iou < 0.4:
                continue

            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            if max_iou < 0.3:
                # negative: IoU with all gts is below 0.3
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write("%s/negative/%s" % (image_size, n_idx) + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                continue

            # find gt_box with the highest iou
            idx = np.argmax(Iou)
            assigned_gt = gts[idx]
            x1, y1, x2, y2 = assigned_gt

            # bbox regression label: gt corner offsets relative to the crop,
            # normalised by the crop size
            offset_x1 = (x1 - x_left) / float(crop_w)
            offset_y1 = (y1 - y_top) / float(crop_h)
            offset_x2 = (x2 - x_right) / float(crop_w)
            offset_y2 = (y2 - y_bottom) / float(crop_h)

            if max_iou >= 0.65:
                # positive
                save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                f1.write("%s/positive/%s" % (image_size, p_idx) + ' 1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                p_idx += 1
            else:
                # part face: 0.4 <= IoU < 0.65
                save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                f3.write("%s/part/%s" % (image_size, d_idx) + ' -1 %.2f %.2f %.2f %.2f\n' % (offset_x1, offset_y1, offset_x2, offset_y2))
                cv2.imwrite(save_file, resized_im)
                d_idx += 1
其中会导入tools.py,这个脚本的代码如下:
import sys
from operator import itemgetter
import numpy as np
import cv2
def IoU(rect_1, rect_2):
    """Return the intersection-over-union of two rectangles.

    Args:
        rect_1: First rectangle; items 0-3 are the top-left x, top-left y,
            bottom-right x and bottom-right y. Further items are ignored.
        rect_2: Second rectangle, same layout.

    Returns:
        ``intersection / union`` as a float, or 0 when the union area is zero.
    """
    x11, y11, x12, y12 = rect_1[0], rect_1[1], rect_1[2], rect_1[3]
    x21, y21, x22, y22 = rect_2[0], rect_2[1], rect_2[2], rect_2[3]

    # Overlap extent along each axis, clamped at 0 for disjoint rectangles.
    x_overlap = max(0, min(x12, x22) - max(x11, x21))
    y_overlap = max(0, min(y12, y22) - max(y11, y21))
    intersection = x_overlap * y_overlap

    # Areas are computed as plain (x2 - x1) * (y2 - y1), without a +1.
    union = (x12 - x11) * (y12 - y11) + (x22 - x21) * (y22 - y21) - intersection
    if union == 0:
        return 0
    return float(intersection) / union
def IoM(rect_1, rect_2):
    """Return the intersection over the smaller of the two rectangle areas.

    Args:
        rect_1: First rectangle; items 0-3 are the top-left x, top-left y,
            bottom-right x and bottom-right y. Further items are ignored.
        rect_2: Second rectangle, same layout.

    Returns:
        ``intersection / min(area_1, area_2)`` as a float, or 0 when the
        smaller rectangle has zero area.
    """
    x11, y11, x12, y12 = rect_1[0], rect_1[1], rect_1[2], rect_1[3]
    x21, y21, x22, y22 = rect_2[0], rect_2[1], rect_2[2], rect_2[3]

    # Overlap extent along each axis, clamped at 0 for disjoint rectangles.
    x_overlap = max(0, min(x12, x22) - max(x11, x21))
    y_overlap = max(0, min(y12, y22) - max(y11, y21))
    intersection = x_overlap * y_overlap

    rect1_area = (y12 - y11) * (x12 - x11)
    rect2_area = (y22 - y21) * (x22 - x21)
    min_area = min(rect1_area, rect2_area)
    if min_area == 0:
        # A degenerate rectangle cannot overlap anything; mirror IoU's
        # zero-union guard instead of raising ZeroDivisionError.
        return 0
    return float(intersection) / min_area
def NMS(rectangles, threshold, type):
    """Apply greedy non-maximum suppression to a list of rectangles.

    Rectangles are visited from the highest score to the lowest; each kept
    rectangle removes every lower-scored rectangle whose overlap with it is
    at least ``threshold``.

    Args:
        rectangles: List of rectangles; items 0-3 are x1, y1, x2, y2 and
            item 4 is the score. The list is sorted and pruned in place.
        threshold: Overlap value at or above which a rectangle is suppressed.
        type: ``'iou'`` to measure overlap with IoU; any other value uses IoM.

    Returns:
        The same list object, holding only the surviving rectangles in
        descending score order.
    """
    # Sort in place: a bare ``sorted(...)`` call returns a new list and leaves
    # the input untouched, so suppression would not run in score order.
    rectangles.sort(key=itemgetter(4), reverse=True)
    overlap = IoU if type == 'iou' else IoM

    keep = 0
    while keep < len(rectangles):
        anchor = rectangles[keep]
        # Drop every later (lower-scored) rectangle that overlaps the anchor
        # too much; slice assignment keeps the caller's list object.
        rectangles[keep + 1:] = [
            other for other in rectangles[keep + 1:]
            if overlap(anchor, other) < threshold
        ]
        keep += 1
    return rectangles
def detect_face_12net(cls_prob, roi, out_side, scale, width, height, threshold):
    """Turn the 12-net score map of one pyramid level into face rectangles.

    Args:
        cls_prob: 2-D score map; the first index is treated as x and the
            second as y.
        roi: Box regression output, indexed as ``roi[k][x][y]`` for
            k = 0..3 (offsets for x1, y1, x2, y2).
        out_side: Larger side of the score map.
        scale: Factor that maps network-input coordinates back to the
            original image.
        width: Original image width; x2 is clipped to it.
        height: Original image height; y2 is clipped to it.
        threshold: Minimum score for a cell to produce a rectangle.

    Returns:
        List of ``[x1, y1, x2, y2, score]`` after ``NMS(..., 0.5, 'iou')``.
    """
    in_side = 2 * out_side + 11
    stride = 0
    if out_side != 1:
        stride = float(in_side - 12) / (out_side - 1)

    # Size of the 12x12 window once mapped back to the original image; it is
    # the same for every cell, so compute it once.
    original_w = int((12.0 - 1) * scale)
    original_h = int((12.0 - 1) * scale)

    cls_prob = np.asarray(cls_prob)
    boundingBox = []
    # Visit only the cells that pass the threshold (row-major order, the same
    # order a full scan would produce).
    for x, y in np.argwhere(cls_prob >= threshold):
        prob = cls_prob[x, y]
        original_x1 = int((stride * x + 1) * scale)
        original_y1 = int((stride * y + 1) * scale)
        original_x2 = original_x1 + original_w
        original_y2 = original_y1 + original_h

        # Apply the regressed offsets and clip to the image.
        x1 = int(round(max(0, original_x1 + original_w * roi[0][x][y])))
        y1 = int(round(max(0, original_y1 + original_h * roi[1][x][y])))
        x2 = int(round(min(width, original_x2 + original_w * roi[2][x][y])))
        y2 = int(round(min(height, original_y2 + original_h * roi[3][x][y])))

        # Keep only rectangles with positive extent; NMS needs a score at
        # index 4 of every entry.
        if x2 > x1 and y2 > y1:
            boundingBox.append([x1, y1, x2, y2, prob])
    return NMS(boundingBox, 0.5, 'iou')
def filter_face_24net(cls_prob, roi, rectangles, width, height, threshold):
    """Refine candidate rectangles with the 24-net outputs.

    Args:
        cls_prob: Per-candidate scores; ``cls_prob[i][1]`` is compared with
            ``threshold``.
        roi: Per-candidate box offsets; ``roi[i][0..3]`` adjust x1, y1, x2, y2.
        rectangles: Candidate rectangles; items 0-3 are x1, y1, x2, y2.
        width: Image width; x2 is clipped to it.
        height: Image height; y2 is clipped to it.
        threshold: A candidate is kept only if its score is above this value.

    Returns:
        List of ``[x1, y1, x2, y2, score]`` after ``NMS(..., 0.7, 'iou')``.
    """
    boundingBox = []
    for i, candidate in enumerate(rectangles):
        score = cls_prob[i][1]
        if score > threshold:
            original_w = candidate[2] - candidate[0] + 1
            original_h = candidate[3] - candidate[1] + 1

            # Offsets are expressed relative to the candidate's size.
            x1 = int(round(max(0, candidate[0] + original_w * roi[i][0])))
            y1 = int(round(max(0, candidate[1] + original_h * roi[i][1])))
            x2 = int(round(min(width, candidate[2] + original_w * roi[i][2])))
            y2 = int(round(min(height, candidate[3] + original_h * roi[i][3])))

            # Only rectangles with positive extent are passed on.
            if x2 > x1 and y2 > y1:
                boundingBox.append([x1, y1, x2, y2, score])
    return NMS(boundingBox, 0.7, 'iou')
def filter_face_48net(cls_prob, roi, pts, rectangles, width, height, threshold):
    """Produce final boxes and landmarks from the 48-net outputs.

    Args:
        cls_prob: Per-candidate scores; ``cls_prob[i][1]`` is compared with
            ``threshold``.
        roi: Per-candidate box offsets; ``roi[i][0..3]`` adjust x1, y1, x2, y2.
        pts: Per-candidate landmark output; ``pts[i][k]`` is scaled by the box
            width and ``pts[i][k + 5]`` by the box height, for k = 0..4.
        rectangles: Candidate rectangles; items 0-3 are x1, y1, x2, y2.
        width: Image width; x2 is clipped to it.
        height: Image height; y2 is clipped to it.
        threshold: A candidate is kept only if its score is above this value.

    Returns:
        List of 1-D arrays, one per rectangle surviving
        ``NMS(..., 0.7, 'iom')``: the rounded box (x1, y1, x2, y2), the five
        rounded landmark (x, y) pairs, and finally the score.
    """
    # Gather box, score, offsets and interleaved (x, y) landmarks for every
    # candidate above the threshold.
    candidates = []
    for i, box in enumerate(rectangles):
        score = cls_prob[i][1]
        if score > threshold:
            entry = [box[0], box[1], box[2], box[3], score,
                     roi[i][0], roi[i][1], roi[i][2], roi[i][3]]
            for k in range(5):
                entry.append(pts[i][k])
                entry.append(pts[i][k + 5])
            candidates.append(entry)

    survivors = NMS(candidates, 0.7, 'iom')

    results = []
    for entry in survivors:
        roi_w = entry[2] - entry[0] + 1
        roi_h = entry[3] - entry[1] + 1

        # Apply the regressed offsets and clip to the image.
        x1 = round(max(0, entry[0] + entry[5] * roi_w))
        y1 = round(max(0, entry[1] + entry[6] * roi_h))
        x2 = round(min(width, entry[2] + entry[7] * roi_w))
        y2 = round(min(height, entry[3] + entry[8] * roi_h))

        # Landmarks are stored relative to the un-refined box; map them to
        # image coordinates.
        landmarks = []
        for k in range(5):
            landmarks.append(entry[9 + 2 * k] * roi_w + entry[0] - 1)
            landmarks.append(entry[10 + 2 * k] * roi_h + entry[1] - 1)

        packed = np.round([x1, y1, x2, y2] + landmarks).astype(int)
        results.append(np.append(packed, entry[4]))
    return results
def calculateScales(img):
    """Compute the image-pyramid scale factors for an image.

    The image is first normalised so that its longer side becomes 1000
    pixels; scales are then generated by repeatedly multiplying by 0.7937
    while the shorter normalised side is still at least 12 pixels.

    Args:
        img: Image array whose first two ``shape`` entries are height and
            width (colour or grayscale).

    Returns:
        List of scale factors relative to the original image, largest first;
        empty if the shorter normalised side is below 12 pixels.
    """
    # Only the shape is needed, so there is no reason to copy the pixels.
    h, w = img.shape[:2]
    pr_scale = 1000.0 / max(h, w)
    w = int(w * pr_scale)
    h = int(h * pr_scale)

    scales = []
    factor = 0.7937
    factor_count = 0
    minl = min(h, w)
    while minl >= 12:
        scales.append(pr_scale * pow(factor, factor_count))
        minl *= factor
        factor_count += 1
    return scales
生成O-Net的hard-examples的代码类似,只需要把create_hard1.py中的12修改成24。
生成难样本之后的数据预处理和训练步骤与训练P-Net的步骤类似,具体参见上一篇。