本代码基于作者提供的python版本代码修改,参考:
https://github.com/DuinoDu/mtcnn/blob/master/demo.py (作者提供)
https://github.com/dlunion/mtcnn/blob/master/train/gen_48net_data2.py
注解:先用PNet和RNet级联检测得到bbox矩形框,5个关键点全部落在框内的样本才被用作landmark的训练样本;
1,生成positive,negative,part三种样本,用作者的net1->net2生成bbox, 根据预测的bbox和ground truth计算IOU:
positive: IOU >= 0.65;
negative: IOU < 0.3;
part: 0.4 <= IOU < 0.65(0.3 <= IOU < 0.4 的框既不算negative也不算part,直接丢弃)
代码如下:
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import _init_paths
- import caffe
- import cv2
- import numpy as np
- #from python_wrapper import *
- import os
def bbreg(boundingbox, reg):
    """Apply bounding-box regression offsets to boxes, in place.

    Parameters
    ----------
    boundingbox : numpy array, shape (n, >=4)
        Columns 0..3 hold x1, y1, x2, y2; further columns are left untouched.
    reg : numpy array, shape (4, n)
        Offsets for x1, y1, x2, y2 as fractions of the box width / height.

    Returns
    -------
    The same ``boundingbox`` array, with columns 0..3 overwritten.
    """
    offsets = reg.T
    if offsets.shape[1] == 1:
        # Debug trace only: nothing is actually reshaped here.
        print("reshape of reg")

    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    new_x1 = boundingbox[:, 0] + offsets[:, 0] * box_w
    new_y1 = boundingbox[:, 1] + offsets[:, 1] * box_h
    new_x2 = boundingbox[:, 2] + offsets[:, 2] * box_w
    new_y2 = boundingbox[:, 3] + offsets[:, 3] * box_h

    boundingbox[:, 0:4] = np.array([new_x1, new_y1, new_x2, new_y2]).T
    return boundingbox
def pad(boxesA, w, h):
    """Clip boxes to a ``w`` x ``h`` image and compute where the clipped part
    has to be pasted inside a box-sized buffer.

    The computation is done with 1-based coordinates and converted to 0-based
    indices (floored at 0) at the very end.

    Parameters
    ----------
    boxesA : numpy array, shape (n, >=4), columns x1, y1, x2, y2. Not modified.
    w, h : image width and height.

    Returns
    -------
    list ``[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]`` of length-n arrays:
    ``dy/edy/dx/edx`` are start/end indices inside the buffer,
    ``y/ey/x/ex`` are start/end indices inside the image.
    """
    clipped = boxesA.copy()  # keep the caller's array untouched

    tmph = clipped[:, 3] - clipped[:, 1] + 1
    tmpw = clipped[:, 2] - clipped[:, 0] + 1
    count = clipped.shape[0]

    dx = np.ones(count)
    dy = np.ones(count)
    # NOTE: edx / edy are the very same array objects as tmpw / tmph, so the
    # in-place writes below also change the tmpw / tmph that are returned.
    edx, edy = tmpw, tmph

    # 1-D views into `clipped`; writing through them edits `clipped`.
    x = clipped[:, 0]
    y = clipped[:, 1]
    ex = clipped[:, 2]
    ey = clipped[:, 3]

    past_right = ex > w
    edx[past_right] = -ex[past_right] + w - 1 + tmpw[past_right]
    ex[past_right] = w - 1

    past_bottom = ey > h
    edy[past_bottom] = -ey[past_bottom] + h - 1 + tmph[past_bottom]
    ey[past_bottom] = h - 1

    past_left = x < 1
    dx[past_left] = 2 - x[past_left]
    x[past_left] = 1

    past_top = y < 1
    dy[past_top] = 2 - y[past_top]
    y[past_top] = 1

    def zero_based(values):
        # Python indexes from 0 while the MATLAB original indexes from 1.
        return np.maximum(0, values - 1)

    return [zero_based(dy), zero_based(edy), zero_based(dx), zero_based(edx),
            zero_based(y), zero_based(ey), zero_based(x), zero_based(ex),
            tmpw, tmph]
def IoU(box, boxes):
    """Compute the IoU between one detected box and every ground-truth box.

    Parameters
    ----------
    box : numpy array, shape (5,): x1, y1, x2, y2, score
        The detected box (the score is ignored).
    boxes : numpy array, shape (n, 4): x1, y1, x2, y2
        Ground-truth boxes.

    Returns
    -------
    numpy array, shape (n,): intersection-over-union with each row of
    ``boxes``. Widths and heights are inclusive (``x2 - x1 + 1``).
    """
    det_x1, det_y1, det_x2, det_y2 = box[0], box[1], box[2], box[3]
    det_area = (det_x2 - det_x1 + 1) * (det_y2 - det_y1 + 1)
    gt_area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)

    left = np.maximum(det_x1, boxes[:, 0])
    top = np.maximum(det_y1, boxes[:, 1])
    right = np.minimum(det_x2, boxes[:, 2])
    bottom = np.minimum(det_y2, boxes[:, 3])

    # Clamp at 0 so disjoint boxes contribute no intersection.
    overlap_w = np.maximum(0, right - left + 1)
    overlap_h = np.maximum(0, bottom - top + 1)
    inter = overlap_w * overlap_h

    return inter / (det_area + gt_area - inter)
def rerec(bboxA):
    """Turn every box into a square with the same centre, in place.

    The side of the square is the longer of the box's width and height.

    Parameters
    ----------
    bboxA : numpy array, shape (n, >=4), columns x1, y1, x2, y2.

    Returns
    -------
    The same ``bboxA`` array, with columns 0..3 overwritten.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # Bottom-right corner = new top-left corner + side, on both axes.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
def nms(boxes, threshold, type):
    """Greedy non-maximum suppression.

    Parameters
    ----------
    boxes : numpy array, shape (n, >=5), columns x1, y1, x2, y2, score.
    threshold : overlap ratio above which a lower-scored box is suppressed.
    type : 'Min' divides the intersection by the smaller of the two areas;
        any other value divides by the union (IoU).

    Returns
    -------
    list of kept row indices, highest score first; an empty numpy array when
    ``boxes`` has no rows.
    """
    if boxes.shape[0] == 0:
        return np.array([])

    x1, y1, x2, y2, scores = (boxes[:, col] for col in range(5))
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = scores.argsort()  # ascending, so the best candidate is last
    keep = []
    while len(order) > 0:
        best, rest = order[-1], order[:-1]
        keep.append(best)

        inter_w = np.maximum(
            0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(
            0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h

        if type == 'Min':
            overlap = inter / np.minimum(areas[best], areas[rest])
        else:
            overlap = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap the winner too much survive.
        order = rest[overlap <= threshold]
    return keep
def generateBoundingBox(map, reg, scale, t):
    """Turn a PNet score map into candidate boxes in original-image coordinates.

    Parameters
    ----------
    map : 2-D numpy array of face scores, one per network output cell.
    reg : numpy array, shape (4, ...) with the same spatial layout as ``map``;
        the four planes are the dx1, dy1, dx2, dy2 regression outputs.
    scale : pyramid scale the map was computed at.
    t : score threshold; cells with ``score >= t`` become boxes.

    Returns
    -------
    numpy array, shape (n, 9): x1, y1, x2, y2, score, dx1, dy1, dx2, dy2.
    """
    stride = 2
    cellsize = 12

    heat = map.T
    rows, cols = np.where(heat >= t)
    score = heat[rows, cols]
    # Each regression plane is transposed the same way as the score map.
    offsets = np.array([reg[plane, :, :].T[rows, cols] for plane in range(4)])

    cells = np.array([cols, rows]).T
    # MATLAB indexes from 1 (hence its "boundingbox-1"); Python does not need it.
    top_left = np.fix((stride * cells + 1) / scale).T
    bottom_right = np.fix((stride * cells + cellsize - 1 + 1) / scale).T

    stacked = np.concatenate(
        (top_left, bottom_right, np.array([score]), offsets), axis=0)
    return stacked.T
def drawBoxes(im, boxes):
    """Draw every box of ``boxes`` (columns x1, y1, x2, y2) on ``im``.

    Rectangles are drawn with colour (0, 255, 0) and thickness 1.
    ``im`` is modified in place and returned.
    """
    corners = zip(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    for left, top, right, bottom in corners:
        cv2.rectangle(im, (int(left), int(top)), (int(right), int(bottom)),
                      (0, 255, 0), 1)
    return im
def drawlandmark(im, points):
    """Draw the five landmarks of every face in ``points`` on ``im``.

    Each row of ``points`` is laid out as x0..x4 followed by y0..y4. Every
    landmark is drawn as a radius-2 circle with colour (255, 0, 0).
    ``im`` is modified in place and returned.
    """
    for face in points[0:points.shape[0]]:
        for j in range(5):
            centre = (int(face[j]), int(face[j + 5]))
            cv2.circle(im, centre, 2, (255, 0, 0))
    return im
from time import time

# Start timestamps of the timers that are still running (last in, first out).
_tstart_stack = []


def tic():
    """Start a timer: push the current time onto the stack."""
    _tstart_stack.append(time())


def toc(fmt="Elapsed: %s s"):
    """Stop the most recently started timer and print the elapsed seconds.

    ``fmt`` is a %-format string receiving the elapsed time. Raises
    ``IndexError`` when no timer is running.
    """
    now = time()
    started = _tstart_stack.pop()
    print(fmt % (now - started))
def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
    """Run the first two MTCNN stages (PNet, then RNet) on one image.

    Parameters
    ----------
    img : numpy array, shape (h, w, 3), pixel values in [0, 255].
    minsize : smallest face side (pixels) the scale pyramid is built for.
    PNet, RNet : loaded caffe nets of stage one and stage two.
    threshold : sequence; ``threshold[0]`` is the PNet score cut-off and
        ``threshold[1]`` the RNet score cut-off.
    fastresize : when true, pixels are normalised before resizing instead of
        after.
    factor : scale ratio between two consecutive pyramid levels.

    Returns
    -------
    numpy array of boxes, one row per face: x1, y1, x2, y2, score.
    When PNet proposes nothing at all, the empty array has shape (0, 9).
    """
    h = img.shape[0]
    w = img.shape[1]
    img = img.astype(float)
    # np.float64 instead of the np.float alias, which newer NumPy removed.
    total_boxes = np.zeros((0, 9), np.float64)

    # Scale pyramid: shrink by `factor` until the short side, measured in
    # units where a `minsize` face is 12 px, drops below PNet's 12 px input.
    m = 12.0 / minsize
    minl = min(h, w) * m
    scales = []
    factor_count = 0
    while minl >= 12:
        scales.append(m * pow(factor, factor_count))
        minl *= factor
        factor_count += 1

    # ---- first stage: PNet on every pyramid level ----
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))

        if fastresize:
            im_data = (img - 127.5) * 0.0078125  # [0,255] -> [-1,1]
            im_data = cv2.resize(im_data, (ws, hs))  # default is bilinear
        else:
            im_data = cv2.resize(img, (ws, hs))  # default is bilinear
            im_data = (im_data - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # (hs, ws, 3) -> (3, ws, hs), then add the batch axis.
        im_data = np.swapaxes(im_data, 0, 2)
        im_data = np.array([im_data], dtype=np.float64)
        PNet.blobs['data'].reshape(1, 3, ws, hs)
        PNet.blobs['data'].data[...] = im_data
        out = PNet.forward()

        boxes = generateBoundingBox(out['prob1'][0, 1, :, :],
                                    out['conv4-2'][0], scale, threshold[0])
        if boxes.shape[0] != 0:
            pick = nms(boxes, 0.5, 'Union')
            if len(pick) > 0:
                boxes = boxes[pick, :]
        if boxes.shape[0] != 0:
            total_boxes = np.concatenate((total_boxes, boxes), axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # Merge the proposals of all scales.
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick, :]

        # Apply PNet's regression (columns 5..8), then convert to squares.
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        t1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        t2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        t3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        t4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        t5 = total_boxes[:, 4]
        total_boxes = np.array([t1, t2, t3, t4, t5]).T
        total_boxes = rerec(total_boxes)

        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # ---- second stage: RNet on 24x24 crops of every proposal ----
        tempimg = np.zeros((numbox, 24, 24, 3))
        for k in range(numbox):
            # Box-sized zero buffer; the part of the box that lies inside the
            # image is pasted into it, the rest stays zero.
            tmp = np.zeros((int(tmph[k]) + 1, int(tmpw[k]) + 1, 3))
            tmp[int(dy[k]):int(edy[k]) + 1, int(dx[k]):int(edx[k]) + 1] = \
                img[int(y[k]):int(ey[k]) + 1, int(x[k]):int(ex[k]) + 1]
            tempimg[k, :, :, :] = cv2.resize(tmp, (24, 24))
        tempimg = (tempimg - 127.5) * 0.0078125

        tempimg = np.swapaxes(tempimg, 1, 3)
        RNet.blobs['data'].reshape(numbox, 3, 24, 24)
        RNet.blobs['data'].data[...] = tempimg
        out = RNet.forward()

        score = out['prob1'][:, 1]
        pass_t = np.where(score > threshold[1])[0]
        score = np.array([score[pass_t]]).T
        total_boxes = np.concatenate((total_boxes[pass_t, 0:4], score), axis=1)
        mv = out['conv5-2'][pass_t, :].T

        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            if len(pick) > 0:
                total_boxes = total_boxes[pick, :]
                total_boxes = bbreg(total_boxes, mv[:, pick])
                total_boxes = rerec(total_boxes)

    return total_boxes
def main():
    """Generate 48x48 positive / negative / part crops for the third net.

    Every line of ``wider_face_train.txt`` is an image path (relative, without
    the ``.jpg`` extension) followed by ``x1 y1 x2 y2`` for each ground-truth
    face. PNet -> RNet is run on each image and every returned box is
    labelled by its best IoU with the ground truth:

    * max IoU < 0.3          -> negative (label 0)
    * max IoU >= 0.65        -> positive (label 1, plus box offsets)
    * 0.4 <= max IoU < 0.65  -> part     (label -1, plus box offsets)
    * 0.3 <= max IoU < 0.4   -> dropped

    Crops are resized to 48x48 and written next to one label file per class.
    """
    img_dir = "/home/xiao/code/mtcnn-caffe/prepare_data/WIDER_train/images/"
    imglistfile = "wider_face_train.txt"
    with open(imglistfile, 'r') as f:
        annotations = f.readlines()
    print("%d pics in total" % len(annotations))

    neg_save_dir = "/media/xiao/软件/mtcnn/train/48/negative/"
    pos_save_dir = "/media/xiao/软件/mtcnn/train/48/positive/"
    part_save_dir = "/media/xiao/软件/mtcnn/train/48/part/"
    image_size = 48

    minsize = 20
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    caffe.set_mode_gpu()
    PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
    RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part ("don't care")

    # `with` guarantees the label files are flushed and closed even when an
    # image fails half-way through the run.
    with open('/media/xiao/软件/mtcnn/train/48/pos_48.txt', 'w') as f1, \
            open('/media/xiao/软件/mtcnn/train/48/neg_48.txt', 'w') as f2, \
            open('/media/xiao/软件/mtcnn/train/48/part_48.txt', 'w') as f3:
        for image_idx, annotation in enumerate(annotations):
            fields = annotation.strip().split(' ')
            gts = np.array([float(v) for v in fields[1:]],
                           dtype=np.float32).reshape(-1, 4)
            img_path = img_dir + fields[0] + '.jpg'
            print(image_idx)

            img = cv2.imread(img_path)
            if img is None:
                # imread signals a missing / unreadable file with None.
                print("skip unreadable image: %s" % img_path)
                continue
            # The nets expect RGB channel order, cv2 loads BGR.
            img_matlab = img[:, :, ::-1].copy()

            boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)

            for box in boundingboxes:
                x_left, y_top, x_right, y_bottom, _ = box
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # Ignore boxes that are too small or start outside the image.
                if crop_w < image_size / 2 or crop_h < image_size / 2:
                    continue
                if x_left < 0 or y_top < 0:
                    continue

                # IoU between this box and every ground-truth box; an image
                # without ground truth can only yield negatives.
                Iou = IoU(box, gts)
                max_iou = np.max(Iou) if Iou.size else 0.0

                cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)]
                resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

                if max_iou < 0.3:
                    # Negative: IoU with all ground-truth boxes is below 0.3.
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write("%s/negative/%s.jpg" % (image_size, n_idx) + ' 0')
                    f2.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                    continue

                # Regression target: the ground-truth box with the highest IoU.
                x1, y1, x2, y2 = gts[np.argmax(Iou)]
                # offset_x1 / offset_y1 are relative to the crop's top-left
                # corner, offset_x2 / offset_y2 to its bottom-right corner;
                # all are normalised by the crop size.
                offset_x1 = (x1 - x_left) / float(crop_w)
                offset_y1 = (y1 - y_top) / float(crop_h)
                offset_x2 = (x2 - x_right) / float(crop_w)
                offset_y2 = (y2 - y_bottom) / float(crop_h)

                if max_iou >= 0.65:
                    # Positive sample.
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write("%s/positive/%s.jpg" % (image_size, p_idx) + ' 1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    f1.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif max_iou >= 0.4:
                    # Part sample.
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write("%s/part/%s.jpg" % (image_size, d_idx) + ' -1 %.6f %.6f %.6f %.6f' % (offset_x1, offset_y1, offset_x2, offset_y2))
                    f3.write(" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n")
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1


if __name__ == "__main__":
    main()
2,生成landmark样本:用作者的net1->net2生成bbox,以5个landmark是否全部落在bbox内作为筛选条件:
代码如下:
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import _init_paths
- import caffe
- import cv2
- import numpy as np
- #from python_wrapper import *
- import os
def bbreg(boundingbox, reg):
    """Apply bounding-box regression offsets to boxes, in place.

    Parameters
    ----------
    boundingbox : numpy array, shape (n, >=4)
        Columns 0..3 hold x1, y1, x2, y2; further columns are left untouched.
    reg : numpy array, shape (4, n)
        Offsets for x1, y1, x2, y2 as fractions of the box width / height.

    Returns
    -------
    The same ``boundingbox`` array, with columns 0..3 overwritten.
    """
    offsets = reg.T
    if offsets.shape[1] == 1:
        # Debug trace only: nothing is actually reshaped here.
        print("reshape of reg")

    box_w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    box_h = boundingbox[:, 3] - boundingbox[:, 1] + 1

    new_x1 = boundingbox[:, 0] + offsets[:, 0] * box_w
    new_y1 = boundingbox[:, 1] + offsets[:, 1] * box_h
    new_x2 = boundingbox[:, 2] + offsets[:, 2] * box_w
    new_y2 = boundingbox[:, 3] + offsets[:, 3] * box_h

    boundingbox[:, 0:4] = np.array([new_x1, new_y1, new_x2, new_y2]).T
    return boundingbox
def pad(boxesA, w, h):
    """Clip boxes to a ``w`` x ``h`` image and compute where the clipped part
    has to be pasted inside a box-sized buffer.

    The computation is done with 1-based coordinates and converted to 0-based
    indices (floored at 0) at the very end.

    Parameters
    ----------
    boxesA : numpy array, shape (n, >=4), columns x1, y1, x2, y2. Not modified.
    w, h : image width and height.

    Returns
    -------
    list ``[dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph]`` of length-n arrays:
    ``dy/edy/dx/edx`` are start/end indices inside the buffer,
    ``y/ey/x/ex`` are start/end indices inside the image.
    """
    clipped = boxesA.copy()  # keep the caller's array untouched

    tmph = clipped[:, 3] - clipped[:, 1] + 1
    tmpw = clipped[:, 2] - clipped[:, 0] + 1
    count = clipped.shape[0]

    dx = np.ones(count)
    dy = np.ones(count)
    # NOTE: edx / edy are the very same array objects as tmpw / tmph, so the
    # in-place writes below also change the tmpw / tmph that are returned.
    edx, edy = tmpw, tmph

    # 1-D views into `clipped`; writing through them edits `clipped`.
    x = clipped[:, 0]
    y = clipped[:, 1]
    ex = clipped[:, 2]
    ey = clipped[:, 3]

    past_right = ex > w
    edx[past_right] = -ex[past_right] + w - 1 + tmpw[past_right]
    ex[past_right] = w - 1

    past_bottom = ey > h
    edy[past_bottom] = -ey[past_bottom] + h - 1 + tmph[past_bottom]
    ey[past_bottom] = h - 1

    past_left = x < 1
    dx[past_left] = 2 - x[past_left]
    x[past_left] = 1

    past_top = y < 1
    dy[past_top] = 2 - y[past_top]
    y[past_top] = 1

    def zero_based(values):
        # Python indexes from 0 while the MATLAB original indexes from 1.
        return np.maximum(0, values - 1)

    return [zero_based(dy), zero_based(edy), zero_based(dx), zero_based(edx),
            zero_based(y), zero_based(ey), zero_based(x), zero_based(ex),
            tmpw, tmph]
def IoU(box, boxes):
    """Compute the IoU between one detected box and every ground-truth box.

    Parameters
    ----------
    box : numpy array, shape (5,): x1, y1, x2, y2, score
        The detected box (the score is ignored).
    boxes : numpy array, shape (n, 4): x1, y1, x2, y2
        Ground-truth boxes.

    Returns
    -------
    numpy array, shape (n,): intersection-over-union with each row of
    ``boxes``. Widths and heights are inclusive (``x2 - x1 + 1``).
    """
    det_x1, det_y1, det_x2, det_y2 = box[0], box[1], box[2], box[3]
    det_area = (det_x2 - det_x1 + 1) * (det_y2 - det_y1 + 1)
    gt_area = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)

    left = np.maximum(det_x1, boxes[:, 0])
    top = np.maximum(det_y1, boxes[:, 1])
    right = np.minimum(det_x2, boxes[:, 2])
    bottom = np.minimum(det_y2, boxes[:, 3])

    # Clamp at 0 so disjoint boxes contribute no intersection.
    overlap_w = np.maximum(0, right - left + 1)
    overlap_h = np.maximum(0, bottom - top + 1)
    inter = overlap_w * overlap_h

    return inter / (det_area + gt_area - inter)
def rerec(bboxA):
    """Turn every box into a square with the same centre, in place.

    The side of the square is the longer of the box's width and height.

    Parameters
    ----------
    bboxA : numpy array, shape (n, >=4), columns x1, y1, x2, y2.

    Returns
    -------
    The same ``bboxA`` array, with columns 0..3 overwritten.
    """
    width = bboxA[:, 2] - bboxA[:, 0]
    height = bboxA[:, 3] - bboxA[:, 1]
    side = np.maximum(width, height)

    bboxA[:, 0] = bboxA[:, 0] + width * 0.5 - side * 0.5
    bboxA[:, 1] = bboxA[:, 1] + height * 0.5 - side * 0.5
    # Bottom-right corner = new top-left corner + side, on both axes.
    bboxA[:, 2:4] = bboxA[:, 0:2] + side[:, np.newaxis]
    return bboxA
def nms(boxes, threshold, type):
    """Greedy non-maximum suppression.

    Parameters
    ----------
    boxes : numpy array, shape (n, >=5), columns x1, y1, x2, y2, score.
    threshold : overlap ratio above which a lower-scored box is suppressed.
    type : 'Min' divides the intersection by the smaller of the two areas;
        any other value divides by the union (IoU).

    Returns
    -------
    list of kept row indices, highest score first; an empty numpy array when
    ``boxes`` has no rows.
    """
    if boxes.shape[0] == 0:
        return np.array([])

    x1, y1, x2, y2, scores = (boxes[:, col] for col in range(5))
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    order = scores.argsort()  # ascending, so the best candidate is last
    keep = []
    while len(order) > 0:
        best, rest = order[-1], order[:-1]
        keep.append(best)

        inter_w = np.maximum(
            0.0, np.minimum(x2[best], x2[rest]) - np.maximum(x1[best], x1[rest]) + 1)
        inter_h = np.maximum(
            0.0, np.minimum(y2[best], y2[rest]) - np.maximum(y1[best], y1[rest]) + 1)
        inter = inter_w * inter_h

        if type == 'Min':
            overlap = inter / np.minimum(areas[best], areas[rest])
        else:
            overlap = inter / (areas[best] + areas[rest] - inter)

        # Only candidates that do not overlap the winner too much survive.
        order = rest[overlap <= threshold]
    return keep
def generateBoundingBox(map, reg, scale, t):
    """Turn a PNet score map into candidate boxes in original-image coordinates.

    Parameters
    ----------
    map : 2-D numpy array of face scores, one per network output cell.
    reg : numpy array, shape (4, ...) with the same spatial layout as ``map``;
        the four planes are the dx1, dy1, dx2, dy2 regression outputs.
    scale : pyramid scale the map was computed at.
    t : score threshold; cells with ``score >= t`` become boxes.

    Returns
    -------
    numpy array, shape (n, 9): x1, y1, x2, y2, score, dx1, dy1, dx2, dy2.
    """
    stride = 2
    cellsize = 12

    heat = map.T
    rows, cols = np.where(heat >= t)
    score = heat[rows, cols]
    # Each regression plane is transposed the same way as the score map.
    offsets = np.array([reg[plane, :, :].T[rows, cols] for plane in range(4)])

    cells = np.array([cols, rows]).T
    # MATLAB indexes from 1 (hence its "boundingbox-1"); Python does not need it.
    top_left = np.fix((stride * cells + 1) / scale).T
    bottom_right = np.fix((stride * cells + cellsize - 1 + 1) / scale).T

    stacked = np.concatenate(
        (top_left, bottom_right, np.array([score]), offsets), axis=0)
    return stacked.T
def drawBoxes(im, boxes):
    """Draw every box of ``boxes`` (columns x1, y1, x2, y2) on ``im``.

    Rectangles are drawn with colour (0, 255, 0) and thickness 1.
    ``im`` is modified in place and returned.
    """
    corners = zip(boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3])
    for left, top, right, bottom in corners:
        cv2.rectangle(im, (int(left), int(top)), (int(right), int(bottom)),
                      (0, 255, 0), 1)
    return im
def drawBoxes_align(im, boxe):
    """Draw one box on ``im`` and return ``im``.

    ``boxe`` is a single box whose first four entries are x1, y1, x2, y2.
    The rectangle is drawn with colour (0, 255, 0) and thickness 1.
    """
    left, top, right, bottom = boxe[0], boxe[1], boxe[2], boxe[3]
    top_left = (int(left), int(top))
    bottom_right = (int(right), int(bottom))
    cv2.rectangle(im, top_left, bottom_right, (0, 255, 0), 1)
    return im
def drawlandmark(im, points):
    """Draw the five landmarks of every face in ``points`` on ``im``.

    Each row of ``points`` is laid out as x0..x4 followed by y0..y4. Every
    landmark is drawn as a radius-2 circle with colour (255, 0, 0).
    ``im`` is modified in place and returned.
    """
    for face in points[0:points.shape[0]]:
        for j in range(5):
            centre = (int(face[j]), int(face[j + 5]))
            cv2.circle(im, centre, 2, (255, 0, 0))
    return im
def drawlandmark_align(im, point):
    """Draw the five landmarks of one face on ``im`` and return ``im``.

    ``point`` is laid out interleaved: x0, y0, x1, y1, ..., x4, y4. Every
    landmark is drawn as a radius-2 circle with colour (255, 0, 0).
    """
    for start in range(0, 10, 2):
        centre = (int(point[start]), int(point[start + 1]))
        cv2.circle(im, centre, 2, (255, 0, 0))
    return im
from time import time

# Start timestamps of the timers that are still running (last in, first out).
_tstart_stack = []


def tic():
    """Start a timer: push the current time onto the stack."""
    _tstart_stack.append(time())


def toc(fmt="Elapsed: %s s"):
    """Stop the most recently started timer and print the elapsed seconds.

    ``fmt`` is a %-format string receiving the elapsed time. Raises
    ``IndexError`` when no timer is running.
    """
    now = time()
    started = _tstart_stack.pop()
    print(fmt % (now - started))
def detect_face(img, minsize, PNet, RNet, threshold, fastresize, factor):
    """Run the first two MTCNN stages (PNet, then RNet) on one image.

    Parameters
    ----------
    img : numpy array, shape (h, w, 3), pixel values in [0, 255].
    minsize : smallest face side (pixels) the scale pyramid is built for.
    PNet, RNet : loaded caffe nets of stage one and stage two.
    threshold : sequence; ``threshold[0]`` is the PNet score cut-off and
        ``threshold[1]`` the RNet score cut-off.
    fastresize : when true, pixels are normalised before resizing instead of
        after.
    factor : scale ratio between two consecutive pyramid levels.

    Returns
    -------
    numpy array of boxes, one row per face: x1, y1, x2, y2, score.
    When PNet proposes nothing at all, the empty array has shape (0, 9).
    """
    h = img.shape[0]
    w = img.shape[1]
    img = img.astype(float)
    # np.float64 instead of the np.float alias, which newer NumPy removed.
    total_boxes = np.zeros((0, 9), np.float64)

    # Scale pyramid: shrink by `factor` until the short side, measured in
    # units where a `minsize` face is 12 px, drops below PNet's 12 px input.
    m = 12.0 / minsize
    minl = min(h, w) * m
    scales = []
    factor_count = 0
    while minl >= 12:
        scales.append(m * pow(factor, factor_count))
        minl *= factor
        factor_count += 1

    # ---- first stage: PNet on every pyramid level ----
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))

        if fastresize:
            im_data = (img - 127.5) * 0.0078125  # [0,255] -> [-1,1]
            im_data = cv2.resize(im_data, (ws, hs))  # default is bilinear
        else:
            im_data = cv2.resize(img, (ws, hs))  # default is bilinear
            im_data = (im_data - 127.5) * 0.0078125  # [0,255] -> [-1,1]

        # (hs, ws, 3) -> (3, ws, hs), then add the batch axis.
        im_data = np.swapaxes(im_data, 0, 2)
        im_data = np.array([im_data], dtype=np.float64)
        PNet.blobs['data'].reshape(1, 3, ws, hs)
        PNet.blobs['data'].data[...] = im_data
        out = PNet.forward()

        boxes = generateBoundingBox(out['prob1'][0, 1, :, :],
                                    out['conv4-2'][0], scale, threshold[0])
        if boxes.shape[0] != 0:
            pick = nms(boxes, 0.5, 'Union')
            if len(pick) > 0:
                boxes = boxes[pick, :]
        if boxes.shape[0] != 0:
            total_boxes = np.concatenate((total_boxes, boxes), axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # Merge the proposals of all scales.
        pick = nms(total_boxes, 0.7, 'Union')
        total_boxes = total_boxes[pick, :]

        # Apply PNet's regression (columns 5..8), then convert to squares.
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        t1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        t2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        t3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        t4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        t5 = total_boxes[:, 4]
        total_boxes = np.array([t1, t2, t3, t4, t5]).T
        total_boxes = rerec(total_boxes)

        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4])
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(total_boxes, w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # ---- second stage: RNet on 24x24 crops of every proposal ----
        tempimg = np.zeros((numbox, 24, 24, 3))
        for k in range(numbox):
            # Box-sized zero buffer; the part of the box that lies inside the
            # image is pasted into it, the rest stays zero.
            tmp = np.zeros((int(tmph[k]) + 1, int(tmpw[k]) + 1, 3))
            tmp[int(dy[k]):int(edy[k]) + 1, int(dx[k]):int(edx[k]) + 1] = \
                img[int(y[k]):int(ey[k]) + 1, int(x[k]):int(ex[k]) + 1]
            tempimg[k, :, :, :] = cv2.resize(tmp, (24, 24))
        tempimg = (tempimg - 127.5) * 0.0078125

        tempimg = np.swapaxes(tempimg, 1, 3)
        RNet.blobs['data'].reshape(numbox, 3, 24, 24)
        RNet.blobs['data'].data[...] = tempimg
        out = RNet.forward()

        score = out['prob1'][:, 1]
        pass_t = np.where(score > threshold[1])[0]
        score = np.array([score[pass_t]]).T
        total_boxes = np.concatenate((total_boxes[pass_t, 0:4], score), axis=1)
        mv = out['conv5-2'][pass_t, :].T

        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            if len(pick) > 0:
                total_boxes = total_boxes[pick, :]
                total_boxes = bbreg(total_boxes, mv[:, pick])
                total_boxes = rerec(total_boxes)

    return total_boxes
def main():
    """Generate 48x48 landmark training crops from CelebA.

    Every line of ``celebA_bbox_landmark.txt`` starts with the image file name
    and ends with ten numbers ``x0 y0 x1 y1 ... x4 y4`` (the five landmarks).
    PNet -> RNet is run on each image; a returned box becomes a landmark
    sample only when all five landmarks lie inside it. The crop is resized to
    48x48 and its label line holds the landmarks normalised by the crop size,
    ordered x0..x4 followed by y0..y4.
    """
    img_dir = "/media/xiao/学习/face_alignment/data/CelebA/Img/img_celeba.7z/img_celeba/"
    anno_file = "celebA_bbox_landmark.txt"
    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    print("%d pics in total" % len(annotations))

    # Crops are 48x48.
    image_size = 48
    landmark_save_dir = "/media/xiao/软件/mtcnn/train/48/landmark/"

    minsize = 40
    caffe_model_path = "./model"
    threshold = [0.6, 0.7, 0.7]
    factor = 0.709
    caffe.set_mode_gpu()
    PNet = caffe.Net(caffe_model_path+"/det1.prototxt", caffe_model_path+"/det1.caffemodel", caffe.TEST)
    RNet = caffe.Net(caffe_model_path+"/det2.prototxt", caffe_model_path+"/det2.caffemodel", caffe.TEST)

    l_idx = 0  # number of landmark samples written so far

    # `with` guarantees the label file is flushed and closed even when an
    # image fails half-way through the run.
    with open('/media/xiao/软件/mtcnn/train/48/landmark_48.txt', 'w') as f1:
        for image_idx, annotation in enumerate(annotations):
            print(image_idx)
            fields = annotation.strip().split(' ')
            if len(fields) < 11:
                # Blank or truncated line: no file name + ten landmark values.
                continue
            im_path = img_dir + fields[0]
            pts = [float(v) for v in fields[-10:]]
            xs = pts[0::2]
            ys = pts[1::2]

            img = cv2.imread(im_path)
            if img is None:
                # imread signals a missing / unreadable file with None.
                print("skip unreadable image: %s" % im_path)
                continue
            # The nets expect RGB channel order, cv2 loads BGR.
            img_matlab = img[:, :, ::-1].copy()

            # Face boxes from the author's net1 -> net2.
            boundingboxes = detect_face(img_matlab, minsize, PNet, RNet, threshold, False, factor)

            for box in boundingboxes:
                x_left, y_top, x_right, y_bottom, _ = box
                crop_w = x_right - x_left + 1
                crop_h = y_bottom - y_top + 1
                # Ignore boxes that are too small or start outside the image.
                if crop_w < image_size / 2 or crop_h < image_size / 2:
                    continue
                if x_left < 0 or y_top < 0:
                    continue

                # Keep the box only when all five landmarks fall inside it.
                if not all(x_left <= px <= x_right for px in xs):
                    continue
                if not all(y_top <= py <= y_bottom for py in ys):
                    continue

                # Landmarks relative to the crop's top-left corner, normalised
                # by the crop size. Built as a fresh list for every box: the
                # pixel coordinates in xs / ys must stay intact for the other
                # boxes of the same image.
                landmark = [(px - x_left) / float(crop_w) for px in xs]
                landmark += [(py - y_top) / float(crop_h) for py in ys]

                cropped_im = img[int(y_top):int(y_bottom + 1), int(x_left):int(x_right + 1)]
                resized_im = cv2.resize(cropped_im, (image_size, image_size), interpolation=cv2.INTER_LINEAR)

                # Label line: image name, five -1 placeholders (class label
                # and box offsets), then the ten landmark values.
                save_file = os.path.join(landmark_save_dir, "%s.jpg" % l_idx)
                f1.write(str(image_size) + "/landmark/%s.jpg" % l_idx + ' -1 -1 -1 -1 -1')
                f1.write("".join(" %f" % value for value in landmark))
                f1.write("\n")
                cv2.imwrite(save_file, resized_im)
                l_idx += 1


if __name__ == "__main__":
    main()