总之,经过了长时间的前期准备,我开辟了自己的新的研究方向——人脸检测和人脸配准。光说不练假把式,读了那么多文章,我能够详细了解人脸配准的整体思想,但是没有动手实践是没有任何用处的,于是我在自己的笔记本上仿真一篇文章,Deep Convolutional Network Cascade for Facial Point Detection。
这篇文章主要思想框架如下:
主要仿真的是Deep CNN F1网络,整个框架太复杂,于是我只是实现基本的第一层。
1)首先下载本文文章的数据http://mmlab.ie.cuhk.edu.hk/archive/CNN_FacePoint.htm,这个项目主页中有两个可执行文件,不过没啥用,运行不起来。
2)虽然有了数据库,但不能直接使用,需要转化为HDF5文件才能使用。
import sys
# Make the directory containing the local `utils` helper module importable.
# NOTE(review): placeholder path ('改路径' means "change this path") — edit before running.
sys.path.append(r'C:\Users\改路径\Desktop\CNN-Face\dataset')# these two lines make the utils module loadable
import utils
import os
from os.path import join, exists
import cv2 # requires OpenCV to be installed and configured
import numpy as np
import h5py
from utils import flip, getDataFromTxt, rotate, logger, shuffle_in_unison_scary, processImage, createDir
# Input dir with the downloaded images plus trainImageList.txt / testImageList.txt.
# NOTE(review): placeholder path ('改' means "change me") — edit before running.
TRAIN = 'C:/Users/改/Desktop/train'
# Output dir where the generated HDF5 files (1_F / 1_EN / 1_NM subdirs) are written.
OUTPUT = 'C:/Users/wenjie/Desktop/train'
if not exists(OUTPUT): os.mkdir(OUTPUT) # created on first run (parent must already exist)
# NOTE: assert is stripped under `python -O`; acceptable for a one-off script.
assert(exists(TRAIN) and exists(OUTPUT))
def _write_h5(base_dir, fname, imgs, landmarks):
    """Write one (data, landmark) array pair to <base_dir>/<fname> as HDF5."""
    createDir(base_dir)
    out_path = join(base_dir, fname)
    logger("generate %s" % out_path)
    with h5py.File(out_path, 'w') as h5:
        h5['data'] = imgs.astype(np.float32)
        h5['landmark'] = landmarks.astype(np.float32)


def generate_hdf5_data(filelist, output, fname, argument=False):
    """Build level-1 training patches (F / EN / NM) and save them as HDF5.

    Args:
        filelist: annotation txt file; each line gives image path, face bbox
                  and 5 landmark coordinates (parsed by getDataFromTxt).
        output:   directory under which the 1_F / 1_EN / 1_NM subdirs are
                  created. Bug fix: the original ignored this parameter and
                  wrote under the global OUTPUT (then shadowed `output` with a
                  local); callers pass OUTPUT, so behavior is unchanged.
        fname:    HDF5 file name created in each subdir (e.g. 'train.h5').
        argument: when True, augment with horizontal flips and +/-5 degree
                  rotations (paper Table 2).
    """
    data = getDataFromTxt(filelist)
    F_imgs, F_landmarks = [], []
    EN_imgs, EN_landmarks = [], []
    NM_imgs, NM_landmarks = [], []
    for (imgPath, bbox, landmarkGt) in data:
        # IMREAD_GRAYSCALE replaces the OpenCV-2-only CV_LOAD_IMAGE_GRAYSCALE
        # (both equal 0); works on OpenCV 2.4 and 3+.
        img = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)
        assert(img is not None)
        logger("process %s" % imgPath)
        # Paper Table2: jitter the face bbox by 5% for the F-layer crop
        f_bbox = bbox.subBBox(-0.05, 1.05, -0.05, 1.05)
        f_face = img[f_bbox.top:f_bbox.bottom+1, f_bbox.left:f_bbox.right+1]
        ## data augmentation for the full-face (F) network
        if argument and np.random.rand() > -1:  # condition is always True: every sample gets a flip
            ### horizontal flip
            face_flipped, landmark_flipped = flip(f_face, landmarkGt)
            face_flipped = cv2.resize(face_flipped, (39, 39))
            F_imgs.append(face_flipped.reshape((1, 39, 39)))
            F_landmarks.append(landmark_flipped.reshape(10))
            ### rotation +5 degrees (probability 0.5)
            if np.random.rand() > 0.5:
                face_rotated_by_alpha, landmark_rotated = rotate(img, f_bbox, \
                    bbox.reprojectLandmark(landmarkGt), 5)
                landmark_rotated = bbox.projectLandmark(landmark_rotated)
                face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (39, 39))
                F_imgs.append(face_rotated_by_alpha.reshape((1, 39, 39)))
                F_landmarks.append(landmark_rotated.reshape(10))
                ### flip of the rotated face
                face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                face_flipped = cv2.resize(face_flipped, (39, 39))
                F_imgs.append(face_flipped.reshape((1, 39, 39)))
                F_landmarks.append(landmark_flipped.reshape(10))
            ### rotation -5 degrees (probability 0.5)
            if np.random.rand() > 0.5:
                face_rotated_by_alpha, landmark_rotated = rotate(img, f_bbox, \
                    bbox.reprojectLandmark(landmarkGt), -5)
                landmark_rotated = bbox.projectLandmark(landmark_rotated)
                face_rotated_by_alpha = cv2.resize(face_rotated_by_alpha, (39, 39))
                F_imgs.append(face_rotated_by_alpha.reshape((1, 39, 39)))
                F_landmarks.append(landmark_rotated.reshape(10))
                ### flip of the rotated face
                face_flipped, landmark_flipped = flip(face_rotated_by_alpha, landmark_rotated)
                face_flipped = cv2.resize(face_flipped, (39, 39))
                F_imgs.append(face_flipped.reshape((1, 39, 39)))
                F_landmarks.append(landmark_flipped.reshape(10))
        f_face = cv2.resize(f_face, (39, 39))
        en_face = f_face[:31, :]  # top rows: eyes + nose region
        nm_face = f_face[8:, :]   # bottom rows: nose + mouth region
        f_face = f_face.reshape((1, 39, 39))
        f_landmark = landmarkGt.reshape((10))
        F_imgs.append(f_face)
        F_landmarks.append(f_landmark)
        ## data augmentation for EN (first 3 landmarks: two eyes + nose)
        if argument and np.random.rand() > 0.5:
            ### horizontal flip
            face_flipped, landmark_flipped = flip(en_face, landmarkGt)
            face_flipped = cv2.resize(face_flipped, (31, 39)).reshape((1, 31, 39))
            landmark_flipped = landmark_flipped[:3, :].reshape((6))
            EN_imgs.append(face_flipped)
            EN_landmarks.append(landmark_flipped)
        en_face = cv2.resize(en_face, (31, 39)).reshape((1, 31, 39))
        en_landmark = landmarkGt[:3, :].reshape((6))
        EN_imgs.append(en_face)
        EN_landmarks.append(en_landmark)
        ## data augmentation for NM (last 3 landmarks: nose + mouth corners)
        if argument and np.random.rand() > 0.5:
            ### horizontal flip
            face_flipped, landmark_flipped = flip(nm_face, landmarkGt)
            face_flipped = cv2.resize(face_flipped, (31, 39)).reshape((1, 31, 39))
            landmark_flipped = landmark_flipped[2:, :].reshape((6))
            NM_imgs.append(face_flipped)
            NM_landmarks.append(landmark_flipped)
        nm_face = cv2.resize(nm_face, (31, 39)).reshape((1, 31, 39))
        nm_landmark = landmarkGt[2:, :].reshape((6))
        NM_imgs.append(nm_face)
        NM_landmarks.append(nm_landmark)
    # Convert the lists to arrays
    F_imgs, F_landmarks = np.asarray(F_imgs), np.asarray(F_landmarks)
    EN_imgs, EN_landmarks = np.asarray(EN_imgs), np.asarray(EN_landmarks)
    NM_imgs, NM_landmarks = np.asarray(NM_imgs), np.asarray(NM_landmarks)
    # Normalize the data and shuffle images/landmarks in unison
    F_imgs = processImage(F_imgs)
    shuffle_in_unison_scary(F_imgs, F_landmarks)
    EN_imgs = processImage(EN_imgs)
    shuffle_in_unison_scary(EN_imgs, EN_landmarks)
    NM_imgs = processImage(NM_imgs)
    shuffle_in_unison_scary(NM_imgs, NM_landmarks)
    # One HDF5 per sub-network (bug fix: use the `output` param, not global OUTPUT)
    _write_h5(join(output, '1_F'), fname, F_imgs, F_landmarks)    # full face
    _write_h5(join(output, '1_EN'), fname, EN_imgs, EN_landmarks) # eyes + nose
    _write_h5(join(output, '1_NM'), fname, NM_imgs, NM_landmarks) # nose + mouth
if __name__ == '__main__':
    # Generate the HDF5 files for the train and test splits.
    train_txt = join(TRAIN, 'trainImageList.txt')
    generate_hdf5_data(train_txt, OUTPUT, 'train.h5', argument=True)
    test_txt = join(TRAIN, 'testImageList.txt')
    generate_hdf5_data(test_txt, OUTPUT, 'test.h5')
    # For each sub-network and phase, write the one-line txt file that Caffe's
    # HDF5 data layer reads to locate the corresponding .h5 file.
    for net in ('1_F', '1_EN', '1_NM'):
        for phase in ('train', 'test'):
            with open(join(OUTPUT, '%s/%s.txt' % (net, phase)), 'w') as fd:
                fd.write('train/%s/%s.h5' % (net, phase))
# Done
然后得到训练数据,如图:
3、训练。有了数据就可以训练了。老规矩:新建txt文件,写入以下命令后把后缀改为bat:
.\Build\x64\Release\caffe.exe train --solver=.\try1_2\1_F_solver.prototxt
pause
4、测试。
Build\x64\Release\caffe.exe test --model=try1_2\1_F_train.prototxt -weights=try1_2\_iter_50000.caffemodel
pause
这错误率好得吓人。
5、检测代码。
找到了检测代码,但还没有运行成功,让我再研究一下。