Fusion pipeline
- Detect facial landmarks
- Rotate, scale, and translate the second image so that it fits the first
- Adjust the color balance of the second image to match the first
- Blend the features of the second image into the first
Implementation
Finding the facial landmark matrix
- Extracting facial landmarks with dlib
dlib implements the algorithm from the paper One Millisecond Face Alignment with an Ensemble of Regression Trees
( http://www.csc.kth.se/~vahidk/papers/KazemiCVPR14.pdf , by Vahid Kazemi and Josephine Sullivan).
The algorithm itself is quite complex, but the dlib interface is very simple to use.
- The landmark matrix consists of 68 points, with fixed index ranges corresponding to the facial features:
- 1-16: face contour
- 17-21 and 22-26: the two eyebrows
- 27-35: the nose
- 36-41 and 42-47: the two eyes
- 48-68: the mouth
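For illustration, the points of any one feature can be collected from a fitted dlib shape by index range. The feature_points() helper below is a hypothetical sketch, not part of the source code later in this article (note that dlib's part() indices run from 0 to 67):

#include <vector>
#include <dlib/image_processing.h>
#include <opencv2/core/core.hpp>

// Hypothetical helper: collect the landmarks of one facial feature by
// index range from a fitted 68-point shape, e.g. the nose (27-35).
std::vector<cv::Point2f> feature_points(const dlib::full_object_detection& shape,
                                        int first, int last)
{
    std::vector<cv::Point2f> pts;
    for (int i = first; i <= last; ++i)
        pts.push_back(cv::Point2f(shape.part(i).x(), shape.part(i).y()));
    return pts;
}
// Usage: std::vector<cv::Point2f> nose = feature_points(shape, 27, 35);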
The get_landmarks() function converts an image into a numpy array and returns a 68x2 element matrix: each row holds the x, y coordinates of one feature point of the input image.
The feature extractor (predictor) requires a rough bounding box as its input. This is provided by a traditional face detector (detector), which returns a list of rectangles, each of which corresponds to one face in the image.
To build the feature extractor, a pre-trained model is required; it can be downloaded from the dlib sourceforge repository
( http://sourceforge.net/projects/dclib/files/dlib/v18.10/shape_predictor_68_face_landmarks.dat.bz2 ).
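The source code later in this article does the same job in C++. Here is a minimal, hedged sketch of the detector-plus-predictor flow, with error handling reduced to a single check (the model path in the usage note is a placeholder):

#include <stdexcept>
#include <vector>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing.h>
#include <dlib/opencv.h>
#include <opencv2/core/core.hpp>

// C++ counterpart of get_landmarks(): the detector supplies a rough bounding
// box per face; the predictor fits the 68-point model inside the first box.
dlib::full_object_detection get_landmarks(const cv::Mat& im,
                                          dlib::frontal_face_detector& detector,
                                          dlib::shape_predictor& predictor)
{
    dlib::cv_image<dlib::bgr_pixel> img(im);
    std::vector<dlib::rectangle> faces = detector(img); // one rectangle per face
    if (faces.empty())
        throw std::runtime_error("no face found");
    return predictor(img, faces[0]);                    // 68 (x, y) parts
}

// Usage: deserialize the pre-trained model once, then call per image.
// dlib::frontal_face_detector detector = dlib::get_frontal_face_detector();
// dlib::shape_predictor predictor;
// dlib::deserialize("shape_predictor_68_face_landmarks.dat") >> predictor;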
Aligning the face images
Principle
We now have two landmark matrices, in which each row holds a set of coordinates for one particular facial feature (e.g., row 30 gives the coordinates of the nose). We now need to work out how to rotate, translate, and scale the points of the first matrix so that they fit the points of the second as closely as possible. The idea is that the same transformation can then be used to overlay the second image on top of the first.
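Formally, this is the ordinary Procrustes problem: find a scale $s$, an orthogonal rotation matrix $R$, and a translation vector $T$ that minimize

$$\sum_{i=1}^{68} \left\| s R p_i + T - q_i \right\|^2,$$

where $p_i$ and $q_i$ are the i-th landmark coordinates (rows) of the first and second matrix. The steps below solve exactly this problem.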
Implementation steps
The code carries out the following steps:
- 1. Convert the input matrices to floating point. This is required for the subsequent steps.
- 2. Subtract the centroid from each point set. Once an optimal scaling and rotation has been found for the resulting new point sets, the centroids c1 and c2 can be used to recover the full solution.
- 3. Likewise, divide each point set by its standard deviation. This removes the scaling component of the problem.
- 4. Compute the rotation portion using Singular Value Decomposition (SVD). See the Wikipedia article on the Orthogonal Procrustes problem for details ( https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem ).
- 5. Return the complete transformation as an affine transformation matrix ( https://en.wikipedia.org/wiki/Transformation_matrix#Affine_transformations ).
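A sketch of these five steps in C++ with OpenCV follows (a minimal, hedged implementation; it assumes OpenCV 3 constant names such as cv::REDUCE_AVG, which on the 2.4-era headers used later is spelled CV_REDUCE_AVG). The resulting 2x3 matrix can be fed to cv::warpAffine; note that the JNI source further down takes a different route, estimating a full 3x3 homography from all 68 points with cv::findHomography and warping with cv::warpPerspective.

#include <cmath>
#include <vector>
#include <opencv2/core/core.hpp>

// Steps 1-5: solve the orthogonal Procrustes problem for two point sets and
// return the 2x3 affine matrix mapping p1 onto p2 (for cv::warpAffine).
cv::Mat transformation_from_points(const std::vector<cv::Point2f>& p1,
                                   const std::vector<cv::Point2f>& p2)
{
    // 1. Convert the input to floating-point N x 2 matrices.
    cv::Mat pts1((int)p1.size(), 2, CV_64F), pts2((int)p2.size(), 2, CV_64F);
    for (int i = 0; i < (int)p1.size(); ++i) {
        pts1.at<double>(i, 0) = p1[i].x;  pts1.at<double>(i, 1) = p1[i].y;
        pts2.at<double>(i, 0) = p2[i].x;  pts2.at<double>(i, 1) = p2[i].y;
    }
    // 2. Subtract the centroid of each point set.
    cv::Mat c1, c2;
    cv::reduce(pts1, c1, 0, cv::REDUCE_AVG);
    cv::reduce(pts2, c2, 0, cv::REDUCE_AVG);
    for (int i = 0; i < pts1.rows; ++i) { pts1.row(i) -= c1; pts2.row(i) -= c2; }
    // 3. Divide by the standard deviation to remove the scale component.
    double s1 = cv::norm(pts1) / std::sqrt((double)pts1.total());
    double s2 = cv::norm(pts2) / std::sqrt((double)pts2.total());
    pts1 /= s1;  pts2 /= s2;
    // 4. The SVD of pts1^T * pts2 gives the optimal rotation.
    cv::Mat w, u, vt;
    cv::SVD::compute(pts1.t() * pts2, w, u, vt);
    cv::Mat r = (u * vt).t();
    // 5. Assemble the full transform: scaled rotation plus translation.
    cv::Mat m(2, 3, CV_64F);
    cv::Mat sr = (s2 / s1) * r;
    sr.copyTo(m(cv::Rect(0, 0, 2, 2)));
    cv::Mat t = c2.t() - sr * c1.t();
    t.copyTo(m(cv::Rect(2, 0, 1, 2)));
    return m;
}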
Source code
#include <string.h>
#include <jni.h>
#include <android/log.h>
#include <string>
#include <stdio.h>
#include <vector>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/image_processing/render_face_detections.h>
#include <dlib/image_loader/load_image.h>
#include <dlib/image_processing.h>
#include <dlib/opencv/cv_image.h>
#include <dlib/gui_widgets.h>
#include <dlib/image_io.h>
#include <dlib/opencv.h>
using namespace dlib;
using namespace std;
using namespace cv;
#define LOG_TAG "People_Det-JNI"
#define LOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, LOG_TAG, __VA_ARGS__)
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
/*
#ifdef __cplusplus
extern "C" {
#endif
*/
static dlib::shape_predictor msp;
static frontal_face_detector detector;
static dlib::full_object_detection FACE_MARK1,FACE_MARK2;
static Mat wrap_dst, src_img2, mask;
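// Detect the first face in the image and fit the 68-point shape model to it.
// If no face is found, a default-constructed (empty) detection is returned.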
dlib::full_object_detection detectface(dlib::cv_image<dlib::bgr_pixel> mat)
{
dlib::full_object_detection mark;
std::vector<dlib::rectangle> mRets;
mRets = detector(mat);
if(mRets.size() > 0)
mark = msp(mat, mRets[0]);
return mark;
}
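// Main JNI worker: takes two RGBA pixel buffers, warps face1 onto face2 via a
// homography, color-corrects it, blends it through a feathered mask, and
// writes the result to data.jpg.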
jboolean facesRotate_UseData(JNIEnv* env, jobject thiz,
jintArray face1,jint w1,jint h1,
jintArray face2,jint w2,jint h2)
{
jint *cbuf1;
jint *cbuf2;
cbuf1 = env->GetIntArrayElements(face1, NULL);
cbuf2 = env->GetIntArrayElements(face2, NULL);
if(cbuf1 == NULL || cbuf2 == NULL){
return JNI_FALSE;
}
Mat src_img(h1, w1, CV_8UC4, (unsigned char*)cbuf1);
Mat src_img1(h2, w2, CV_8UC4, (unsigned char*)cbuf2);
// Android bitmaps arrive as RGBA; convert to BGR so the channel order
// matches dlib::bgr_pixel below and OpenCV's imwrite at the end.
cvtColor(src_img,src_img,CV_RGBA2BGR);
cvtColor(src_img1,src_img1,CV_RGBA2BGR);
if(!src_img.data || !src_img1.data)
LOGD("data error");
dlib::cv_image<dlib::bgr_pixel> img(src_img);
dlib::cv_image<dlib::bgr_pixel> img1(src_img1);
FACE_MARK1 = detectface(img);
FACE_MARK2 = detectface(img1);
std::vector<cv::Point2f> srcTri;
std::vector<cv::Point2f> dstTri;
for(int i = 0;i < 68;i++) {
srcTri.push_back(cv::Point2f(FACE_MARK1.part(i).x(),FACE_MARK1.part(i).y()));
dstTri.push_back(cv::Point2f(FACE_MARK2.part(i).x(),FACE_MARK2.part(i).y()));
}
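// Instead of the similarity transform derived in the steps above, this
// implementation estimates a full 3x3 homography from all 68 landmark
// correspondences and warps with warpPerspective below.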
cv::Mat warp_mat;
warp_mat = cv::findHomography(srcTri,dstTri,0);
//the faceLine's size is 68,including all face data,such as:
//the index 0 - 16:jaw data
//the index 17 - 68:face data
//the index 17 - 21:right brow data
//the index 22 - 26:left brow data
//the index 27 - 34:nose data
//the index 36 - 41:right eye data
//the index 42 - 47:left eye data
//the index 48 - 60:mouth data
src_img2 = src_img1;
cv::warpPerspective(src_img,wrap_dst,warp_mat,src_img1.size());
dlib::cv_image<dlib::bgr_pixel> img3(wrap_dst);
FACE_MARK1 = detectface(img3);
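// Build the blend mask: draw the inner facial features (brows, eyes, nose,
// mouth) as white lines on a black canvas, then feather it with Gaussian
// blur further down.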
Mat new_img = Mat::zeros(src_img1.size(),CV_8UC3);
std::vector<image_window::overlay_line> faceLine;
faceLine = render_face_detections(FACE_MARK1);
for(int i = 17 ;i < 61; i++){
cv::Point p1(faceLine[i].p1.x(),faceLine[i].p1.y());
cv::Point p2(faceLine[i].p2.x(),faceLine[i].p2.y());
cv::line(new_img,p1,p2,cv::Scalar(255,255,255),1,cv::LINE_8);
}
imwrite("/data/data/com.example.faceswapdemo/files/newimg.png",new_img);
int blur_amount = faceLine[41].p1.x() - faceLine[47].p1.x();
int blur_mask = blur_amount;
if (blur_mask % 2 == 0) {
blur_mask += 1;
}
LOGD("blur_mask%d-------",blur_mask);
GaussianBlur(new_img,mask,Size(blur_mask,blur_mask),0,0);
GaussianBlur(mask,mask,Size(blur_mask,blur_mask),0,0);
// FileStorage fs("/data/data/com.example.faceswapdemo/files/face.xml",FileStorage::WRITE);
// fs << "mask" << mask;
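// Color correction: scale each channel of the warped face by the ratio of
// Gaussian blurs of the target and warped images (blur1 / blur0), so the
// warped face picks up the target image's local color balance.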
blur_amount = blur_amount*3/5;
if (blur_amount % 2 == 0) {
blur_amount += 1;
}
LOGD("blur_amount%d-------",blur_amount);
Mat blur0 = Mat(wrap_dst.size(), wrap_dst.type());
GaussianBlur(wrap_dst, blur0, Size( blur_amount, blur_amount ), 0, 0 );
Mat blur1= Mat(src_img1.size(), src_img1.type());
GaussianBlur(src_img1, blur1, Size( blur_amount, blur_amount ), 0, 0 );
Mat out = Mat::zeros( blur1.size(), blur1.type());
for (int y = 0; y < blur1.rows; y++) {
for (int x = 0; x < blur1.cols; x++) {
for (int c = 0; c < 3; c++) {
if ((blur0.at<Vec3b>(y, x)[c]) > 0) {
out.at<Vec3b>(y, x)[c] = saturate_cast<uchar>(
(wrap_dst.at<Vec3b>(y, x)[c])
* (blur1.at<Vec3b>(y, x)[c])
/ (blur0.at<Vec3b>(y, x)[c]));
} else {
out.at<Vec3b>(y, x)[c] = saturate_cast<uchar>(
(wrap_dst.at<Vec3b>(y, x)[c])
* (blur1.at<Vec3b>(y, x)[c]));
}
}
}
}
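// Composite: wherever the feathered mask is non-zero, copy the
// color-corrected warped face into the target image.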
for(int y = 0; y < src_img1.rows; y++ )
{
for(int x = 0; x < src_img1.cols; x++ )
{
if((mask.at<Vec3b>(y,x)[0]) > 0) {
for(int c = 0; c < 3; c++ )
{
src_img1.at<Vec3b>(y,x)[c]= (out.at<Vec3b>(y,x)[c]);
}
}
}
}
// src_img1 += 128 * (src_img1 <= 1.0);
// for(int i = 0; i < faceLine1.size(); i++) {
// //mark the all face data
// cv::Point p1(faceLine1[i].p1.x(),faceLine[i].p1.y());
// cv::Point p2(faceLine1[i].p2.x(),faceLine[i].p2.y());
// cv::line(src_img,p1,p2,cv::Scalar(0,255,0),1,cv::LINE_8);
// }
//
// for(int i = 0; i < faceLine2.size(); i++) {
// //mark the all face data
// cv::Point p1(faceLine2[i].p1.x(),faceLine[i].p1.y());
// cv::Point p2(faceLine2[i].p2.x(),faceLine[i].p2.y());
// cv::line(src_img1,p1,p2,cv::Scalar(0,255,0),1,cv::LINE_8);
// }
//Size dsize = Size(src_img1.cols,src_img1.rows);
//Mat image2 = Mat(dsize,wrap_dst.type());
//LOGD("img1 w:%d h:%d",src_img1.cols,src_img1.rows);
//resize(wrap_dst, image2,dsize);
//LOGD("dst w:%d h:%d",image2.cols,image2.rows);
imwrite("/data/data/com.example.faceswapdemo/files/data.jpg",src_img1);
env->ReleaseIntArrayElements(face1, cbuf1, 0);
env->ReleaseIntArrayElements(face2, cbuf2, 0);
return JNI_TRUE;
}
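// One-time initialization: load the pre-trained 68-landmark model and create
// the frontal face detector. Must be called before facecheck().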
jboolean studytrain()
{
dlib::deserialize("/data/data/com.example.faceswapdemo/files/shape_predictor_68_face_landmarks.dat") >> msp;
detector = get_frontal_face_detector();
return JNI_TRUE;
}
jboolean facecheck(JNIEnv* env, jobject thiz,
jintArray face1,jint w1,jint h1,
jintArray face2,jint w2,jint h2)
{
return facesRotate_UseData(env,thiz,face1,w1,h1,face2,w2,h2);
}
jboolean removefaceswap()
{
return JNI_TRUE;
}
void startfaceswap()
{
}
void endfaceswap()
{
}
void facemapping()
{
}
void facefusion()
{
}
void faceswap()
{
}
void setfaceswapcallback()
{
}
/*jintArray facesRotate_UsePath(JNIEnv* env, jobject thiz,jstring path1,jstring path2)
{
LOGD("in det");
frontal_face_detector detector = get_frontal_face_detector();
char * str1;
str1=(char*)env->GetStringUTFChars(path1,NULL);
char * str2;
str2=(char*)env->GetStringUTFChars(path2,NULL);
LOGD("load picture");
Mat src_img = imread(str1,1);
Mat src_img1 = imread(str2,1);
LOGD("%d-----img1-----%d",src_img.cols,src_img.rows);
LOGD("%d-----img1-----%d",src_img1.cols,src_img1.rows);
LOGD("opencv img to dlib img");
if(!src_img.data || !src_img1.data)
LOGD("data error");
dlib::cv_image<dlib::bgr_pixel> img(src_img);
dlib::cv_image<dlib::bgr_pixel> img1(src_img1);
LOGD("start detector");
std::vector<dlib::rectangle> mRets;
std::vector<dlib::rectangle> mRets1;
double t = (double)getTickCount();
mRets = detector(img);
mRets1 = detector(img1);
t = ((double)getTickCount()-t)/getTickFrequency();
dlib::shape_predictor msp;
LOGD("detect use %lf s",t);
t = (double)getTickCount();
dlib::deserialize("/data/data/com.example.faceswapdemo/files/shape_predictor_68_face_landmarks.dat") >> msp;
t = ((double)getTickCount()-t)/getTickFrequency();
LOGD("load local mark use %lf s",t);
LOGD("get face mark from the picture");
t = (double)getTickCount();
dlib::full_object_detection shape1,shape2;
if (mRets.size() != 0) {
for (unsigned long j = 0; j < mRets.size(); ++j)
shape1 = msp(img, mRets[j]);
}
if (mRets1.size() != 0) {
for (unsigned long j = 0; j < mRets1.size(); ++j)
shape2 = msp(img1, mRets1[j]);
}
t = ((double)getTickCount()-t)/getTickFrequency();
LOGD("--get face mark use %lf s---",t);
LOGD("--use face mark to get mapping matrix---");
t = (double)getTickCount();
std::vector<cv::Point2f> srcTri;
std::vector<cv::Point2f> dstTri;
for(int i = 0;i < 16;i++) {
srcTri.push_back(cv::Point2f(shape1.part(i).x(),shape1.part(i).y()));
dstTri.push_back(cv::Point2f(shape2.part(i).x(),shape2.part(i).y()));
}
LOGD("--got special points---");
cv::Mat warp_mat;
warp_mat = cv::findHomography(srcTri,dstTri,0);
LOGD("%d---get change matrix-----%d",warp_mat.cols,warp_mat.rows);
uchar* p1;
for(int i = 0;i < 3;i ++) {
p1 = warp_mat.ptr<uchar>(i);
for(int j = 0;j < 3;j ++)
LOGD("matrix-----%d",(int)p1[j]);
}
//the faceLine's size is 68,including all face data,such as:
//the index 0 - 16:jaw data
//the index 17 - 68:face data
//the index 17 - 21:right brow data
//the index 22 - 26:left brow data
//the index 27 - 34:nose data
//the index 36 - 41:right eye data
//the index 42 - 47:left eye data
//the index 48 - 60:mouth data
LOGD("use the mapping matrix to change the picture");
Mat wrap_dst;
cv::warpPerspective(src_img,wrap_dst,warp_mat,src_img.size());
t = ((double)getTickCount()-t)/getTickFrequency();
LOGD("change picture use %lf s",t);
int size = wrap_dst.cols * wrap_dst.rows;
jintArray result = env->NewIntArray(size);
uchar* p;
p = wrap_dst.ptr(0);
cv::imwrite("/data/data/com.example.faceswapdemo/files/face_wrap1.png", wrap_dst);
env->SetIntArrayRegion(result, 0, size, (int*)p);
env->ReleaseStringUTFChars(path1, str1);
env->ReleaseStringUTFChars(path2, str2);
return result;
}*/
void testmat(jlong addr1,jlong addr2)
{
Mat& src = *(Mat*)addr1;
Mat& src1 = *(Mat*)addr2;
// Mat src1 = Mat(*((Mat*)addr2));
cvtColor(src1,src1,CV_RGBA2RGB);
cvtColor(src,src,CV_BGR2GRAY);
}
static JNINativeMethod gMethods[] = {
{"facecheck", "([III[III)Z", (void*)facecheck},
{"studytrain","()Z",(void*) studytrain},
// {"faceRotateUsePath", "(Ljava/lang/String;Ljava/lang/String;)[I", (void*)facesRotate_UsePath},
// {"testMat","(JJ)V",(void*) testmat},
// {"faceRotate", "()I", (void*)facesRotate},
};
/*
* Register native methods for a single class
*/
static int registerNativeMethods(JNIEnv* env
, const char* className
, JNINativeMethod* gMethods, int numMethods) {
jclass clazz;
clazz = env->FindClass(className);
if (clazz == NULL) {
return JNI_FALSE;
}
if (env->RegisterNatives(clazz, gMethods, numMethods) < 0) {
return JNI_FALSE;
}
return JNI_TRUE;
}
/*
* Register native methods for all classes
*/
static int registerNatives(JNIEnv* env) {
const char* kClassName = "com/example/faceswapdemo/FaceUtils";// the class to register
return registerNativeMethods(env, kClassName, gMethods,
sizeof(gMethods)/sizeof(gMethods[0]));
}
jint JNIEXPORT JNICALL JNI_OnLoad(JavaVM* vm, void* reserved)
{
LOGE("JNI On Load");
JNIEnv* env = NULL;
jint result = -1;
if (vm->GetEnv((void**)&env, JNI_VERSION_1_4) != JNI_OK) {
LOGE("GetEnv failed!");
return result;
}
LOGE("start register!");
if (!registerNatives(env)) {// register native methods
LOGE("register failed!");
return result;
}
return JNI_VERSION_1_4;
}
/*
#ifdef __cplusplus
}
#endif
*/