声明:由于本人水平有限,所提供的代码、dll、so 等难免存在不少问题,仅供学习使用,不适合工业级应用;请谨慎使用,如造成损失,责任自负。
基于seetaface的android实现,包含检测、对齐、比对,带自动遍历比对人脸,
JNI传递复杂对象,包括数组,
采用NEON优化提高处理速度,提供人脸旋转角度roll,pitch,yaw,
加载人脸识别模型改为只需加载一次,
在手机上提取特征并比对一次约需 1.5~2 秒;在模拟器上只需 500 多毫秒(见下图)。
需要自己编译jni,并把3个.bin的模型文件放到手机的/sdcard/目录,也就是连上电脑后的根目录。
3个模型文件seeta_fa_v1.1.bin, seeta_fd_frontal_v1.0.bin,seeta_fr_v1.0.bin:
链接:http://pan.baidu.com/s/1geMDddD 密码:km1q
apk:http://download.csdn.net/detail/wuzuyu365/9843787
源码: 下载
NDK建议用android-ndk-r9d,SDK建议用Android-19或Android-20
jni编译好的so库:http://download.csdn.net/detail/wuzuyu365/9810605
neon优化的文件:math_functions.cpp
/*
*
* This file is part of the open-source SeetaFace engine, which includes three modules:
* SeetaFace Detection, SeetaFace Alignment, and SeetaFace Identification.
*
* This file is part of the SeetaFace Identification module, containing codes implementing the
* face identification method described in the following paper:
*
*
* VIPLFaceNet: An Open Source Deep Face Recognition SDK,
* Xin Liu, Meina Kan, Wanglong Wu, Shiguang Shan, Xilin Chen.
* In Frontiers of Computer Science.
*
*
* Copyright (C) 2016, Visual Information Processing and Learning (VIPL) group,
* Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China.
*
* The codes are mainly developed by Zining Xu(a M.S. supervised by Prof. Shiguang Shan)
*
* As an open-source face recognition engine: you can redistribute SeetaFace source codes
* and/or modify it under the terms of the BSD 2-Clause License.
*
* You should have received a copy of the BSD 2-Clause License along with the software.
* If not, see < https://opensource.org/licenses/BSD-2-Clause>.
*
* Contact Info: you can send an email to SeetaFace@vipl.ict.ac.cn for any problems.
*
* Note: the above information must be kept whenever or wherever the codes are used.
*
*/
#include "math_functions.h"

#include <cstdint>

// SIMD intrinsics headers are target-specific: pull each one in only when the
// compiler reports that the corresponding instruction set is available, so the
// file builds on both ARM (NEON) and x86 (SSE) toolchains.
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include "arm_neon.h"
#endif
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#include <xmmintrin.h>
#endif
// Dot product of two float arrays.
//
// When the target provides NEON, four products are accumulated per iteration
// in a 128-bit register (the original author reports this saves roughly one
// second per recognition on a phone). On targets without NEON the function
// falls back to a plain scalar loop so the file still compiles; the fallback
// sums in a different order, so results may differ from the NEON path in the
// last few bits.
//
// src1, src2: input arrays, each holding at least `count` floats.
// count:      number of elements to multiply-accumulate; values <= 0 yield 0.
// Returns the sum of src1[i] * src2[i] for i in [0, count).
float simd_dot(const float* src1, const float* src2, const long& count) {
  long i = 0;
  float sum = 0.0f;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
  float32x4_t sum_vec = vdupq_n_f32(0.0f);
  // Process full groups of four floats.
  for (; i + 4 <= count; i += 4) {
    const float32x4_t data_a = vld1q_f32(src1 + i);
    const float32x4_t data_b = vld1q_f32(src2 + i);
    sum_vec = vaddq_f32(sum_vec, vmulq_f32(data_a, data_b));
  }
  // Horizontal reduction; vgetq_lane_f32 is the documented way to read a lane
  // (subscripting a NEON vector is a compiler extension).
  sum = vgetq_lane_f32(sum_vec, 0) + vgetq_lane_f32(sum_vec, 1) +
        vgetq_lane_f32(sum_vec, 2) + vgetq_lane_f32(sum_vec, 3);
#endif
  // Accumulate whatever the vector loop did not cover (everything, when NEON
  // is unavailable).
  for (; i < count; ++i) {
    sum += src1[i] * src2[i];
  }
  return sum;
}
// Dot product of two unsigned-char arrays (an experiment by the original
// author to speed things up further with 8-bit data).
//
// The products are computed with a widening multiply (8-bit x 8-bit -> 16-bit)
// and accumulated in 64-bit lanes, so neither the individual products nor the
// running sum wrap around. (Multiplying and adding directly in 8-bit lanes
// would silently reduce every intermediate value modulo 256.)
// On targets without NEON a scalar loop is used instead; both paths accumulate
// in integers and therefore give the same result.
//
// src1, src2: input arrays, each holding at least `count` bytes.
// count:      number of elements to multiply-accumulate; values <= 0 yield 0.
// Returns the exact integer dot product converted to float (totals above 2^24
// are rounded by that final conversion).
float simd_dot_uc(const unsigned char* src1, const unsigned char* src2, const long& count) {
  long i = 0;
  uint64_t total = 0;
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
  uint64x2_t acc = vdupq_n_u64(0);
  // Process full groups of sixteen bytes.
  for (; i + 16 <= count; i += 16) {
    const uint8x16_t data_a = vld1q_u8(src1 + i);
    const uint8x16_t data_b = vld1q_u8(src2 + i);
    // 16-bit products of the low and high eight bytes (max 255 * 255 fits).
    const uint16x8_t prod_lo = vmull_u8(vget_low_u8(data_a), vget_low_u8(data_b));
    const uint16x8_t prod_hi = vmull_u8(vget_high_u8(data_a), vget_high_u8(data_b));
    // Pairwise-add into 32-bit lanes (at most 4 * 65025 per lane), then fold
    // those into the 64-bit accumulator.
    uint32x4_t partial = vpaddlq_u16(prod_lo);
    partial = vpadalq_u16(partial, prod_hi);
    acc = vpadalq_u32(acc, partial);
  }
  total = vgetq_lane_u64(acc, 0) + vgetq_lane_u64(acc, 1);
#endif
  // Accumulate whatever the vector loop did not cover.
  for (; i < count; ++i) {
    total += static_cast<uint64_t>(src1[i]) * src2[i];
  }
  return static_cast<float>(total);
}
// Reference dot product: a straightforward, non-vectorised loop. It is the
// slow baseline that the SIMD variants in this file are meant to replace.
//
// x, y: input arrays, each holding at least `len` floats.
// len:  number of elements to multiply-accumulate; values <= 0 yield 0.
// Returns the sum of x[i] * y[i] for i in [0, len), accumulated left to right.
float simd_dot_c(const float* x, const float* y, const long& len) {
  float acc = 0.0f;
  long idx = 0;
  while (idx < len) {
    acc += x[idx] * y[idx];
    ++idx;
  }
  return acc;
}
// Dot product of two float arrays using SSE (the desktop/x86 counterpart of
// the NEON routine).
//
// SSE intrinsics only exist on x86, so the vector loop is compiled only when
// the compiler reports SSE support; on other targets (e.g. Android/ARM) the
// function degrades to a scalar loop instead of failing to build.
//
// x, y: input arrays, each holding at least `len` floats (no alignment
//       requirement; unaligned loads are used).
// len:  number of elements to multiply-accumulate; values <= 0 yield 0.
// Returns the sum of x[i] * y[i] for i in [0, len).
float simd_dot_sse(const float* x, const float* y, const long& len) {
  float inner_prod = 0.0f;
  long i = 0;
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
  __m128 acc = _mm_setzero_ps();  // four running partial sums, all zero
  // Process every full group of four floats, including the last one when
  // len is an exact multiple of four.
  for (; i + 4 <= len; i += 4) {
    const __m128 X = _mm_loadu_ps(x + i);
    const __m128 Y = _mm_loadu_ps(y + i);
    acc = _mm_add_ps(acc, _mm_mul_ps(X, Y));
  }
  float temp[4];
  _mm_storeu_ps(temp, acc);  // spill the partial sums for the horizontal add
  inner_prod = temp[0] + temp[1] + temp[2] + temp[3];
#endif
  // Add the elements the vector loop did not cover (everything, when SSE is
  // unavailable).
  for (; i < len; ++i) {
    inner_prod += x[i] * y[i];
  }
  return inner_prod;
}
// Matrix product of A and B written into C.
//
// With _BLAS defined the work is delegated to Armadillo: A and B are wrapped
// (transposed when ta / tb are set) and the n x m product is assigned to a
// matrix constructed over C (the `false` argument presumably makes it alias
// C's memory rather than copy it - confirm against the Armadillo docs).
//
// Without _BLAS only the case ta == true, tb == false is supported (enforced
// through CHECK_TRUE). A is then read as n consecutive rows of k floats and B
// as m consecutive rows of k floats, and the results are stored consecutively:
//   C[i * n + j] = dot(B row i, A row j)   for i in [0, m), j in [0, n).
void matrix_procuct(const float* A, const float* B, float* C, const int n,
    const int m, const int k, bool ta, bool tb) {
#ifdef _BLAS
  arma::fmat lhs = ta ? arma::fmat(A, k, n).t() : arma::fmat(A, n, k);
  arma::fmat rhs = tb ? arma::fmat(B, m, k).t() : arma::fmat(B, k, m);
  arma::fmat product(C, n, m, false);
  product = lhs * rhs;
#else
  CHECK_TRUE(ta && !tb);
  int out_pos = 0;
  const float* b_row = B;
  for (int bi = 0; bi < m; ++bi, b_row += k) {
    // Restart at the first row of A for every row of B.
    const float* a_row = A;
    for (int aj = 0; aj < n; ++aj, a_row += k) {
      C[out_pos] = simd_dot(b_row, a_row, k);
      ++out_pos;
    }
  }
#endif
}