在运行voc-release3.1之前,需要修改dt.cc、features.cc、resize.cc、fconv.cc、compile.m五个文件,
修改后的文件为:
dt.cpp
#include <math.h>
#include <sys/types.h>
#include "mex.h"
#define int32_t int
/*
* Generalized distance transforms.
* We use a simple nlog(n) divide and conquer algorithm instead of the
* theoretically faster linear method, for no particular reason except
* that this is a bit simpler and I wanted to test it out.
*
* The code is a bit convoluted because dt1d can operate either along
* a row or column of an array.
*/
static inline int square(int x) { return x*x; }
// dt helper function
void dt_helper(double *src, double *dst, int *ptr, int step,
int s1, int s2, int d1, int d2, double a, double b) {
if (d2 >= d1) {
int d = (d1+d2) >> 1;
int s = s1;
for (int p = s1+1; p <= s2; p++)
if (src[s*step] + a*square(d-s) + b*(d-s) >
src[p*step] + a*square(d-p) + b*(d-p))
s = p;
dst[d*step] = src[s*step] + a*square(d-s) + b*(d-s);
ptr[d*step] = s;
dt_helper(src, dst, ptr, step, s1, s, d1, d-1, a, b);
dt_helper(src, dst, ptr, step, s, s2, d+1, d2, a, b);
}
}
// dt of 1d array
void dt1d(double *src, double *dst, int *ptr, int step, int n,
double a, double b) {
dt_helper(src, dst, ptr, step, 0, n-1, 0, n-1, a, b);
}
// matlab entry point
// [M, Ix, Iy] = dt(vals, ax, bx, ay, by)
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 5)
mexErrMsgTxt("Wrong number of inputs");
if (nlhs != 3)
mexErrMsgTxt("Wrong number of outputs");
if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS)
mexErrMsgTxt("Invalid input");
const int *dims = mxGetDimensions(prhs[0]);
double *vals = (double *)mxGetPr(prhs[0]);
double ax = mxGetScalar(prhs[1]);
double bx = mxGetScalar(prhs[2]);
double ay = mxGetScalar(prhs[3]);
double by = mxGetScalar(prhs[4]);
mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);
mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);
mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);
double *M = (double *)mxGetPr(mxM);
int32_t *Ix = (int32_t *)mxGetPr(mxIx);
int32_t *Iy = (int32_t *)mxGetPr(mxIy);
double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double));
int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));
int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));
for (int x = 0; x < dims[1]; x++)
dt1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], 1, dims[0], ay, by);
for (int y = 0; y < dims[0]; y++)
dt1d(tmpM+y, M+y, tmpIx+y, dims[0], dims[1], ax, bx);
// get argmins and adjust for matlab indexing from 1
for (int x = 0; x < dims[1]; x++) {
for (int y = 0; y < dims[0]; y++) {
int p = x*dims[0]+y;
Ix[p] = tmpIx[p]+1;
Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1;
}
}
mxFree(tmpM);
mxFree(tmpIx);
mxFree(tmpIy);
plhs[0] = mxM;
plhs[1] = mxIx;
plhs[2] = mxIy;
}
fconv.cpp
#include "mex.h"
#include <math.h>
#include <string.h>
/*
* This code is used for computing filter responses. It computes the
* response of a set of filters with a feature map.
*
* Basic version, relatively slow but very compatible.
*/
struct thread_data {
double *A;
double *B;
double *C;
mxArray *mxC;
const mwSize *A_dims;
const mwSize *B_dims;
mwSize C_dims[2];
};
// convolve A and B
void process(void *thread_arg) {
thread_data *args = (thread_data *)thread_arg;
double *A = args->A;
double *B = args->B;
double *C = args->C;
const mwSize *A_dims = args->A_dims;
const mwSize *B_dims = args->B_dims;
const mwSize *C_dims = args->C_dims;
int num_features = args->A_dims[2];
for (int f = 0; f < num_features; f++) {
double *dst = C;
double *A_src = A + f*A_dims[0]*A_dims[1];
double *B_src = B + f*B_dims[0]*B_dims[1];
for (int x = 0; x < C_dims[1]; x++) {
for (int y = 0; y < C_dims[0]; y++) {
double val = 0;
for (int xp = 0; xp < B_dims[1]; xp++) {
double *A_off = A_src + (x+xp)*A_dims[0] + y;
double *B_off = B_src + xp*B_dims[0];
switch(B_dims[0]) {
case 20: val += A_off[19] * B_off[19];
case 19: val += A_off[18] * B_off[18];
case 18: val += A_off[17] * B_off[17];
case 17: val += A_off[16] * B_off[16];
case 16: val += A_off[15] * B_off[15];
case 15: val += A_off[14] * B_off[14];
case 14: val += A_off[13] * B_off[13];
case 13: val += A_off[12] * B_off[12];
case 12: val += A_off[11] * B_off[11];
case 11: val += A_off[10] * B_off[10];
case 10: val += A_off[9] * B_off[9];
case 9: val += A_off[8] * B_off[8];
case 8: val += A_off[7] * B_off[7];
case 7: val += A_off[6] * B_off[6];
case 6: val += A_off[5] * B_off[5];
case 5: val += A_off[4] * B_off[4];
case 4: val += A_off[3] * B_off[3];
case 3: val += A_off[2] * B_off[2];
case 2: val += A_off[1] * B_off[1];
case 1: val += A_off[0] * B_off[0];
break;
default:
for (int yp = 0; yp < B_dims[0]; yp++) {
val += *(A_off++) * *(B_off++);
}
}
}
*(dst++) += val;
}
}
}
}
// matlab entry point
// C = fconv(A, cell of B, start, end);
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 4)
mexErrMsgTxt("Wrong number of inputs");
if (nlhs != 1)
mexErrMsgTxt("Wrong number of outputs");
// get A
const mxArray *mxA = prhs[0];
if (mxGetNumberOfDimensions(mxA) != 3 ||
mxGetClassID(mxA) != mxDOUBLE_CLASS)
mexErrMsgTxt("Invalid input: A");
// get B and start/end
const mxArray *cellB = prhs[1];
mwSize num_bs = mxGetNumberOfElements(cellB);
int start = (int)mxGetScalar(prhs[2]) - 1;
int end = (int)mxGetScalar(prhs[3]) - 1;
if (start < 0 || end >= num_bs || start > end)
mexErrMsgTxt("Invalid input: start/end");
int len = end-start+1;
// output cell
plhs[0] = mxCreateCellMatrix(1, len);
// do convolutions
thread_data td;
const mwSize *A_dims = mxGetDimensions(mxA);
double *A = (double *)mxGetPr(mxA);
for (int i = 0; i < len; i++) {
const mxArray *mxB = mxGetCell(cellB, i+start);
td.A_dims = A_dims;
td.A = A;
td.B_dims = mxGetDimensions(mxB);
td.B = (double *)mxGetPr(mxB);
if (mxGetNumberOfDimensions(mxB) != 3 ||
mxGetClassID(mxB) != mxDOUBLE_CLASS ||
td.A_dims[2] != td.B_dims[2])
mexErrMsgTxt("Invalid input: B");
// compute size of output
int height = td.A_dims[0] - td.B_dims[0] + 1;
int width = td.A_dims[1] - td.B_dims[1] + 1;
if (height < 1 || width < 1)
mexErrMsgTxt("Invalid input: B should be smaller than A");
td.C_dims[0] = height;
td.C_dims[1] = width;
td.mxC = mxCreateNumericArray(2, td.C_dims, mxDOUBLE_CLASS, mxREAL);
td.C = (double *)mxGetPr(td.mxC);
process((void *)&td);
mxSetCell(plhs[0], i, td.mxC);
}
}
resize.cpp
#include <math.h>
#include <assert.h>
#include <string.h>
#include "mex.h"
/*
* Fast image subsampling.
* This is used to construct the feature pyramid.
*/
#define bzero(a,b) memset(a,0,b)
int round(float a){float tmp = a-(int)a; if(tmp>=0.5) return(int)a+1;else return (int)a;}
// struct used for caching interpolation values
struct alphainfo {
int si, di;
double alpha;
};
// copy src into dst using pre-computed interpolation values
void alphacopy(double *src, double *dst, struct alphainfo *ofs, int n) {
struct alphainfo *end = ofs + n;
while (ofs != end) {
dst[ofs->di] += ofs->alpha * src[ofs->si];
ofs++;
}
}
// resize along each column
// result is transposed, so we can apply it twice for a complete resize
void resize1dtran(double *src, int sheight, double *dst, int dheight,
int width, int chan) {
double scale = (double)dheight/(double)sheight;
double invscale = (double)sheight/(double)dheight;
// we cache the interpolation values since they can be
// shared among different columns
int len = (int)ceil(dheight*invscale) + 2*dheight;
//alphainfo ofs[len];linux下的语句
alphainfo *ofs = new alphainfo[len];//windows下的语句
int k = 0;
for (int dy = 0; dy < dheight; dy++) {
double fsy1 = dy * invscale;
double fsy2 = fsy1 + invscale;
int sy1 = (int)ceil(fsy1);
int sy2 = (int)floor(fsy2);
if (sy1 - fsy1 > 1e-3) {
assert(k < len);
assert(sy-1 >= 0);
ofs[k].di = dy*width;
ofs[k].si = sy1-1;
ofs[k++].alpha = (sy1 - fsy1) * scale;
}
for (int sy = sy1; sy < sy2; sy++) {
assert(k < len);
assert(sy < sheight);
ofs[k].di = dy*width;
ofs[k].si = sy;
ofs[k++].alpha = scale;
}
if (fsy2 - sy2 > 1e-3) {
assert(k < len);
assert(sy2 < sheight);
ofs[k].di = dy*width;
ofs[k].si = sy2;
ofs[k++].alpha = (fsy2 - sy2) * scale;
}
}
// resize each column of each color channel
bzero(dst, chan*width*dheight*sizeof(double));
for (int c = 0; c < chan; c++) {
for (int x = 0; x < width; x++) {
double *s = src + c*width*sheight + x*sheight;
double *d = dst + c*width*dheight + x;
alphacopy(s, d, ofs, k);
}
}
delete [] ofs;//windows下的语句
}
// main function
// takes a double color image and a scaling factor
// returns resized image
mxArray *resize(const mxArray *mxsrc, const mxArray *mxscale) {
double *src = (double *)mxGetPr(mxsrc);
const int *sdims = mxGetDimensions(mxsrc);
if (mxGetNumberOfDimensions(mxsrc) != 3 ||
mxGetClassID(mxsrc) != mxDOUBLE_CLASS)
mexErrMsgTxt("Invalid input");
double scale = mxGetScalar(mxscale);
if (scale > 1)
mexErrMsgTxt("Invalid scaling factor");
int ddims[3];
ddims[0] = (int)round(sdims[0]*scale);
ddims[1] = (int)round(sdims[1]*scale);
ddims[2] = sdims[2];
mxArray *mxdst = mxCreateNumericArray(3, ddims, mxDOUBLE_CLASS, mxREAL);
double *dst = (double *)mxGetPr(mxdst);
double *tmp = (double *)mxCalloc(ddims[0]*sdims[1]*sdims[2], sizeof(double));
resize1dtran(src, sdims[0], tmp, ddims[0], sdims[1], sdims[2]);
resize1dtran(tmp, sdims[1], dst, ddims[1], ddims[0], sdims[2]);
mxFree(tmp);
return mxdst;
}
// matlab entry point
// dst = resize(src, scale)
// image should be color with double values
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 2)
mexErrMsgTxt("Wrong number of inputs");
if (nlhs != 1)
mexErrMsgTxt("Wrong number of outputs");
plhs[0] = resize(prhs[0], prhs[1]);
}
features.cpp
#include <math.h>
#include "mex.h"
// small value, used to avoid division by zero
#define eps 0.0001
int round(float a){float tmp = a-(int)a; if(tmp>=0.5) return (int)a+1;else return (int)a;}
// unit vectors used to compute gradient orientation
double uu[9] = {1.0000,
0.9397,
0.7660,
0.500,
0.1736,
-0.1736,
-0.5000,
-0.7660,
-0.9397};
double vv[9] = {0.0000,
0.3420,
0.6428,
0.8660,
0.9848,
0.9848,
0.8660,
0.6428,
0.3420};
static inline double min(double x, double y) { return (x <= y ? x : y); }
static inline double max(double x, double y) { return (x <= y ? y : x); }
static inline int min(int x, int y) { return (x <= y ? x : y); }
static inline int max(int x, int y) { return (x <= y ? y : x); }
// main function:
// takes a double color image and a bin size
// returns HOG features
mxArray *process(const mxArray *mximage, const mxArray *mxsbin) {
double *im = (double *)mxGetPr(mximage);
const int *dims = mxGetDimensions(mximage);
if (mxGetNumberOfDimensions(mximage) != 3 ||
dims[2] != 3 ||
mxGetClassID(mximage) != mxDOUBLE_CLASS)
mexErrMsgTxt("Invalid input");
int sbin = (int)mxGetScalar(mxsbin);
// memory for caching orientation histograms & their norms
int blocks[2];
blocks[0] = (int)round((double)dims[0]/(double)sbin);
blocks[1] = (int)round((double)dims[1]/(double)sbin);
double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));
double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double));
// memory for HOG features
int out[3];
out[0] = max(blocks[0]-2, 0);
out[1] = max(blocks[1]-2, 0);
out[2] = 27+4;
mxArray *mxfeat = mxCreateNumericArray(3, out, mxDOUBLE_CLASS, mxREAL);
double *feat = (double *)mxGetPr(mxfeat);
int visible[2];
visible[0] = blocks[0]*sbin;
visible[1] = blocks[1]*sbin;
for (int x = 1; x < visible[1]-1; x++) {
for (int y = 1; y < visible[0]-1; y++) {
// first color channel
double *s = im + min(x, dims[1]-2)*dims[0] + min(y, dims[0]-2);
double dy = *(s+1) - *(s-1);
double dx = *(s+dims[0]) - *(s-dims[0]);
double v = dx*dx + dy*dy;
// second color channel
s += dims[0]*dims[1];
double dy2 = *(s+1) - *(s-1);
double dx2 = *(s+dims[0]) - *(s-dims[0]);
double v2 = dx2*dx2 + dy2*dy2;
// third color channel
s += dims[0]*dims[1];
double dy3 = *(s+1) - *(s-1);
double dx3 = *(s+dims[0]) - *(s-dims[0]);
double v3 = dx3*dx3 + dy3*dy3;
// pick channel with strongest gradient
if (v2 > v) {
v = v2;
dx = dx2;
dy = dy2;
}
if (v3 > v) {
v = v3;
dx = dx3;
dy = dy3;
}
// snap to one of 18 orientations
double best_dot = 0;
int best_o = 0;
for (int o = 0; o < 9; o++) {
double dot = uu[o]*dx + vv[o]*dy;
if (dot > best_dot) {
best_dot = dot;
best_o = o;
} else if (-dot > best_dot) {
best_dot = -dot;
best_o = o+9;
}
}
// add to 4 histograms around pixel using linear interpolation
double xp = ((double)x+0.5)/(double)sbin - 0.5;
double yp = ((double)y+0.5)/(double)sbin - 0.5;
int ixp = (int)floor(xp);
int iyp = (int)floor(yp);
double vx0 = xp-ixp;
double vy0 = yp-iyp;
double vx1 = 1.0-vx0;
double vy1 = 1.0-vy0;
v = sqrt(v);
if (ixp >= 0 && iyp >= 0) {
*(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
vx1*vy1*v;
}
if (ixp+1 < blocks[1] && iyp >= 0) {
*(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) +=
vx0*vy1*v;
}
if (ixp >= 0 && iyp+1 < blocks[0]) {
*(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
vx1*vy0*v;
}
if (ixp+1 < blocks[1] && iyp+1 < blocks[0]) {
*(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) +=
vx0*vy0*v;
}
}
}
// compute energy in each block by summing over orientations
for (int o = 0; o < 9; o++) {
double *src1 = hist + o*blocks[0]*blocks[1];
double *src2 = hist + (o+9)*blocks[0]*blocks[1];
double *dst = norm;
double *end = norm + blocks[1]*blocks[0];
while (dst < end) {
*(dst++) += (*src1 + *src2) * (*src1 + *src2);
src1++;
src2++;
}
}
// compute features
for (int x = 0; x < out[1]; x++) {
for (int y = 0; y < out[0]; y++) {
double *dst = feat + x*out[0] + y;
double *src, *p, n1, n2, n3, n4;
p = norm + (x+1)*blocks[0] + y+1;
n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
p = norm + (x+1)*blocks[0] + y;
n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
p = norm + x*blocks[0] + y+1;
n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
p = norm + x*blocks[0] + y;
n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
double t1 = 0;
double t2 = 0;
double t3 = 0;
double t4 = 0;
// contrast-sensitive features
src = hist + (x+1)*blocks[0] + (y+1);
for (int o = 0; o < 18; o++) {
double h1 = min(*src * n1, 0.2);
double h2 = min(*src * n2, 0.2);
double h3 = min(*src * n3, 0.2);
double h4 = min(*src * n4, 0.2);
*dst = 0.5 * (h1 + h2 + h3 + h4);
t1 += h1;
t2 += h2;
t3 += h3;
t4 += h4;
dst += out[0]*out[1];
src += blocks[0]*blocks[1];
}
// contrast-insensitive features
src = hist + (x+1)*blocks[0] + (y+1);
for (int o = 0; o < 9; o++) {
double sum = *src + *(src + 9*blocks[0]*blocks[1]);
double h1 = min(sum * n1, 0.2);
double h2 = min(sum * n2, 0.2);
double h3 = min(sum * n3, 0.2);
double h4 = min(sum * n4, 0.2);
*dst = 0.5 * (h1 + h2 + h3 + h4);
dst += out[0]*out[1];
src += blocks[0]*blocks[1];
}
// texture features
*dst = 0.2357 * t1;
dst += out[0]*out[1];
*dst = 0.2357 * t2;
dst += out[0]*out[1];
*dst = 0.2357 * t3;
dst += out[0]*out[1];
*dst = 0.2357 * t4;
}
}
mxFree(hist);
mxFree(norm);
return mxfeat;
}
// matlab entry point
// F = features(image, bin)
// image should be color with double values
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
if (nrhs != 2)
mexErrMsgTxt("Wrong number of inputs");
if (nlhs != 1)
mexErrMsgTxt("Wrong number of outputs");
plhs[0] = process(prhs[0], prhs[1]);
}
compile.m
mex -O resize.cpp
mex -O dt.cpp
mex -O features.cpp
% use one of the following depending on your setup
% 1 is fastest, 3 is slowest
% 1) multithreaded convolution using blas
mex -O fconv.cpp
% 2) mulththreaded convolution without blas
% mex -O fconvMT.cc -o fconv
% 3) basic convolution, very compatible
% mex -O fconv.cc -o fconv