DPM(Deformable Parts Model) 源码分析-检测（二）

最新推荐文章于 2024-07-04 10:04:27 发布

冲冲那年

最新推荐文章于 2024-07-04 10:04:27 发布

阅读量573

点赞数

分类专栏：转载

转载专栏收录该内容

8 篇文章 0 订阅

订阅专栏

声明：本文非笔者原创，原文转载自：http://blog.csdn.net/ttransposition/article/details/12954195

DPM(Deformable Parts Model)原理

首先声明：本文分析的版本为 V3.1，因为它与论文最为相符。V4 将模型数由 2 个增加为 6 个，V5 提取了语义特征。源码较长（纯代码应该在 2K+ 行），这里只选取了核心部分代码。

demo.m

[cpp]view plaincopy 
   
 
   
 function demo()
 % DEMO  Run the detection walk-through on three sample images.
 %   Each row of the table below pairs an image file with the object
 %   class whose trained model should be applied to it.

 samples = {'000034.jpg', 'car'; ...
            '000061.jpg', 'person'; ...
            '000084.jpg', 'bicycle'};
 for k = 1:size(samples, 1)
   test(samples{k, 1}, samples{k, 2});
 end
   
 function test(name, cls)
 % TEST  Interactive walk-through of detection on a single image.
 %   name - image file to read
 %   cls  - object class; the model is loaded from VOC2007/<cls>_final
 % The function pauses after every stage until a key is pressed.
 prompt = 'press any key to continue';

 % stage 1: show the input image
 img = imread(name);
 clf;
 image(img);
 axis equal;
 axis on;
 disp('input image');
 disp(prompt); pause;

 % stage 2: load the trained model (the MAT-file is expected to define
 % the variable 'model') and visualize it
 load(['VOC2007/' cls '_final']);
 visualizemodel(model);
 disp([cls ' model']);
 disp(prompt); pause;

 % stage 3: run the detector with threshold 0, then keep the survivors
 % of non-maximum suppression
 dets = detect(img, model, 0);
 kept = nms(dets, 0.5);
 showboxes(img, kept);
 %print(gcf, '-djpeg90', '-r0', [cls '.jpg']);
 disp('detections');
 disp(prompt); pause;

 % stage 4: predict bounding boxes from the root/part placements,
 % suppress again, and clip the result to the image extent
 predicted = getboxes(model, dets);
 kept = nms(predicted, 0.5);
 predicted = clipboxes(img, kept);
 showboxes(img, predicted);
 disp('bounding boxes');
 disp(prompt); pause;

detect.m

[cpp]view plaincopy 
   
 
   
 function [boxes] = detect(input, model, thresh, bbox, ...
                           overlap, label, fid, id, maxsize)
 % Overview of the matching process: see Fig. 4 of the paper.

 % boxes = detect(input, model, thresh, bbox, overlap, label, fid, id, maxsize)
 % Detect objects in input using a model and a score threshold.
 % Higher threshold leads to fewer detections.
 % boxes = [rx1 ry1 rx2 ry2 | px1 py1 px2 py2 ... | componentindex | score ]
 % The function returns a matrix with one row per detected object.  The
 % last column of each row gives the score of the detection.  The
 % column before last specifies the component used for the detection.
 % The first 4 columns specify the bounding box for the root filter and
 % subsequent columns specify the bounding boxes of each part.
 %
 % If bbox is not empty, we pick best detection with significant overlap.
 % If label and fid are included, we write feature vectors to a data file.

 % Annotator's notes on call patterns used by the training code (not shown
 % in this file -- verify against the callers):
 %   detect(im, model, 0, bbox, overlap, 1, fid, 2*i-1)
 %   detect(im, model, 0, bbox, overlap)
 % latent mode: a ground-truth bbox was supplied, so only the single best
 % placement that overlaps it sufficiently is returned.
 if nargin > 3 && ~isempty(bbox)
   latent = true;
 else
   latent = false;
 end

 % write mode: a non-zero file id was supplied, so examples are written
 % to that file with exwrite.
 if nargin > 6 && fid ~= 0
   write = true;
 else
   write = false;
 end

 % no limit on the data file size unless the caller gave one
 if nargin < 9
   maxsize = inf;
 end

 % we assume color images
 % (annotator's note: color() expands a grayscale image to three identical
 % channels -- helper not shown here)
 input = color(input);

 % prepare model for convolutions
 rootfilters = [];
 for i = 1:length(model.rootfilters)
   % filter weights; per the annotator a rows x cols x 31 array
   % (9 orientation bins over 0-180 deg + 18 bins over 0-360 deg + 4 texture)
   rootfilters{i} = model.rootfilters{i}.w;
 end
 partfilters = [];
 for i = 1:length(model.partfilters)
   partfilters{i} = model.partfilters{i}.w;
 end

 % cache some data: per-component indices, filters and sizes for the root
 % and every part
 for c = 1:model.numcomponents   % annotator's note: release 3.1 has 2 components per class
   ridx{c} = model.components{c}.rootindex;    % index into model.rootfilters
   oidx{c} = model.components{c}.offsetindex;  % index into model.offsets
   root{c} = model.rootfilters{ridx{c}}.w;
   rsize{c} = [size(root{c},1) size(root{c},2)]; % root filter size [rows cols] in feature-map cells
   numparts{c} = length(model.components{c}.parts); % annotator's note: usually 6, some parts may be "fake"
   for j = 1:numparts{c}
     pidx{c,j} = model.components{c}.parts{j}.partindex; % index into model.partfilters
     didx{c,j} = model.components{c}.parts{j}.defindex;  % index into model.defs (deformation weights and anchor)
     part{c,j} = model.partfilters{pidx{c,j}}.w; % annotator's note: 6 x 6 x 31
     psize{c,j} = [size(part{c,j},1) size(part{c,j},2)]; % part filter size [rows cols]
     % reverse map from partfilter index to (component, part#)
     rpidx{pidx{c,j}} = [c j];
   end
 end

 % we pad the feature maps to detect partially visible objects
 padx = ceil(model.maxsize(2)/2+1); % e.g. ceil(7/2+1) = 5
 pady = ceil(model.maxsize(1)/2+1); % e.g. ceil(11/2+1) = 7

 % the feature pyramid
 interval = model.interval;  % annotator's note: 10 in the shipped models
 % -------------------------------- feature pyramid --------------------------------
 % feat{level}: feature map of one pyramid level; scales(level): the
 % resize factor of that level (featpyramid is not shown here).
 [feat, scales] = featpyramid(input, model.sbin, interval); % annotator's note: sbin = 8, interval = 10

 % detect at each scale
 best = -inf;
 ex = [];
 boxes = [];
 % --------------------- detect level by level ---------------------
 % Start at interval+1: the part filters below read feat{level-interval},
 % so the first 'interval' levels cannot serve as root levels.
 for level = interval+1:length(feat)
   scale = model.sbin/scales(level);  % image pixels per feature cell at this level
   if size(feat{level}, 1)+2*pady < model.maxsize(1) || ... % padded map is still smaller than model.maxsize
      size(feat{level}, 2)+2*padx < model.maxsize(2) || ...
      (write && ftell(fid) >= maxsize) % data file already reached maxsize
     continue;
   end

   if latent % only when a ground-truth bbox was given
     % skip levels at which no root filter has an area comparable to bbox
     skip = true;
     for c = 1:model.numcomponents
       root_area = (rsize{c}(1)*scale) * (rsize{c}(2)*scale); % root filter area in pixels
       box_area = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1);  % bbox area in pixels
       % both area ratios must reach 'overlap', i.e. neither area may be
       % much larger than the other
       if (root_area/box_area) >= overlap && (box_area/root_area) >= overlap
         skip = false;
       end
     end
     if skip
       continue;
     end
   end

   % ----------- convolve feature maps with filters -----------
   % The part responses are computed on feat{level-interval} with twice
   % the padding; they are sampled with stride 2 further below when they
   % are added to the root score.
   featr = padarray(feat{level}, [pady padx 0], 0);  % pady zero rows top/bottom, padx zero columns left/right
   %C = fconv(A, cell of B, start, end);
   rootmatch = fconv(featr, rootfilters, 1, length(rootfilters));
   if length(partfilters) > 0
     featp = padarray(feat{level-interval}, [2*pady 2*padx 0], 0);
     partmatch = fconv(featp, partfilters, 1, length(partfilters));
   end
   % ------------------- per-component detection -------------------
   % See Fig. 4 of the paper.
   % The result is the combined score map 'score'.
   for c = 1:model.numcomponents
     % root score + offset
     score = rootmatch{ridx{c}} + model.offsets{oidx{c}}.w;
     % add in parts
     for j = 1:numparts{c}
       def = model.defs{didx{c,j}}.w;
       anchor = model.defs{didx{c,j}}.anchor;
       % the anchor position is shifted to account for misalignment
       % between features at different resolutions
       ax{c,j} = anchor(1) + 1;
       ay{c,j} = anchor(2) + 1;
       match = partmatch{pidx{c,j}};
       % dt is called as dt(vals, ax, bx, ay, by): def(1)/def(3) weight the
       % squared x/y displacement, def(2)/def(4) the linear x/y displacement.
       [M, Ix{c,j}, Iy{c,j}] = dt(-match, def(1), def(2), def(3), def(4));
       % M has the same size as the part response map; Ix{c,j}, Iy{c,j}
       % hold, for every probe position, the best part position found.
       % See equation 9 of the paper.
       score = score - M(ay{c,j}:2:ay{c,j}+2*(size(score,1)-1), ...
                         ax{c,j}:2:ax{c,j}+2*(size(score,2)-1));
     end

     % ------- threshold the score map -------
     if ~latent
       % get all good matches
       % (annotator's note: thresh is 0 at test time and -1.05 when mining
       % hard examples)
       I = find(score > thresh);  % linear (column-major) indices
       [Y, X] = ind2sub(size(score), I);  % back to row/column subscripts
       tmp = zeros(length(I), 4*(1+numparts{c})+2);  % one row per detection, layout as in the header comment
       for i = 1:length(I)
         x = X(i);
         y = Y(i);
         [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
         b = [x1 y1 x2 y2];
         if write
           rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
           oblocklabel = model.offsets{oidx{c}}.blocklabel;
           f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
           xc = round(x + rsize{c}(2)/2 - padx); % root center in unpadded feature-map coordinates
           yc = round(y + rsize{c}(1)/2 - pady);
           ex = [];
           ex.header = [label; id; level; xc; yc; ...
                        model.components{c}.numblocks; ...
                        model.components{c}.dim];
           ex.offset.bl = oblocklabel;
           ex.offset.w = 1;
           ex.root.bl = rblocklabel;
           % fold the mirrored right half onto the left half and keep only
           % the left width1 columns
           width1 = ceil(rsize{c}(2)/2);
           width2 = floor(rsize{c}(2)/2);
           f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
           ex.root.w = f(:,1:width1,:);
           ex.part = [];
         end
         for j = 1:numparts{c}
           [probex, probey, px, py, px1, py1, px2, py2] = ...
               partbox(x, y, ax{c,j}, ay{c,j}, scale, padx, pady, ...
                       psize{c,j}, Ix{c,j}, Iy{c,j});
           b = [b px1 py1 px2 py2];
           if write
             % fake parts contribute a box but no example data
             if model.partfilters{pidx{c,j}}.fake
               continue;
             end
             pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
             dblocklabel = model.defs{didx{c,j}}.blocklabel;
             f = featp(py:py+psize{c,j}(1)-1,px:px+psize{c,j}(2)-1,:);
             def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
             partner = model.partfilters{pidx{c,j}}.partner;
             if partner > 0
               % add the mirrored features and deformation of the partner part
               k = rpidx{partner}(2);
               [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                   partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                           psize{c,k}, Ix{c,k}, Iy{c,k});
               kf = featp(kpy:kpy+psize{c,k}(1)-1,kpx:kpx+psize{c,k}(2)-1,:);
               % flip linear term in horizontal deformation model
               kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                        (kprobey-kpy)^2; kprobey-kpy];
               f = f + flipfeat(kf);
               def = def + kdef;
             else
               % no partner: fold the part onto its own left half
               width1 = ceil(psize{c,j}(2)/2);
               width2 = floor(psize{c,j}(2)/2);
               f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
               f = f(:,1:width1,:);
             end
             ex.part(j).bl = pblocklabel;
             ex.part(j).w = f;
             ex.def(j).bl = dblocklabel;
             ex.def(j).w = def;
           end
         end
         if write
           exwrite(fid, ex); % one example per detection above thresh (annotator: negative examples)
         end
         tmp(i,:) = [b c score(I(i))];
       end
       boxes = [boxes; tmp];
     end

     if latent
       % get best match
       for x = 1:size(score,2)
         for y = 1:size(score,1)
           if score(y, x) > best
             % image-space box of the root filter whose top-left cell is (y, x)
             [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize{c});
             % intersection with bbox
             xx1 = max(x1, bbox(1));
             yy1 = max(y1, bbox(2));
             xx2 = min(x2, bbox(3));
             yy2 = min(y2, bbox(4));
             w = (xx2-xx1+1);
             h = (yy2-yy1+1);
             if w > 0 && h > 0
               % check overlap with bbox
               inter = w*h;
               a = (x2-x1+1) * (y2-y1+1); % area of the root filter box
               b = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1); % area of bbox
               % intersection over union: dividing by the union rather than
               % by b alone penalizes root boxes much larger than bbox
               o = inter / (a+b-inter);
               if (o >= overlap)
                 % new best placement: restart the output row
                 best = score(y, x);
                 boxes = [x1 y1 x2 y2];
                 % ex is overwritten every time a better placement is
                 % found; only the final best one is written after the loops
                 if write
                   f = featr(y:y+rsize{c}(1)-1, x:x+rsize{c}(2)-1, :);
                   rblocklabel = model.rootfilters{ridx{c}}.blocklabel;
                   oblocklabel = model.offsets{oidx{c}}.blocklabel;
                   xc = round(x + rsize{c}(2)/2 - padx);
                   yc = round(y + rsize{c}(1)/2 - pady);
                   ex = [];
                   % header: label; id; level; xc; yc; numblocks; dim
                   % (annotator's note: xc, yc are relative to the cropped
                   % image -- verify against the caller)
                   ex.header = [label; id; level; xc; yc; ...
                                model.components{c}.numblocks; ...
                                model.components{c}.dim];
                   ex.offset.bl = oblocklabel;
                   ex.offset.w = 1;
                   ex.root.bl = rblocklabel;
                   width1 = ceil(rsize{c}(2)/2);
                   width2 = floor(rsize{c}(2)/2);
                   f(:,1:width2,:) = f(:,1:width2,:) + flipfeat(f(:,width1+1:end,:));
                   ex.root.w = f(:,1:width1,:); % folded root features of the example
                   ex.part = [];
                 end
                 for j = 1:numparts{c}
                   % probex, probey: probe position in the part-level maps;
                   % px, py: best part position looked up in Ix/Iy;
                   % px1..py2: the part box in image coordinates
                   [probex, probey, px, py, px1, py1, px2, py2] = ...
                       partbox(x, y, ax{c,j}, ay{c,j}, scale, ...
                               padx, pady, psize{c,j}, Ix{c,j}, Iy{c,j});
                   boxes = [boxes px1 py1 px2 py2];
                   if write
                     if model.partfilters{pidx{c,j}}.fake
                       continue;
                     end
                     p = featp(py:py+psize{c,j}(1)-1, ...
                               px:px+psize{c,j}(2)-1, :);
                     def = -[(probex-px)^2; probex-px; (probey-py)^2; probey-py];
                     pblocklabel = model.partfilters{pidx{c,j}}.blocklabel;
                     dblocklabel = model.defs{didx{c,j}}.blocklabel;
                     partner = model.partfilters{pidx{c,j}}.partner;
                     if partner > 0
                       k = rpidx{partner}(2);
                       [kprobex, kprobey, kpx, kpy, kpx1, kpy1, kpx2, kpy2] = ...
                           partbox(x, y, ax{c,k}, ay{c,k}, scale, padx, pady, ...
                                   psize{c,k}, Ix{c,k}, Iy{c,k});
                       kp = featp(kpy:kpy+psize{c,k}(1)-1, ...
                                  kpx:kpx+psize{c,k}(2)-1, :);
                       % flip linear term in horizontal deformation model
                       kdef = -[(kprobex-kpx)^2; kpx-kprobex; ...
                                (kprobey-kpy)^2; kprobey-kpy];
                       p = p + flipfeat(kp);
                       def = def + kdef;
                     else
                       width1 = ceil(psize{c,j}(2)/2);
                       width2 = floor(psize{c,j}(2)/2);
                       p(:,1:width2,:) = p(:,1:width2,:) + ...
                           flipfeat(p(:,width1+1:end,:));
                       p = p(:,1:width1,:);
                     end
                     ex.part(j).bl = pblocklabel;
                     ex.part(j).w = p;
                     ex.def(j).bl = dblocklabel;
                     ex.def(j).w = def;
                   end
                 end
                 boxes = [boxes c best];
               end
             end
           end
         end
       end
     end
   end
 end

 % latent mode: write the single best example, if one was found
 if latent && write && ~isempty(ex)
   exwrite(fid, ex);
 end
   
 % The functions below compute a bounding box for a root or part   
 % template placed in the feature hierarchy.  
 %  
 % coordinates need to be transformed to take into account:  
 % 1. padding from convolution  
 % 2. scaling due to sbin & image subsampling  
 % 3. offset from feature computation      
 %  
   
 function [x1, y1, x2, y2] = rootbox(x, y, scale, padx, pady, rsize)
 % ROOTBOX  Image-space box of a root filter placed at padded cell (x, y).
 %   scale      - image pixels per feature cell at the current level
 %   padx, pady - padding (in cells) that was added around the feature map
 %   rsize      - root filter size as [rows cols], in cells
 % The padding is removed before scaling because the map was padded after
 % the image had been resized.
 width_px  = rsize(2)*scale;
 height_px = rsize(1)*scale;

 % top-left corner
 x1 = (x-padx)*scale+1;
 y1 = (y-pady)*scale+1;

 % bottom-right corner
 x2 = x1 + width_px - 1;
 y2 = y1 + height_px - 1;
   
 function [probex, probey, px, py, px1, py1, px2, py2] = ...
     partbox(x, y, ax, ay, scale, padx, pady, psize, Ix, Iy)
 % PARTBOX  Locate a part for the root placement (x, y) and return its box.
 %   probex, probey - position probed in the part-level maps: the root
 %                    cell mapped with stride 2 and shifted by the anchor
 %   px, py         - part position stored in Ix/Iy for that probe
 %   px1..py2       - part box in image coordinates
 % The part maps carry twice the padding of the root map, hence the
 % halving before padx/pady are subtracted.

 % probe position
 probey = (y-1)*2+ay;
 probex = (x-1)*2+ax;

 % best placement recorded for this probe (converted from integer storage)
 py = double(Iy(probey, probex));
 px = double(Ix(probey, probex));

 % part position expressed in unpadded root-level cells
 cellx = (px-2)/2+1-padx;
 celly = (py-2)/2+1-pady;

 % image-space box
 px1 = cellx*scale+1;
 py1 = celly*scale+1;
 px2 = px1 + psize(2)*scale/2 - 1;
 py2 = py1 + psize(1)*scale/2 - 1;
   
 % write an example to the data file  
 function exwrite(fid, ex)
 % EXWRITE  Serialize one example to the data file.
 %   The header is written as int32; the offset/root block and then every
 %   non-empty part (together with its deformation block) follow as single.
 fwrite(fid, ex.header, 'int32');

 rootblock = [ex.offset.bl; ex.offset.w(:); ...
              ex.root.bl; ex.root.w(:)];
 fwrite(fid, rootblock, 'single');

 for k = 1:length(ex.part)
   % parts that were never filled in are not written
   if isempty(ex.part(k).w)
     continue;
   end
   partblock = [ex.part(k).bl; ex.part(k).w(:); ...
                ex.def(k).bl; ex.def(k).w(:)];
   fwrite(fid, partblock, 'single');
 end

features.cc

[cpp]view plaincopy 
   
 
   
 #include <math.h>  
 #include "mex.h"  
   
 // small value, used to avoid division by zero  
 #define eps 0.0001  
   
 #define bzero(a, b) memset(a, 0, b)   
 // Round to the nearest integer, halves away from zero.
 // The previous version only inspected a positive fractional part, so
 // negative inputs were truncated toward zero (e.g. -2.6 became -2).
 // Results for non-negative inputs are unchanged.
 int round(float a) {
   int whole = (int)a;      // truncates toward zero
   float frac = a - whole;  // same sign as a, magnitude < 1
   if (frac >= 0.5f)
     return whole + 1;
   if (frac <= -0.5f)
     return whole - 1;
   return whole;
 }
 // unit vectors used to compute gradient orientation
 // uu[i] = cos(20 degrees * i), i = 0..8
 double uu[9] = {1.0000, 
         0.9397, 
         0.7660, 
         0.500, 
         0.1736, 
         -0.1736, 
         -0.5000, 
         -0.7660, 
         -0.9397};
 // vv[i] = sin(20 degrees * i), i = 0..8
 double vv[9] = {0.0000, 
         0.3420, 
         0.6428, 
         0.8660, 
         0.9848, 
         0.9848, 
         0.8660, 
         0.6428, 
         0.3420};
   
 // Smaller / larger of two doubles; when the comparison x <= y is false
 // the second operand wins for min and the first for max.
 static inline double min(double x, double y) {
   if (x <= y)
     return x;
   return y;
 }
 static inline double max(double x, double y) {
   if (x <= y)
     return y;
   return x;
 }

 // Integer overloads with the same semantics.
 static inline int min(int x, int y) {
   if (x <= y)
     return x;
   return y;
 }
 static inline int max(int x, int y) {
   if (x <= y)
     return y;
   return x;
 }
   
 // main function:  
 // takes a double color image and a bin size   
 // returns HOG features  
 // Compute the 31-channel HOG feature map of an image.
 //   mximage: rows x cols x 3 array of doubles (anything else is rejected)
 //   mxsbin:  scalar cell size in pixels
 // Returns a newly created (blocks[0]-2) x (blocks[1]-2) x 31 double array:
 // 18 contrast-sensitive channels, 9 contrast-insensitive channels and
 // 4 texture channels. All arrays are addressed in column-major order.
 mxArray *process(const mxArray *mximage, const mxArray *mxsbin) {
   double *im = (double *)mxGetPr(mximage);
   // NOTE(review): current MATLAB APIs declare mxGetDimensions as returning
   // const mwSize*; const int* only matches when mwSize is int -- confirm
   // the build flags (e.g. compatible array dims) before reusing this.
   const int *dims = mxGetDimensions(mximage);
   if (mxGetNumberOfDimensions(mximage) != 3 ||
       dims[2] != 3 ||
       mxGetClassID(mximage) != mxDOUBLE_CLASS)
     mexErrMsgTxt("Invalid input");

   int sbin = (int)mxGetScalar(mxsbin);

   // memory for caching orientation histograms & their norms
   int blocks[2];
   blocks[0] = (int)round((double)dims[0]/(double)sbin); // number of cell rows
   blocks[1] = (int)round((double)dims[1]/(double)sbin); // number of cell columns
   // 18 orientation bins per cell; the 9 contrast-insensitive bins are
   // derived later by summing opposite orientations
   double *hist = (double *)mxCalloc(blocks[0]*blocks[1]*18, sizeof(double));
   double *norm = (double *)mxCalloc(blocks[0]*blocks[1], sizeof(double));

   // memory for HOG features
   int out[3]; // output size
   // the border cells are dropped: every output cell (x, y) reads hist and
   // norm entries from cells (x..x+2, y..y+2) in the feature loop below
   out[0] = max(blocks[0]-2, 0);
   out[1] = max(blocks[1]-2, 0);
   out[2] = 27+4;
   mxArray *mxfeat = mxCreateNumericArray(3, out, mxDOUBLE_CLASS, mxREAL); // feature array of size 'out'
   double *feat = (double *)mxGetPr(mxfeat);

   int visible[2];
   visible[0] = blocks[0]*sbin;
   visible[1] = blocks[1]*sbin;
   // outer loop over columns, inner loop over rows (matches the
   // column-major layout)
   for (int x = 1; x < visible[1]-1; x++) {
     for (int y = 1; y < visible[0]-1; y++) {
       // first color channel
       // address of pixel (y, x); coordinates are clamped because the
       // visible area can extend past the real image
       double *s = im + min(x, dims[1]-2)*dims[0] + min(y, dims[0]-2);
       double dy = *(s+1) - *(s-1);
       // neighbouring columns are dims[0] elements apart
       double dx = *(s+dims[0]) - *(s-dims[0]);
       double v = dx*dx + dy*dy;

       // second color channel
       s += dims[0]*dims[1];
       double dy2 = *(s+1) - *(s-1);
       double dx2 = *(s+dims[0]) - *(s-dims[0]);
       double v2 = dx2*dx2 + dy2*dy2;

       // third color channel
       s += dims[0]*dims[1];
       double dy3 = *(s+1) - *(s-1);
       double dx3 = *(s+dims[0]) - *(s-dims[0]);
       double v3 = dx3*dx3 + dy3*dy3;

       // pick channel with strongest gradient (v is the squared magnitude)
       if (v2 > v) {
         v = v2;
         dx = dx2;
         dy = dy2;
           } 
           if (v3 > v) {
         v = v3;
         dx = dx3;
         dy = dy3;
       }

       // snap to one of 18 orientations; best_o is the chosen bin
       double best_dot = 0;
       int best_o = 0;
       for (int o = 0; o < 9; o++) {
         // (uu[o], vv[o]) is a unit vector, so the dot product with
         // (dx, dy) is largest for the direction closest to the gradient;
         // a negative dot product selects the opposite direction o+9
         double dot = uu[o]*dx + vv[o]*dy;
         if (dot > best_dot) {
           best_dot = dot;
           best_o = o;
         } else if (-dot > best_dot) {
           best_dot = -dot;
           best_o = o+9;
         }
       }

       // add to 4 histograms around pixel using linear interpolation
       double xp = ((double)x+0.5)/(double)sbin - 0.5;
       double yp = ((double)y+0.5)/(double)sbin - 0.5;
       int ixp = (int)floor(xp);
       int iyp = (int)floor(yp);
       double vx0 = xp-ixp;
       double vy0 = yp-iyp;
       double vx1 = 1.0-vx0;
       double vy1 = 1.0-vy0;
       v = sqrt(v);
       // cell (ixp, iyp): top-left neighbour
       if (ixp >= 0 && iyp >= 0) {
         *(hist + ixp*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += 
           vx1*vy1*v;
       }
       // cell (ixp+1, iyp): top-right neighbour
       if (ixp+1 < blocks[1] && iyp >= 0) {
         *(hist + (ixp+1)*blocks[0] + iyp + best_o*blocks[0]*blocks[1]) += 
           vx0*vy1*v;
       }
       // cell (ixp, iyp+1): bottom-left neighbour
       if (ixp >= 0 && iyp+1 < blocks[0]) {
         *(hist + ixp*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += 
           vx1*vy0*v;
       }
       // cell (ixp+1, iyp+1): bottom-right neighbour
       if (ixp+1 < blocks[1] && iyp+1 < blocks[0]) {
         *(hist + (ixp+1)*blocks[0] + (iyp+1) + best_o*blocks[0]*blocks[1]) += 
           vx0*vy0*v;
       }
     }
   }

   // compute energy in each block by summing over orientations:
   // norm(cell) = sum over o = 0..8 of (hist[o] + hist[o+9])^2
   for (int o = 0; o < 9; o++) {
     double *src1 = hist + o*blocks[0]*blocks[1];
     double *src2 = hist + (o+9)*blocks[0]*blocks[1];
     double *dst = norm;
     double *end = norm + blocks[1]*blocks[0];
     while (dst < end) {
       *(dst++) += (*src1 + *src2) * (*src1 + *src2);
       src1++;
       src2++;
     }
   }

   // compute features
   for (int x = 0; x < out[1]; x++) {
     for (int y = 0; y < out[0]; y++) {
       double *dst = feat + x*out[0] + y;      
       double *src, *p, n1, n2, n3, n4;

       // Output cell (x, y) corresponds to histogram cell (x+1, y+1).
       // n1..n4 are the inverse norms of the four 2x2 cell groups that
       // contain it; each p points at the top-left cell of one group.
       p = norm + (x+1)*blocks[0] + y+1; // group starting at the cell itself
       n1 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
       p = norm + (x+1)*blocks[0] + y; // group starting one row up
       n2 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
       p = norm + x*blocks[0] + y+1; // group starting one column to the left
       n3 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);
       p = norm + x*blocks[0] + y; // group starting one row up and one column left
       n4 = 1.0 / sqrt(*p + *(p+1) + *(p+blocks[0]) + *(p+blocks[0]+1) + eps);

       double t1 = 0;
       double t2 = 0;
       double t3 = 0;
       double t4 = 0;

       // contrast-sensitive features
       src = hist + (x+1)*blocks[0] + (y+1);
       for (int o = 0; o < 18; o++) {
         double h1 = min(*src * n1, 0.2); // normalized value clipped at 0.2
         double h2 = min(*src * n2, 0.2);
         double h3 = min(*src * n3, 0.2);
         double h4 = min(*src * n4, 0.2);
         *dst = 0.5 * (h1 + h2 + h3 + h4); // half the sum of the four normalizations
         t1 += h1;
         t2 += h2;
         t3 += h3;
         t4 += h4;
         dst += out[0]*out[1]; // advance to the next output channel
         src += blocks[0]*blocks[1];
       }

       // contrast-insensitive features
       src = hist + (x+1)*blocks[0] + (y+1);
       for (int o = 0; o < 9; o++) {
         double sum = *src + *(src + 9*blocks[0]*blocks[1]);
         double h1 = min(sum * n1, 0.2);
         double h2 = min(sum * n2, 0.2);
         double h3 = min(sum * n3, 0.2);
         double h4 = min(sum * n4, 0.2);
         *dst = 0.5 * (h1 + h2 + h3 + h4);
         dst += out[0]*out[1];
         src += blocks[0]*blocks[1];
       }

       // texture features: scaled sums of the 18 contrast-sensitive values,
       // one per normalization (0.2357 is presumably 1/sqrt(18) -- confirm)
       *dst = 0.2357 * t1;
       dst += out[0]*out[1];
       *dst = 0.2357 * t2;
       dst += out[0]*out[1];
       *dst = 0.2357 * t3;
       dst += out[0]*out[1];
       *dst = 0.2357 * t4;
     }
   }

   mxFree(hist);
   mxFree(norm);
   return mxfeat;
 }
   
 // matlab entry point  
 // F = features(image, bin)  
 // image should be color with double values  
 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {   
   if (nrhs != 2)  
     mexErrMsgTxt("Wrong number of inputs");   
   if (nlhs != 1)  
     mexErrMsgTxt("Wrong number of outputs");  
   plhs[0] = process(prhs[0], prhs[1]);  
 }  

dt.cc

[cpp]view plaincopy 
   
 
   
 #include <math.h>  
 #include <sys/types.h>  
 #include "mex.h"  
   
 #define int32_t int  
 /* 
  * Generalized distance transforms. 
  * We use a simple nlog(n) divide and conquer algorithm instead of the 
  * theoretically faster linear method, for no particular reason except 
  * that this is a bit simpler and I wanted to test it out. 
  * 
  * The code is a bit convoluted because dt1d can operate either along 
  * a row or column of an array.   
  */  
   
 // Integer square, used for the quadratic displacement term.
 static inline int square(int x) {
   const int squared = x * x;
   return squared;
 }
   
 // dt helper function  
 void dt_helper(double *src, double *dst, int *ptr, int step,   
            int s1, int s2, int d1, int d2, double a, double b) {  
  if (d2 >= d1) {  
    int d = (d1+d2) >> 1;  
    int s = s1;  
    for (int p = s1+1; p <= s2; p++)  
      if (src[s*step] + a*square(d-s) + b*(d-s) >   
      src[p*step] + a*square(d-p) + b*(d-p))  
     s = p;  
    dst[d*step] = src[s*step] + a*square(d-s) + b*(d-s);  
    ptr[d*step] = s;  
    dt_helper(src, dst, ptr, step, s1, s, d1, d-1, a, b);  
    dt_helper(src, dst, ptr, step, s, s2, d+1, d2, a, b);  
  }  
 }  
   
 // dt of 1d array  
 void dt1d(double *src, double *dst, int *ptr, int step, int n,   
       double a, double b) {  
   dt_helper(src, dst, ptr, step, 0, n-1, 0, n-1, a, b);  
 }  
   
 // matlab entry point  
 // [M, Ix, Iy] = dt(vals, ax, bx, ay, by)  
 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {   
   if (nrhs != 5)  
     mexErrMsgTxt("Wrong number of inputs");   
   if (nlhs != 3)  
     mexErrMsgTxt("Wrong number of outputs");  
   if (mxGetClassID(prhs[0]) != mxDOUBLE_CLASS)  
     mexErrMsgTxt("Invalid input");  
   
   const int *dims = mxGetDimensions(prhs[0]);  
   double *vals = (double *)mxGetPr(prhs[0]);  
   double ax = mxGetScalar(prhs[1]);  
   double bx = mxGetScalar(prhs[2]);  
   double ay = mxGetScalar(prhs[3]);  
   double by = mxGetScalar(prhs[4]);  
     
   mxArray *mxM = mxCreateNumericArray(2, dims, mxDOUBLE_CLASS, mxREAL);  
   mxArray *mxIx = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);  
   mxArray *mxIy = mxCreateNumericArray(2, dims, mxINT32_CLASS, mxREAL);  
   double *M = (double *)mxGetPr(mxM);  
   int32_t *Ix = (int32_t *)mxGetPr(mxIx);  
   int32_t *Iy = (int32_t *)mxGetPr(mxIy);  
   
   double *tmpM = (double *)mxCalloc(dims[0]*dims[1], sizeof(double)); // part map  
   int32_t *tmpIx = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));  
   int32_t *tmpIy = (int32_t *)mxCalloc(dims[0]*dims[1], sizeof(int32_t));  
   
   for (int x = 0; x < dims[1]; x++)  
     dt1d(vals+x*dims[0], tmpM+x*dims[0], tmpIy+x*dims[0], 1, dims[0], ay, by);  
   
   for (int y = 0; y < dims[0]; y++)  
     dt1d(tmpM+y, M+y, tmpIx+y, dims[0], dims[1], ax, bx);  
   
   // get argmins and adjust for matlab indexing from 1  
   for (int x = 0; x < dims[1]; x++) {  
     for (int y = 0; y < dims[0]; y++) {  
       int p = x*dims[0]+y;  
       Ix[p] = tmpIx[p]+1;  
       Iy[p] = tmpIy[tmpIx[p]*dims[0]+y]+1;  
     }  
   }  
   
   mxFree(tmpM);  
   mxFree(tmpIx);  
   mxFree(tmpIy);  
   plhs[0] = mxM;  
   plhs[1] = mxIx;  
   plhs[2] = mxIy;  
 }