首先声明:此版本为V3.1,因为它和论文最相符。V4将模型数由2个增加为6个,V5提取了语义特征。源码太长(纯代码应该在2K行以上),这里只选取了核心部分代码。
demo.m
function demo()
% DEMO  Run the detection walkthrough on the three sample images.
%   Each row of the table below pairs an image file with the object class
%   whose model is used; rows are handed to test() in order.
samples = { ...
    '000034.jpg', 'car'; ...
    '000061.jpg', 'person'; ...
    '000084.jpg', 'bicycle'};
for k = 1:size(samples, 1)
    test(samples{k, 1}, samples{k, 2});
end
function test(name, cls)
% TEST  Step through detection on a single image, pausing between stages.
%   name - image file name, passed directly to imread
%   cls  - object class name; the model is loaded from
%          ['VOC2007/' cls '_final']
%
%   Stages shown (each followed by a key-press pause):
%     1. the input image
%     2. a visualization of the loaded model
%     3. the detections after non-maximum suppression
%     4. the predicted bounding boxes after non-maximum suppression and
%        clipping to the image

% load and display image
im = imread(name);
clf;
image(im);
axis equal;
axis on;
disp('input image');
disp('press any key to continue'); pause;

% load and display model
% Load into a struct and assign 'model' explicitly. A bare load() would
% create the variable implicitly, and inside a function MATLAB may then
% resolve 'model' as a function on the path instead of the loaded data.
loaded = load(['VOC2007/' cls '_final']);
model = loaded.model;   % model is a struct stored in the mat file
visualizemodel(model);
disp([cls ' model']);
disp('press any key to continue'); pause;

% detect objects
boxes = detect(im, model, 0);
top = nms(boxes, 0.5);   % non-maximum suppression
showboxes(im, top);
%print(gcf, '-djpeg90', '-r0', [cls '.jpg']);
disp('detections');
disp('press any key to continue'); pause;

% get bounding boxes
bbox = getboxes(model, boxes);   % predict bounding boxes from the detected root and parts
top = nms(bbox, 0.5);
bbox = clipboxes(im, top);       % predicted boxes may extend past the image, so clip them
showboxes(im, bbox);
disp('bounding boxes');
disp('press any key to continue'); pause;
detect.m
function [boxes] = detect(input, model, thresh, bbox, ...
overlap, label, fid, id, maxsize)
% 论文 fig.4
% boxes = detect(input, model, thresh, bbox, overlap, label, fid, id, maxsize)
% Detect objects in input using a model and a score threshold.
% Higher threshold leads to fewer detections.
% boxes = [rx1 ry1 rx2 ry2 | px1 py1 px2 py2 ...| componentindex | score ]
% The function returns a matrix with one row per detected object. The
% last column of each row gives the score of the detection. The
% column before last specifies the component used for the detection.
% The first 4 columns specify the bounding box for the root filter and
% subsequent columns specify the bounding boxes of each part.
%
% If bbox is not empty, we pick best detection with significant overlap.
% If label and fid are included, we write feature vectors to a data file.
%phase 2: im, model, 0, bbox, overlap, 1, fid, 2*i-1
% train boxes: detect(im, model, 0, bbox, overlap)
if nargin > 3 && ~isempty(bbox)
latent = true;
else
latent = false;
end
if nargin > 6 && fid ~= 0
write = true;
else
write = false;
end
if nargin < 9
maxsize = inf;
end
% we assume color images
input = color(input); %如果是灰度图,扩充为三通道 R=G=B=Gray
% prepare model for convolutions
rootfilters = [];
for i = 1:length(model.rootfilters) %
rootfilters{i} = model.rootfilters{i}.w;% r*w*31维向量,9(方向范围 0~180) +18(方向范围 0-360)+4(cell熵和)
end
partfilters = [];
for i = 1:length(model.partfilters)
partfilters{i} = model.partfilters{i}.w;
end
% cache some data 获取所有 root,part的所有信息
for c = 1:model.numcomponents % releas3.1 一种对象,只有2个模型,releas5 有3*2个模型
ridx{c} = model.components{c}.rootindex; % m1=1,m2=2
oidx{c} = model.components{c}.offsetindex; %o1=1,o2=2
root{c} = model.rootfilters{ridx{c}}.w;
rsize{c} = [size(root{c},1) size(root{c},2)]; %root size,单位为 sbin*sbin的block块,相当于原始HOG中的一个cell
numparts{c} = length(model.components{c}.parts); %目前为固定值6个,但是有些part是 fake
for j = 1:numparts{c}
pidx{c,j} = model.components{c}.parts{j}.partindex; %part是在该对象的所有component的part下连续编号
didx{c,j} = model.components{c}.parts{j}.defindex; % 在 rootfiter中的 anchor location
part{c,j} = model.partfilters{pidx{c,j}}.w; % 6*6*31
psize{c,j} = [size(part{c,j},1) size(part{c,j},2)]; %
% reverse map from partfilter index to (component, part#)
rpidx{pidx{c,j}} = [c j];
end
end
% we pad the feature maps to detect partially visible objects
padx = ceil(model.maxsize(2)/2+1); % 7/2+1 = 5
pady = ceil(model.maxsize(1)/2+1); % 11/2+1 = 7
% the feature pyramid
interval = model.interval; %10
%--------------------------------特征金字塔---------------------------------------------------------
% feat的尺寸为 img.rows/sbin,img.cols/sbin
% scales:缩放了多少
[feat, scales] = featpyramid(input, model.sbin, interval); % 8,10
% detect at each scale
best = -inf;
ex = [];
boxes = [];
%---------------------逐层检测目标-----------------------------------------------------------%
for level = interval+1:length(feat) %注意是从第二层开始
scale = model.sbin/scales(level); % 1/缩小了多少
if size(feat{level}, 1)+2*pady < model.maxsize(1) || ... %扩展后还是未能达到 能同时计算两个component的得分
size(feat{level}, 2)+2*padx < model.maxsize(2) || ...
(write && ftell(fid) >= maxsize) %已经没有空间保存样本了
continue;
end
if latent %训练时使用,检测时跳过
skip = true;
for c = 1:model.numcomponents
root_area = (rsize{c}(1)*scale) * (rsize{c}(2)*scale);% rootfilter
box_area = (bbox(3)-bbox(1)+1) * (bbox(4)-bbox(2)+1); % bbox该class 所有 rootfilter 的交集即minsize
if (root_area/box_area) >= overlap && (box_area/root_area) >= overlap %这句话真纠结,a>=0.7b,b>=0.7a -> a>=0.7b>=0.49a
skip = false;
end
end
if skip