%% Purpose: demonstrate a decision-tree algorithm in computer vision:
% image binarization (foreground/background) with a C4.5 decision tree.
% Environment: Win7, Matlab 2012b
% Modi: NUDT-VAP
% Date: 2015-4-10
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
close all; clear; clc;
%% Step 1 - Load the image and the training mask
Image = imread('flower_test.png');
Mask = imread('flower_mask.png');
figure; imshow(Image); title('Used Image');
figure; imshow(Mask); title('Used Mask');
% In the Mask:
% Mask(i,j) = 0 -> class 0 (background training pixel)
% Mask(i,j) = 255 -> class 1 (foreground training pixel)
% Mask(i,j) = 128 -> unknown (not used for training)
%% Step 2 - Select training data
[M, N, ~] = size(Image); % channel count itself is not needed below
Data = reshape(Image, [M*N, 3]); % one RGB triplet per row
% NOTE(review): the linear indices below only line up with rows of Data if
% Mask is a single-channel MxN image -- verify the mask file.
pID = find(Mask == 255); % positive (class 1) pixel indices
nID = find(Mask == 0); % negative (class 0) pixel indices
pNum = numel(pID);
nNum = numel(nID);
% Features are 3xL (one column per sample); labels are a 1xL row vector.
TrainData = [Data(pID,:); Data(nID,:)]';
TrainValue = [ones(1, pNum), zeros(1, nNum)];
TrainNum = pNum + nNum;
%% Step 3 - Train
DivNum = 32; % quantization step per color channel
% Floor-quantize each channel into equal-width bins 1..ceil(256/DivNum).
% BUGFIX: the original expression uint8(TrainData/DivNum)+1 relied on uint8
% integer division, which rounds to the NEAREST integer -- that shifts every
% bin boundary by DivNum/2 and produces an extra 9th bin (255/32 -> 8).
% Flooring on doubles yields the intended 8 equal-width bins.
TrainDataFeatures = floor(double(TrainData)/DivNum) + 1;
Nbins = max(TrainDataFeatures(:));
inc_node = TrainNum*10/100; % stop splitting nodes below 10% of the training set
discrete_dim = [Nbins, Nbins, Nbins]; % all three channels treated as discrete
tree = BuildC45Tree(TrainDataFeatures, TrainValue, inc_node, discrete_dim, max(discrete_dim));
%% Step 4 - Test: classify every pixel using the same quantization
TestDataFeatures = floor(double(Data')/DivNum) + 1;
targets = UseC45Tree(TestDataFeatures, 1:M*N, tree, discrete_dim, unique(TrainValue));
Results = reshape(targets, [M, N]);
%
figure; imshow(Results,[]); title('C4.5 Classification Results')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function tree = BuildC45Tree(features, targets, inc_node, discrete_dim, maxNbin)
% BUILDC45TREE  Recursively build a C4.5-style decision tree.
% Input:
%   features: Ni*L matrix -- Ni feature dimensions, L samples (one per column)
%   targets: 1*L class labels; the class set is unique(targets)
%   inc_node: stop splitting once a node holds fewer samples than this
%   discrete_dim: per-dimension value; nonzero -> treat that dimension as
%                 discrete (values assumed in 1..#bins), 0 -> continuous
%   maxNbin: max(discrete_dim); carried through the recursion (only used by
%            the commented-out child preallocation below)
% Output:
%   tree: struct with fields
%         .dim       - split dimension, 0 at a leaf
%         .split_loc - threshold for a continuous split, inf otherwise
%         .child     - class label at a leaf, or array of subtree structs
%% Step 0-Get the size
[Ni, L] = size(features);
Uc = unique(targets);
% set default value: dim == 0 marks this node as a leaf unless a split is chosen
tree.dim = 0;
% tree.child(1:maxNbin) = zeros(1,maxNbin);
tree.split_loc = inf;
% NOTE(review): this early return produces a struct WITHOUT a .child field;
% if it happens inside the recursion (an empty child partition), the
% struct-array assignment tree.child(i) = ... below will raise
% "assignment between dissimilar structures". Confirm partitions are never empty.
if isempty(features),
return;
end
%% Step 1-Stop Condition: feature dim is one or examples is small
% Leaf: too few samples, a single sample, or a pure node -> majority label.
if ((inc_node > L) || (L == 1) || (length(Uc) == 1)),
% hist with a bin COUNT spreads bins over the data range; adequate for the
% small integer label sets used here (e.g. {0,1}).
H = hist(targets, length(Uc));
[m, largest] = max(H);
tree.child = Uc(largest);
return;
end
%% Step 2-Otherwise: use C4.5 choose the best feature
% 2-1 Compute the node's I (entropy of the class distribution, in bits)
for i = 1:length(Uc),
Pnode(i) = length(find(targets == Uc(i))) / L;
end
Inode = -sum(Pnode.*log(Pnode)/log(2));
% 2-2 For each dimension, compute the gain ratio impurity
% This is done separately for discrete and continuous features
delta_Ib = zeros(1, Ni);
split_loc = ones(1, Ni)*inf;
for i = 1:Ni,
data = features(i,:);
Nbins = length(unique(data));
if (discrete_dim(i)),
%This is a discrete feature
% NOTE(review): the loop compares features against k = 1..Nbins where
% Nbins = length(unique(data)); this is only correct when the observed
% values are exactly the contiguous set 1..Nbins -- verify upstream binning.
P = zeros(length(Uc), Nbins);
for j = 1:length(Uc),
for k = 1:Nbins,
indices = find((targets == Uc(j)) & (features(i,:) == k));
P(j,k) = length(indices);
end
end
Pk = sum(P);
P = P/L;
Pk = Pk/sum(Pk);
% Per-bin class entropy; eps guards log(0).
info = sum(-P.*log(eps+P)/log(2));
% Gain ratio: information gain divided by the split's own entropy.
delta_Ib(i) = (Inode-sum(Pk.*info))/-sum(Pk.*log(eps+Pk)/log(2));
else
% This is a continuous feature
P = zeros(length(Uc), 2);
% Sort the features
[sorted_data, indices] = sort(data);
sorted_targets = targets(indices);
% Calculate the information for each possible split (between each
% adjacent pair in sorted order); plain information gain, not gain ratio.
I = zeros(1, L-1);
for j = 1:L-1,
for k =1:length(Uc),
P(k,1) = length(find(sorted_targets(1:j) == Uc(k)));
P(k,2) = length(find(sorted_targets(j+1:end) == Uc(k)));
end
Ps = sum(P)/L;
P = P/L;
info = sum(-P.*log(eps+P)/log(2));
I(j) = Inode - sum(info.*Ps);
end
[delta_Ib(i), s] = max(I);
split_loc(i) = sorted_data(s);
end
end
% 2-3 Find the dimension MAXIMIZING the gain ratio delta_Ib
[m, dim] = max(delta_Ib);
dims = 1:Ni;
tree.dim = dim;
% 2-4 Split along the 'dim' dimension
Nf = unique(features(dim,:));
Nbins = length(Nf);
if (discrete_dim(dim)),
%Discrete feature: one child per observed value, indexed by position in Nf.
% NOTE(review): children are matched by POSITION, so UseC45Tree must see the
% same unique-value ordering at classification time -- fragile if test data
% contains values absent from this node's training data.
for i = 1:Nbins,
indices = find(features(dim, :) == Nf(i));
tree.child(i) = BuildC45Tree(features(dims, indices), targets(indices), inc_node, discrete_dim(dims), maxNbin);
end
else
%Continuous feature: binary split at the best threshold found above.
tree.split_loc = split_loc(dim);
indices1 = find(features(dim,:) <= split_loc(dim));
indices2 = find(features(dim,:) > split_loc(dim));
tree.child(1) = BuildC45Tree(features(dims, indices1), targets(indices1), inc_node, discrete_dim(dims), maxNbin);
tree.child(2) = BuildC45Tree(features(dims, indices2), targets(indices2), inc_node, discrete_dim(dims), maxNbin);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function targets = UseC45Tree(features, indices, tree, discrete_dim, Uc)
% USEC45TREE  Classify samples by recursive descent of a C4.5 decision tree.
% Input:
%   features: Ni*L matrix -- Ni feature dimensions, L samples
%   indices: linear indices of the samples this call is responsible for
%   tree: decision tree built by BuildC45Tree
%   discrete_dim: per-dimension flags (nonzero -> discrete split)
%   Uc: class label set (kept for interface compatibility; unused here)
% Output:
%   targets: 1*L labels; positions outside 'indices' remain 0 and the
%            results of the recursive calls are merged by addition.
targets = zeros(1, size(features, 2));
% Leaf node: dim == 0 means tree.child holds the class label directly.
if (tree.dim == 0)
targets(indices) = tree.child;
return;
end
% Inner node: descend along the stored split dimension.
splitDim = tree.dim;
allDims = 1:size(features, 1);
if (discrete_dim(splitDim))
% Discrete split: route each observed feature value to its child,
% matched by position in the sorted unique-value list.
vals = unique(features(splitDim, :));
for k = 1:numel(vals)
sub = indices(features(splitDim, indices) == vals(k));
targets = targets + UseC45Tree(features(allDims, :), sub, tree.child(k), discrete_dim(allDims), Uc);
end
else
% Continuous split: threshold against the stored split location.
below = indices(features(splitDim, indices) <= tree.split_loc);
targets = targets + UseC45Tree(features(allDims, :), below, tree.child(1), discrete_dim(allDims), Uc);
above = indices(features(splitDim, indices) > tree.split_loc);
targets = targets + UseC45Tree(features(allDims, :), above, tree.child(2), discrete_dim(allDims), Uc);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function targets = UseID3Tree(features, indices, tree, Nbins, Uc)
% USEID3TREE  Classify samples by recursive descent of an ID3 decision tree.
% Input:
%   features: Ni*L matrix; feature values assumed to lie in 1:Nbins
%   indices: linear indices of the samples this call is responsible for
%   tree: ID3 tree -- when one feature dimension remains, tree.child(b)
%         holds the label for bin b (inf marks an untrained bin);
%         otherwise tree.split_dim names the split and tree.child(b)
%         holds the subtree for bin b
%   Nbins: number of discrete bins per feature
%   Uc: class label set, used for the random fallback on untrained bins
% Output:
%   targets: 1*L labels; unvisited positions remain 0 and recursive
%            results are merged by addition.
targets = zeros(1, size(features, 2));
%% Base case: a single feature dimension left -> per-bin labels.
if (size(features, 1) == 1)
for b = 1:Nbins
hit = indices(features(indices) == b);
if isempty(hit)
continue;
end
if isfinite(tree.child(b))
targets(hit) = tree.child(b);
else
% No training data fell in this bin; pick a class uniformly at random.
pick = 1 + floor(rand(1)*length(Uc));
targets(hit) = Uc(pick);
end
end
return;
end
%% Recursive case: split on the stored dimension, drop it, and descend per bin.
dim = tree.split_dim;
remaining = find(~ismember(1:size(features, 1), dim));
for b = 1:Nbins
hit = indices(features(dim, indices) == b);
targets = targets + UseID3Tree(features(remaining, :), hit, tree.child(b), Nbins, Uc);
end