Study Notes on the Companion Simulation Code for "视觉机器学习20讲" (Visual Machine Learning: 20 Lectures) --- Decision Tree Learning

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Purpose: demonstrate a decision tree algorithm in computer vision:
%          image binarization based on a C4.5 decision tree
% Environment: Win7, Matlab 2012b
% Modi: NUDT-VAP
% Date: 2015-4-10
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%




close all; clear; clc;
%% Step 1 - Load the image and the label mask
Image = imread('flower_test.png');
Mask = imread('flower_mask.png');
figure; imshow(Image); title('Used Image');
figure; imshow(Mask); title('Used Mask');
% In the Mask:
%           Mask(i,j) = 0   -> class 0
%           Mask(i,j) = 255 -> class 1
%           Mask(i,j) = 128 -> unknown
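% Only pixels labeled 0 or 255 are used as training samples in Step 2;
% the unknown (128) pixels are classified together with all other
% pixels in Step 4.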
%% Step 2 - Select the training data
[M,N,L] = size(Image);
Data = reshape(Image,[M*N,3]);
pID = find(Mask==255);
nID = find(Mask==0);
pNum = size(pID,1);
nNum = size(nID,1);

TrainData  = [Data(pID,:);Data(nID,:)]';     % 3 x TrainNum matrix of RGB values
TrainValue = [ones(pNum,1);zeros(nNum,1)]';  % 1 x TrainNum vector of class labels
TrainNum = pNum + nNum;
%% Step 3 - Training
DivNum = 32;
% Quantize each RGB channel into discrete bins. Note that uint8 division
% rounds rather than floors, so 0..255 maps to bins 1..9 here.
TrainDataFeatures = uint8(TrainData/DivNum)+1;
Nbins = max(TrainDataFeatures(:));
inc_node = TrainNum*10/100;            % pre-pruning: stop splitting nodes holding <= 10% of the samples
discrete_dim = [Nbins,Nbins,Nbins];    % all three dims (R,G,B) are discrete with Nbins values
tree = BuildC45Tree(TrainDataFeatures, TrainValue, inc_node, discrete_dim, max(discrete_dim));
%% Step 4 - Testing
TestDataFeatures = uint8(Data'/DivNum)+1;   % quantize every pixel with the same binning
targets = UseC45Tree(TestDataFeatures, 1:M*N, tree, discrete_dim, unique(TrainValue));
Results = reshape(targets,[M,N]);

figure; imshow(Results,[]); title('C4.5 Classification Results')
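
As a quick sanity check (a minimal sketch, not part of the original post), the prediction can be compared against the labeled pixels themselves, assuming the workspace variables from the script above are still present:

% Sketch: accuracy on the labeled training pixels only.
% Assumes Results, pID, nID, pNum and nNum from the script above.
predicted = Results(:);
correct   = sum(predicted(pID) == 1) + sum(predicted(nID) == 0);
fprintf('Accuracy on labeled pixels: %.2f%%\n', 100*correct/(pNum+nNum));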










&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function tree = BuildC45Tree(features, targets, inc_node, discrete_dim, maxNbin)
% Input:
%       features:       Ni*L, Ni features and L samples
%       targets:        1*L, class label of each sample
%       inc_node:       pre-pruning threshold: nodes with fewer samples become leaves
%       discrete_dim:   number of discrete values in each dim (0 -> continuous dim)
%       maxNbin:        max(discrete_dim)
% Output:
%       tree:           C4.5 decision tree
%% Step 0-Get the size
[Ni, L]         = size(features);
Uc              = unique(targets);
% set default values
tree.dim        = 0;
tree.split_loc  = inf;
if isempty(features),
    return;
end
%% Step 1-Stop condition: too few samples, or only one class remains
if ((inc_node > L) || (L == 1) || (length(Uc) == 1)),
    H = hist(targets, length(Uc));  % class histogram
    [m, largest] = max(H);
    tree.child = Uc(largest);       % leaf: predict the majority class
    return;
end
%% Step 2-Otherwise: use the C4.5 criterion to choose the best feature
% 2-1 Compute the node entropy: Inode = -sum_i p_i*log2(p_i)
Pnode = zeros(1, length(Uc));
for i = 1:length(Uc),
    Pnode(i) = length(find(targets == Uc(i))) / L;
end
Inode = -sum(Pnode.*log(Pnode)/log(2));
% 2-2 For each dimension, compute the gain ratio impurity
% This is done separately for discrete and continuous features
delta_Ib    = zeros(1, Ni);
split_loc = ones(1, Ni)*inf;
for i = 1:Ni,
    data = features(i,:);
    Nbins = length(unique(data));
    if (discrete_dim(i)),
        %This is a discrete feature
        bins = unique(data);    % actual values taken by this feature
        P = zeros(length(Uc), Nbins);
        for j = 1:length(Uc),
            for k = 1:Nbins,
                % The original indexed bins as 1:Nbins, which fails once a
                % value disappears after a split; index the actual values.
                indices = find((targets == Uc(j)) & (data == bins(k)));
                P(j,k)  = length(indices);
            end
        end
        Pk          = sum(P);
        P           = P/L;
        Pk          = Pk/sum(Pk);
        info        = sum(-P.*log(eps+P)/log(2));
        % gain ratio = information gain / split information
        delta_Ib(i) = (Inode-sum(Pk.*info))/(-sum(Pk.*log(eps+Pk)/log(2)));
    else
        % This is a continuous feature
        P = zeros(length(Uc), 2);
        % Sort the features
        [sorted_data, indices] = sort(data);
        sorted_targets = targets(indices);
        % Calculate the information for each possible split
        I = zeros(1, L-1);
        for j = 1:L-1,
            for k =1:length(Uc),
                P(k,1) = length(find(sorted_targets(1:j) == Uc(k)));
                P(k,2) = length(find(sorted_targets(j+1:end) == Uc(k)));
            end
            Ps = sum(P)/L;
            P = P/L;
            info = sum(-P.*log(eps+P)/log(2));
            I(j) = Inode - sum(info.*Ps);   
        end
        [delta_Ib(i), s] = max(I);
        split_loc(i) = sorted_data(s);
    end
end
% 2-3 Find the dimension maximizing delta_Ib
[m, dim] = max(delta_Ib);
dims     = 1:Ni;
tree.dim = dim;
% 2-4 Split along the 'dim' dimension
Nf       = unique(features(dim,:));
Nbins = length(Nf);
if (discrete_dim(dim)),
    %Discrete feature: child i corresponds to feature value Nf(i)
    for i = 1:Nbins,
        indices     = find(features(dim, :) == Nf(i));
        tree.child(i) = BuildC45Tree(features(dims, indices), targets(indices), inc_node, discrete_dim(dims), maxNbin);
    end
else
    %Continuous feature
    tree.split_loc = split_loc(dim);
    indices1   = find(features(dim,:) <= split_loc(dim));
    indices2   = find(features(dim,:) > split_loc(dim));
    tree.child(1) = BuildC45Tree(features(dims, indices1), targets(indices1), inc_node, discrete_dim(dims), maxNbin);
    tree.child(2) = BuildC45Tree(features(dims, indices2), targets(indices2), inc_node, discrete_dim(dims), maxNbin);
end
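
To make the gain-ratio criterion in step 2-2 concrete, here is a small self-contained computation (toy numbers, not taken from the demo data) that mirrors the discrete-feature branch for one feature with two bins:

% Toy gain-ratio computation (mirrors step 2-2, discrete case).
targets = [1 1 1 1 0 0 0 0 0 0];   % 4 samples of class 1, 6 of class 0
data    = [1 1 1 2 1 2 2 2 2 2];   % one discrete feature, bins {1,2}
L    = length(targets);
Uc   = unique(targets);
bins = unique(data);
Pnode = [sum(targets==Uc(1)), sum(targets==Uc(2))]/L;
Inode = -sum(Pnode.*log(Pnode)/log(2));            % node entropy, ~0.971
P = zeros(length(Uc), length(bins));
for j = 1:length(Uc)
    for k = 1:length(bins)
        P(j,k) = sum(targets == Uc(j) & data == bins(k));
    end
end
Pk   = sum(P)/L;                                   % bin probabilities
P    = P/L;
info = sum(-P.*log(eps+P)/log(2));                 % per-bin information
gainRatio = (Inode - sum(Pk.*info))/(-sum(Pk.*log(eps+Pk)/log(2)))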




&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function targets = UseC45Tree(features, indices, tree, discrete_dim, Uc)
% Input:
%       features:       Ni*L, Ni features and L samples
%       indices:        indices of the samples to classify
%       tree:           C4.5 decision tree
%       discrete_dim:   number of discrete values in each dim (0 -> continuous dim)
%       Uc:             target classes
% Output:
%       targets:        classification results
%% Step 0-Initialize the results
targets = zeros(1, size(features,2));
%% Step 1-Stop condition: leaf node
if (tree.dim == 0)
   % Reached the end of the tree
   targets(indices) = tree.child;
   return;
end
%% Step 2-Otherwise: use children node
% 2-1 First, find the dimension we are to work on
dim = tree.dim;
dims= 1:size(features,1);
% 2-2 And classify according to it
if (discrete_dim(dim) == 0),
    % Continuous feature
    in = indices(find(features(dim, indices) <= tree.split_loc));
    targets = targets + UseC45Tree(features(dims, :), in, tree.child(1), discrete_dim(dims), Uc);
    in = indices(find(features(dim, indices) >  tree.split_loc));
    targets = targets + UseC45Tree(features(dims, :), in, tree.child(2), discrete_dim(dims), Uc);
else
    % Discrete feature: this assumes the test data takes the same set of
    % discrete values as the training data, so that Uf(i) matches the
    % value Nf(i) that tree.child(i) was trained on
    Uf = unique(features(dim,:));
    for i = 1:length(Uf),
        in   = indices(find(features(dim, indices) == Uf(i)));
        targets = targets + UseC45Tree(features(dims, :), in, tree.child(i), discrete_dim(dims), Uc);
    end
end
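
A tiny smoke test (toy data, not from the post) showing how BuildC45Tree and UseC45Tree fit together; inc_node is set to 0 here so the tree is grown until the classes are pure:

% Sketch: two discrete features, six samples.
features     = [1 1 2 2 1 2; 1 2 1 2 1 2];  % 2 x 6, values in {1,2}
targets      = [0 0 1 1 0 1];
discrete_dim = [2 2];
tree   = BuildC45Tree(features, targets, 0, discrete_dim, max(discrete_dim));
labels = UseC45Tree(features, 1:6, tree, discrete_dim, unique(targets))
% labels should reproduce targets, since feature 1 separates the classes.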


&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&

function targets = UseID3Tree(features, indices, tree, Nbins, Uc)
% Note: this ID3 classifier is included alongside UseC45Tree for
% comparison; it expects a tree with a 'split_dim' field (built by an
% ID3 trainer that is not part of this demo) and is never called by
% the script above.
% Input:
%       features:   Ni*L, Ni features and L samples
%       indices:    indices of the samples to classify
%       tree:       ID3 tree
%       Nbins:      features take values in 1:Nbins
%       Uc:         target class IDs
% Output:
%       targets:    classification results
%% Step 0-Initialize the results
targets = zeros(1, size(features,2));
%% Step 1-Stop condition: only one feature dimension remains
if (size(features,1) == 1)
    for i = 1:Nbins
        in = indices(find(features(indices) == i));
        if ~isempty(in)
            if isfinite(tree.child(i))
                targets(in) = tree.child(i);
            else
                % No data was found in the training set for this bin, so choose a class at random
                n           = 1 + floor(rand(1)*length(Uc));
                targets(in) = Uc(n);
            end
        end
    end
    return;
end
%% Step 2-Otherwise: use the children node for classification
% 2-1 First, find the dimension we are to work on
dim = tree.split_dim;
dims= find(~ismember(1:size(features,1), dim));
% 2-2 And classify according to it
for i = 1:Nbins
    in      = indices(find(features(dim, indices) == i));
    targets = targets + UseID3Tree(features(dims, :), in, tree.child(i), Nbins, Uc);
end
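
Since no ID3 trainer is included in the post, UseID3Tree can still be exercised with a hand-built tree (a sketch with hypothetical values, only to show the expected structure):

% Sketch: hand-built ID3 tree. The root splits on feature 1; each leaf
% maps the remaining feature's values 1:2 to a class label.
leafA.child = [0 1];
leafB.child = [1 1];
root.split_dim = 1;
root.child  = [leafA, leafB];
features = [1 1 2 2; 1 2 1 2];     % 2 features, 4 samples, values in 1:2
labels   = UseID3Tree(features, 1:4, root, 2, [0 1])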
