极限学习机代码注释翻译

最新推荐文章于 2024-07-31 08:36:30 发布

hhsh49

最新推荐文章于 2024-07-31 08:36:30 发布

阅读量3.4k

点赞数 1

分类专栏：机器学习

机器学习专栏收录该内容

25 篇文章 0 订阅

订阅专栏

转自：http://blog.csdn.net/cutelily2014/article/details/51635713?locationNum=5

ELM(extreme learning machine)极限学习机算法在2004年由南洋理工的Guang-bin Huang提出，经过进来的发展已经呈现出很好的性能，现在我把算法原作者的部分代码贴出来，附上自己的一点解析。关于算法的推导和应用，以及进来我看的相关论文可以在链接下载http://download.csdn.NET/detail/cutelily2014/9546138。

在已经明白算法原理的基础上来看下面的代码，主要实现了分类和回归，也是我们一般处理数据的两个方面。

function [TrainingTime, TestingTime, TrainingAccuracy, TestingAccuracy] = elm(TrainingData_File, TestingData_File, Elm_Type, NumberofHiddenNeurons, ActivationFunction)

% Usage: elm(TrainingData_File, TestingData_File, Elm_Type, NumberofHiddenNeurons, ActivationFunction)
% OR: [TrainingTime, TestingTime, TrainingAccuracy, TestingAccuracy] = elm(TrainingData_File, TestingData_File, Elm_Type, NumberofHiddenNeurons, ActivationFunction)
%
% Input:
% TrainingData_File - Filename of training data set
% TestingData_File - Filename of testing data set
% Elm_Type - 0 for regression; 1 for (both binary and multi-classes) classification
% NumberofHiddenNeurons - Number of hidden neurons assigned to the ELM
% ActivationFunction - Type of activation function:
% 'sig' for Sigmoidal function
% 'sin' for Sine function
% 'hardlim' for Hardlim function
% 'tribas' for Triangular basis function
% 'radbas' for Radial basis function (for additive type of SLFNs instead of RBF type of SLFNs)
%
% Output:
% TrainingTime - Time (seconds) spent on training ELM
% TestingTime - Time (seconds) spent on predicting ALL testing data
% TrainingAccuracy - Training accuracy:
% RMSE for regression or correct classification rate for classification
% TestingAccuracy - Testing accuracy:
% RMSE for regression or correct classification rate for classification
%
% MULTI-CLASSE CLASSIFICATION: NUMBER OF OUTPUT NEURONS WILL BE AUTOMATICALLY SET EQUAL TO NUMBER OF CLASSES
% FOR EXAMPLE, if there are 7 classes in all, there will have 7 output
% neurons; neuron 5 has the highest output means input belongs to 5-th class
%
% Sample1 regression: [TrainingTime, TestingTime, TrainingAccuracy, TestingAccuracy] = elm('sinc_train', 'sinc_test', 0, 20, 'sig')
% Sample2 classification: elm('diabetes_train', 'diabetes_test', 1, 20, 'sig')
%
%%%% Authors: MR QIN-YU ZHU AND DR GUANG-BIN HUANG
%%%% NANYANG TECHNOLOGICAL UNIVERSITY, SINGAPORE
%%%% EMAIL: EGBHUANG@NTU.EDU.SG; GBHUANG@IEEE.ORG
%%%% WEBSITE: http://www.ntu.edu.sg/eee/icis/cv/egbhuang.htm
%%%% DATE: APRIL 2004

%%%%%%%%%%% Macro definition
REGRESSION=0;
CLASSIFIER=1;

%%%%%%%%%%% Load training dataset，训练集的数据
train_data=load(TrainingData_File); %此处的train_data是一个矩阵形式
T=train_data(:,1)'; %矩阵train_data的第一列存放神经网络的输出，将其提取出来变成一行，赋予T
P=train_data(:,2:size(train_data,2))';%train_data的第二列与之后各列为神经网络的输入，将其变成行，赋予P
clear train_data; % Release raw training data array

%%%%%%%%%%% Load testing dataset，测试集的数据
test_data=load(TestingData_File);%此处的test_data是一个矩阵形式
TV.T=test_data(:,1)'; %相同方法获得TV.T，作为测试集的输出
TV.P=test_data(:,2:size(test_data,2))';%相同方法获得TV.P，作为测试集的输入
clear test_data; % Release raw testing data array

NumberofTrainingData=size(P,2);
NumberofTestingData=size(TV.P,2);
NumberofInputNeurons=size(P,1);

if Elm_Type~=REGRESSION %当算法用于分类时，必须先获得类别集合，还有每个输出属于哪个类别
%%%%%%%%%%%% Preprocessing the data of classification
sorted_target=sort(cat(2,T,TV.T),2);
label=zeros(1,1); % Find and save in 'label' class label from training and testing data sets
label(1,1)=sorted_target(1,1);
j=1;
for i = 2:(NumberofTrainingData+NumberofTestingData)
if sorted_target(1,i) ~= label(1,j)
j=j+1;
label(1,j) = sorted_target(1,i);%找到所有的类别，存放于label
end
end
number_class=j;
NumberofOutputNeurons=number_class;

%%%%%%%%%% Processing the targets of training
temp_T=zeros(NumberofOutputNeurons, NumberofTrainingData);
for i = 1:NumberofTrainingData
for j = 1:number_class
if label(1,j) == T(1,i)
break;
end
end
temp_T(j,i)=1;
end
T=temp_T*2-1;%生成一个只包含-1 & 1的矩阵，尺寸为NumberofOutputNeurons*NumberofTrainingData，每行代表一个类别，每列代表一个样本，若T(i,j)==1说明第j个样本属于第i个类别

%%%%%%%%%% Processing the targets of testing
temp_TV_T=zeros(NumberofOutputNeurons, NumberofTestingData);
for i = 1:NumberofTestingData
for j = 1:number_class
if label(1,j) == TV.T(1,i)
break;
end
end
temp_TV_T(j,i)=1;
end
TV.T=temp_TV_T*2-1;%同理生成测试集的类别矩阵

end % end if of Elm_Type，对分类情况下数据的预处理完成

%%%%%%%%%%% Calculate weights & biases
start_time_train=cputime;

%%%%%%%%%%% Random generate input weights InputWeight (w_i) and biases BiasofHiddenNeurons (b_i) of hidden neurons
InputWeight=rand(NumberofHiddenNeurons,NumberofInputNeurons)*2-1; %随机生成输入权重w [-1,1]
BiasofHiddenNeurons=rand(NumberofHiddenNeurons,1);%随机生成输入偏移量b [0,1]
tempH=InputWeight*P; %计算出 w*X
clear P; % Release input of training data
ind=ones(1,NumberofTrainingData);
BiasMatrix=BiasofHiddenNeurons(:,ind); % Extend the bias matrix BiasofHiddenNeurons to match the demention of H
tempH=tempH+BiasMatrix; %计算出 w*X+b

%%%%%%%%%%% Calculate hidden neuron output matrix H，以下根据选择的不同激励函数，计算出隐藏层的

%输出 H=G(w*X+b)
switch lower(ActivationFunction)
case {'sig','sigmoid'}
%%%%%%%% Sigmoid
H = 1 ./ (1 + exp(-tempH));
case {'sin','sine'}
%%%%%%%% Sine
H = sin(tempH);
case {'hardlim'}
%%%%%%%% Hard Limit
H = double(hardlim(tempH));
case {'tribas'}
%%%%%%%% Triangular basis function
H = tribas(tempH);
case {'radbas'}
%%%%%%%% Radial basis function
H = radbas(tempH);
%%%%%%%% More activation functions can be added here
end
clear tempH; % Release the temparary array for calculation of hidden neuron output matrix H

%%%%%%%%%%% Calculate output weights OutputWeight (beta_i)
OutputWeight=pinv(H') * T'; % slower implementation,最后计算出输出层权重beta=pinv(H)*T'

%If you use faster methods or kernel method, PLEASE CITE in your paper properly:
%Guang-Bin Huang, Hongming Zhou, Xiaojian Ding, and Rui Zhang, "Extreme Learning Machine for Regression %and Multi-Class Classification," submitted to IEEE Transactions on Pattern Analysis and Machine Intelligence, %October 2010.

end_time_train=cputime;
TrainingTime=end_time_train-start_time_train % Calculate CPU time (seconds) spent for training ELM

%%%%%%%%%%% Calculate the training accuracy
Y=(H' * OutputWeight)'; % Y: the actual output of the training data
if Elm_Type == REGRESSION
TrainingAccuracy=sqrt(mse(T - Y)) % Calculate training accuracy (RMSE) for regression case
end
clear H;

%%%%%%%%%%% Calculate the output of testing input
start_time_test=cputime;
tempH_test=InputWeight*TV.P;
clear TV.P; % Release input of testing data
ind=ones(1,NumberofTestingData);
BiasMatrix=BiasofHiddenNeurons(:,ind); % Extend the bias matrix BiasofHiddenNeurons to match the demention of H
tempH_test=tempH_test + BiasMatrix;
switch lower(ActivationFunction)
case {'sig','sigmoid'}
%%%%%%%% Sigmoid
H_test = 1 ./ (1 + exp(-tempH_test));
case {'sin','sine'}
%%%%%%%% Sine
H_test = sin(tempH_test);
case {'hardlim'}
%%%%%%%% Hard Limit
H_test = hardlim(tempH_test);
case {'tribas'}
%%%%%%%% Triangular basis function
H_test = tribas(tempH_test);
case {'radbas'}
%%%%%%%% Radial basis function
H_test = radbas(tempH_test);
%%%%%%%% More activation functions can be added here
end
TY=(H_test' * OutputWeight)'; % TY: the actual output of the testing data
end_time_test=cputime;
TestingTime=end_time_test-start_time_test % Calculate CPU time (seconds) spent by ELM predicting the whole testing data

%以下为对回归和分类情况下算法性能的指标计算
if Elm_Type == REGRESSION
TestingAccuracy=sqrt(mse(TV.T - TY)) % Calculate testing accuracy (RMSE) for regression case
end

if Elm_Type == CLASSIFIER
%%%%%%%%%% Calculate training & testing classification accuracy
MissClassificationRate_Training=0;
MissClassificationRate_Testing=0;

for i = 1 : size(T, 2)
[x, label_index_expected]=max(T(:,i));
[x, label_index_actual]=max(Y(:,i));
if label_index_actual~=label_index_expected
MissClassificationRate_Training=MissClassificationRate_Training+1;
end
end
TrainingAccuracy=1-MissClassificationRate_Training/size(T,2)
for i = 1 : size(TV.T, 2)
[x, label_index_expected]=max(TV.T(:,i));
[x, label_index_actual]=max(TY(:,i));
if label_index_actual~=label_index_expected
MissClassificationRate_Testing=MissClassificationRate_Testing+1;
end
end
TestingAccuracy=1-MissClassificationRate_Testing/size(TV.T,2)
end