% Version 1.000
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program fine-tunes an autoencoder with backpropagation.
% Weights of the autoencoder are going to be saved in mnist_weights.mat
% and training and test reconstruction errors in mnist_error.mat
% You can also set maxepoch, default value is 200 as in our paper.
% ------------------------------------------------------------------------
% Fine-tuning setup: load the pretrained parameters, build the data batches
% and assemble the eight weight matrices of the unrolled autoencoder.
% ------------------------------------------------------------------------

% Number of passes made by the epoch loop that follows.
maxepoch = 200;

fprintf(1, '\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
fprintf(1, '60 batches of 1000 cases each. \n');

% Parameters saved by the four pretraining stages.
% NOTE(review): presumably these files supply vishid, hidpen, hidpen2, hidtop
% and the *biases variables stacked below -- confirm against the files.
load('mnistvh');
load('mnisthp');
load('mnisthp2');
load('mnistpo');

% Run the batching script; batchdata (and testbatchdata, used in the epoch
% loop) are expected to exist in the workspace afterwards -- TODO confirm.
makebatches;

% batchdata is indexed as (case, dimension, batch).
[numcases, numdims, numbatches] = size(batchdata);
N = numcases;                       % cases per batch

% ---- Initial weights of the unrolled autoencoder -----------------------
% Every matrix is [weights; biases]: the bias values form the last row, so
% weights and biases are updated together as a single matrix.
% Encoder half.
w1 = [vishid;  hidrecbiases];
w2 = [hidpen;  penrecbiases];
w3 = [hidpen2; penrecbiases2];
w4 = [hidtop;  toprecbiases];
% Decoder half: transposed encoder weights with the generative biases.
w5 = [hidtop';  topgenbiases];
w6 = [hidpen2'; hidgenbiases2];
w7 = [hidpen';  hidgenbiases];
w8 = [vishid';  visbiases];

% ---- Layer widths, bias row excluded ------------------------------------
% l_k is the number of inputs feeding w_k (row count minus the bias row).
l1 = size(w1, 1) - 1;
l2 = size(w2, 1) - 1;
l3 = size(w3, 1) - 1;
l4 = size(w4, 1) - 1;
l5 = size(w5, 1) - 1;
l6 = size(w6, 1) - 1;
l7 = size(w7, 1) - 1;
l8 = size(w8, 1) - 1;
l9 = l1;                            % output layer is as wide as the input layer

% Per-epoch reconstruction errors; both vectors grow by one entry per epoch.
test_err  = [];
train_err = [];
% Elementwise logistic of acts*W. The input is expected to carry a trailing
% column of ones so that the last row of W acts as the bias.
logistic_of = @(acts, W) 1./(1 + exp(-acts*W));

for epoch = 1:maxepoch

    % ================= Training reconstruction error ====================
    [numcases, numdims, numbatches] = size(batchdata);
    N = numcases;
    err = 0;
    for batch = 1:numbatches
        % One training batch plus a bias column of ones.
        data = [batchdata(:,:,batch), ones(N,1)];

        % Forward pass. Each activation matrix gets a ones column appended
        % so the next layer's bias row is applied by the same product.
        w1probs = [logistic_of(data,    w1), ones(N,1)];
        w2probs = [logistic_of(w1probs, w2), ones(N,1)];
        w3probs = [logistic_of(w2probs, w3), ones(N,1)];
        w4probs = [w3probs*w4,               ones(N,1)];   % code layer: linear, no logistic
        w5probs = [logistic_of(w4probs, w5), ones(N,1)];
        w6probs = [logistic_of(w5probs, w6), ones(N,1)];
        w7probs = [logistic_of(w6probs, w7), ones(N,1)];
        dataout = logistic_of(w7probs, w8);

        % Summed squared reconstruction error, averaged over the cases of
        % this batch (the bias column of data is excluded).
        err = err + 1/N*sum(sum( (data(:,1:end-1) - dataout).^2 ));
    end
    train_err(epoch) = err/numbatches;   % mean over batches

    % ========== Figure: originals next to their reconstructions =========
    fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
    % First 15 cases of the last training batch, laid out as column pairs
    % [original, reconstruction, original, reconstruction, ...].
    output = reshape([data(1:15,1:end-1)'; dataout(1:15,:)'], [], 30);
    if epoch == 1
        close('all');
        figure('Position',[100,600,1000,200]);
    else
        figure(1);
    end
    mnistdisp(output);
    drawnow;

    % =================== Test reconstruction error ======================
    [testnumcases, testnumdims, testnumbatches] = size(testbatchdata);
    N = testnumcases;
    err = 0;
    for batch = 1:testnumbatches
        data = [testbatchdata(:,:,batch), ones(N,1)];

        % Same forward pass as for the training batches.
        w1probs = [logistic_of(data,    w1), ones(N,1)];
        w2probs = [logistic_of(w1probs, w2), ones(N,1)];
        w3probs = [logistic_of(w2probs, w3), ones(N,1)];
        w4probs = [w3probs*w4,               ones(N,1)];
        w5probs = [logistic_of(w4probs, w5), ones(N,1)];
        w6probs = [logistic_of(w5probs, w6), ones(N,1)];
        w7probs = [logistic_of(w6probs, w7), ones(N,1)];
        dataout = logistic_of(w7probs, w8);

        err = err + 1/N*sum(sum( (data(:,1:end-1) - dataout).^2 ));
    end
    test_err(epoch) = err/testnumbatches;
    fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));

    % ===================== Fine-tune the weights ========================
    % The training batches are consumed in groups of 10 consecutive batches.
    for batch = 1:numbatches/10
        fprintf(1,'epoch %d batch %d\r',epoch,batch);

        % Stack 10 consecutive minibatches vertically into one larger batch
        % (rows of batch 1 first, then batch 2, ...). No bias column is
        % added here; data is handed to minimize as is.
        first = (batch-1)*10 + 1;
        data = reshape(permute(batchdata(:,:,first:first+9), [1 3 2]), [], numdims);

        % Pack all weights into one column vector and call minimize with
        % 'CG_MNIST' as the objective name and max_iter = 3 (the original
        % banner calls this conjugate gradient with 3 line searches).
        max_iter = 3;
        VV  = [w1(:); w2(:); w3(:); w4(:); w5(:); w6(:); w7(:); w8(:)];
        Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9];   % layer widths without bias
        [X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data);

        % Unpack X in the same order it was packed: matrix k has
        % Dim(k)+1 rows (weights plus bias row) and Dim(k+1) columns.
        rows = Dim(1:8) + 1;
        cols = Dim(2:9);
        unpacked = cell(1, 8);
        offset = 0;
        for layer = 1:8
            count = rows(layer)*cols(layer);
            unpacked{layer} = reshape(X(offset+1:offset+count), rows(layer), cols(layer));
            offset = offset + count;
        end
        [w1, w2, w3, w4, w5, w6, w7, w8] = unpacked{:};
    end

    % Save the current weights and the error curves after every epoch.
    save('mnist_weights', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8');
    save('mnist_error', 'test_err', 'train_err');
end
%
% Code provided by Ruslan Salakhutdinov and Geoff Hinton
%
% Permission is granted for anyone to copy, use, modify, or distribute this
% program and accompanying programs and documents for any purpose, provided
% this copyright notice is retained and prominently displayed, along with
% a note saying that the original programs are available from our
% web page.
% The programs and documents are distributed without any warranty, express or
% implied. As the programs were written for research purposes only, they have
% not been tested to the degree that would be advisable in any important
% application. All use of these programs is entirely at the user's own risk.
% This program fine-tunes an autoencoder with backpropagation.
% Weights of the autoencoder are going to be saved in mnist_weights.mat
% and training and test reconstruction errors in mnist_error.mat
% You can also set maxepoch, default value is 200 as in our paper.
% ------------------------------------------------------------------------
% Fine-tuning setup: load the pretrained parameters, build the data batches
% and assemble the eight weight matrices of the unrolled autoencoder.
%
% NOTE(review): the same fine-tuning procedure also appears earlier in this
% file. Reaching this point reloads the pretrained parameters and rebuilds
% w1..w8 from them, so fine-tuning starts over -- confirm that the repeat is
% intended.
% ------------------------------------------------------------------------

% Number of passes made by the epoch loop that follows.
maxepoch = 200;

fprintf(1, '\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
fprintf(1, '60 batches of 1000 cases each. \n');

% Parameters saved by the four pretraining stages.
% NOTE(review): presumably these files supply vishid, hidpen, hidpen2, hidtop
% and the *biases variables stacked below -- confirm against the files.
load('mnistvh');
load('mnisthp');
load('mnisthp2');
load('mnistpo');

% Run the batching script; batchdata (and testbatchdata, used in the epoch
% loop) are expected to exist in the workspace afterwards -- TODO confirm.
makebatches;

% batchdata is indexed as (case, dimension, batch).
[numcases, numdims, numbatches] = size(batchdata);
N = numcases;                       % cases per batch

% ---- Initial weights of the unrolled autoencoder -----------------------
% Every matrix is [weights; biases]: the bias values form the last row, so
% weights and biases are updated together as a single matrix.
% Encoder half.
w1 = [vishid;  hidrecbiases];
w2 = [hidpen;  penrecbiases];
w3 = [hidpen2; penrecbiases2];
w4 = [hidtop;  toprecbiases];
% Decoder half: transposed encoder weights with the generative biases.
w5 = [hidtop';  topgenbiases];
w6 = [hidpen2'; hidgenbiases2];
w7 = [hidpen';  hidgenbiases];
w8 = [vishid';  visbiases];

% ---- Layer widths, bias row excluded ------------------------------------
% l_k is the number of inputs feeding w_k (row count minus the bias row).
l1 = size(w1, 1) - 1;
l2 = size(w2, 1) - 1;
l3 = size(w3, 1) - 1;
l4 = size(w4, 1) - 1;
l5 = size(w5, 1) - 1;
l6 = size(w6, 1) - 1;
l7 = size(w7, 1) - 1;
l8 = size(w8, 1) - 1;
l9 = l1;                            % output layer is as wide as the input layer

% Per-epoch reconstruction errors; both vectors grow by one entry per epoch.
test_err  = [];
train_err = [];
% Elementwise logistic of acts*W. The input is expected to carry a trailing
% column of ones so that the last row of W acts as the bias.
logistic_of = @(acts, W) 1./(1 + exp(-acts*W));

for epoch = 1:maxepoch

    % ================= Training reconstruction error ====================
    [numcases, numdims, numbatches] = size(batchdata);
    N = numcases;
    err = 0;
    for batch = 1:numbatches
        % One training batch plus a bias column of ones.
        data = [batchdata(:,:,batch), ones(N,1)];

        % Forward pass. Each activation matrix gets a ones column appended
        % so the next layer's bias row is applied by the same product.
        w1probs = [logistic_of(data,    w1), ones(N,1)];
        w2probs = [logistic_of(w1probs, w2), ones(N,1)];
        w3probs = [logistic_of(w2probs, w3), ones(N,1)];
        w4probs = [w3probs*w4,               ones(N,1)];   % code layer: linear, no logistic
        w5probs = [logistic_of(w4probs, w5), ones(N,1)];
        w6probs = [logistic_of(w5probs, w6), ones(N,1)];
        w7probs = [logistic_of(w6probs, w7), ones(N,1)];
        dataout = logistic_of(w7probs, w8);

        % Summed squared reconstruction error, averaged over the cases of
        % this batch (the bias column of data is excluded).
        err = err + 1/N*sum(sum( (data(:,1:end-1) - dataout).^2 ));
    end
    train_err(epoch) = err/numbatches;   % mean over batches

    % ========== Figure: originals next to their reconstructions =========
    fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
    % First 15 cases of the last training batch, laid out as column pairs
    % [original, reconstruction, original, reconstruction, ...].
    output = reshape([data(1:15,1:end-1)'; dataout(1:15,:)'], [], 30);
    if epoch == 1
        close('all');
        figure('Position',[100,600,1000,200]);
    else
        figure(1);
    end
    mnistdisp(output);
    drawnow;

    % =================== Test reconstruction error ======================
    [testnumcases, testnumdims, testnumbatches] = size(testbatchdata);
    N = testnumcases;
    err = 0;
    for batch = 1:testnumbatches
        data = [testbatchdata(:,:,batch), ones(N,1)];

        % Same forward pass as for the training batches.
        w1probs = [logistic_of(data,    w1), ones(N,1)];
        w2probs = [logistic_of(w1probs, w2), ones(N,1)];
        w3probs = [logistic_of(w2probs, w3), ones(N,1)];
        w4probs = [w3probs*w4,               ones(N,1)];
        w5probs = [logistic_of(w4probs, w5), ones(N,1)];
        w6probs = [logistic_of(w5probs, w6), ones(N,1)];
        w7probs = [logistic_of(w6probs, w7), ones(N,1)];
        dataout = logistic_of(w7probs, w8);

        err = err + 1/N*sum(sum( (data(:,1:end-1) - dataout).^2 ));
    end
    test_err(epoch) = err/testnumbatches;
    fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));

    % ===================== Fine-tune the weights ========================
    % The training batches are consumed in groups of 10 consecutive batches.
    for batch = 1:numbatches/10
        fprintf(1,'epoch %d batch %d\r',epoch,batch);

        % Stack 10 consecutive minibatches vertically into one larger batch
        % (rows of batch 1 first, then batch 2, ...). No bias column is
        % added here; data is handed to minimize as is.
        first = (batch-1)*10 + 1;
        data = reshape(permute(batchdata(:,:,first:first+9), [1 3 2]), [], numdims);

        % Pack all weights into one column vector and call minimize with
        % 'CG_MNIST' as the objective name and max_iter = 3 (the original
        % banner calls this conjugate gradient with 3 line searches).
        max_iter = 3;
        VV  = [w1(:); w2(:); w3(:); w4(:); w5(:); w6(:); w7(:); w8(:)];
        Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9];   % layer widths without bias
        [X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data);

        % Unpack X in the same order it was packed: matrix k has
        % Dim(k)+1 rows (weights plus bias row) and Dim(k+1) columns.
        rows = Dim(1:8) + 1;
        cols = Dim(2:9);
        unpacked = cell(1, 8);
        offset = 0;
        for layer = 1:8
            count = rows(layer)*cols(layer);
            unpacked{layer} = reshape(X(offset+1:offset+count), rows(layer), cols(layer));
            offset = offset + count;
        end
        [w1, w2, w3, w4, w5, w6, w7, w8] = unpacked{:};
    end

    % Save the current weights and the error curves after every epoch.
    save('mnist_weights', 'w1', 'w2', 'w3', 'w4', 'w5', 'w6', 'w7', 'w8');
    save('mnist_error', 'test_err', 'train_err');
end