This is Hinton's code for RBM pre-training.
Source code: http://www.cs.toronto.edu/~hinton/MatlabForSciencePaper.html
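rbm.m is a script rather than a function: it assumes that maxepoch, numhid, batchdata and restart are already set in the workspace (in Hinton's package, mnistdeepauto.m sets them before calling rbm). A minimal driver sketch with synthetic binary data, just to show the expected shapes; the random data and the specific values here are illustrative, not part of the original code:

% Minimal driver sketch: set the variables that rbm.m expects, then run it.
numcases = 100; % examples per mini-batch
numdims = 784; % visible units (e.g. 28x28 binary images)
numbatches = 10; % number of mini-batches
numhid = 500; % hidden units of this RBM
maxepoch = 10; % training epochs
restart = 1; % 1 = (re)initialize the weights
batchdata = double(rand(numcases,numdims,numbatches) > 0.5); % synthetic binary data, for illustration only
rbm; % runs the script below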
%%%%%%%%%%%%%%rbm.m
epsilonw = 0.1; % Learning rate for weights
epsilonvb = 0.1; % Learning rate for biases of visible units
epsilonhb = 0.1; % Learning rate for biases of hidden units
weightcost = 0.0002; % L2 weight-decay coefficient
initialmomentum = 0.5; % Momentum used during the first 5 epochs
finalmomentum = 0.9; % Momentum used after epoch 5
[numcases numdims numbatches]=size(batchdata);
if restart ==1,
restart=0;
epoch=1;
% Initializing symmetric weights and biases.
vishid = 0.1*randn(numdims, numhid); % Initialize the visible-to-hidden weight matrix with small Gaussian values
hidbiases = zeros(1,numhid); % Initialize the hidden-unit biases
visbiases = zeros(1,numdims); % Initialize the visible-unit biases
poshidprobs = zeros(numcases,numhid); % Hidden-unit probabilities driven by the data (positive phase)
neghidprobs = zeros(numcases,numhid); % Hidden-unit probabilities driven by the reconstructed data (negative phase)
posprods = zeros(numdims,numhid); % Positive-phase statistics for the weight update, computed from the data: X'*h, i.e. <x1,h1>
negprods = zeros(numdims,numhid); % Negative-phase statistics for the weight update, computed from the reconstruction: X2'*h2, i.e. <x2,h2>
vishidinc = zeros(numdims,numhid); % Weight-update increment
hidbiasinc = zeros(1,numhid); % Hidden-bias increment
visbiasinc = zeros(1,numdims); % Visible-bias increment
batchposhidprobs=zeros(numcases,numhid,numbatches); % Hidden probabilities for every batch; they serve as the visible data of the next RBM when stacking
end
for epoch = epoch:maxepoch, % Epoch loop for pre-training this layer
fprintf(1,'epoch %d\r',epoch);
errsum=0; % Accumulated reconstruction error for this epoch
for batch = 1:numbatches,
fprintf(1,'epoch %d batch %d\r',epoch,batch);
%%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
data = batchdata(:,:,batch); % Take the current mini-batch
poshidprobs = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1))); % p(h=1|v): probability of each hidden unit being 1 given the data
batchposhidprobs(:,:,batch)=poshidprobs; % Save the hidden probabilities; they become the visible data of the next RBM when stacking
posprods = data' * poshidprobs; % Positive-phase statistics for the weight update, computed from the data: X'*h, i.e. <x1,h1>
poshidact = sum(poshidprobs); % For the hidden-bias update: each hidden unit's probability summed over all numcases examples
posvisact = sum(data); % For the visible-bias update
%%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
poshidstates = poshidprobs > rand(numcases,numhid); % Sample binary hidden states: 1 where the probability exceeds a uniform random number, 0 otherwise
%%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Contrastive divergence with one step of Gibbs sampling (CD-1)
%Reconstruct the data from the sampled hidden states
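% The one-step chain is data -> poshidstates -> negdata -> neghidprobs,
% i.e. v0 -> h0 -> v1 -> h1. The reconstruction uses the transposed weight
% matrix vishid' because an RBM uses the same symmetric weights in both
% directions; negdata is kept as real-valued probabilities rather than
% being sampled to binary states.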
negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % Reconstructed data (visible-unit probabilities)
neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1))); % Hidden probabilities driven by the reconstructed data
negprods = negdata'*neghidprobs; % Negative-phase statistics for the weight update, computed from the reconstruction: X2'*h2, i.e. <x2,h2>
neghidact = sum(neghidprobs); % For the hidden-bias update, from the reconstruction-driven hidden probabilities
negvisact = sum(negdata); % For the visible-bias update, from the reconstruction
%%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
err= sum(sum( (data-negdata).^2 )); % Squared reconstruction error for this batch
errsum = err + errsum; % Accumulated error for the epoch
if epoch>5, % Momentum schedule
momentum=finalmomentum; % After epoch 5 the momentum is 0.9
else
momentum=initialmomentum; % During the first 5 epochs the momentum is 0.5
end;
%%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%Update the weights and biases with the CD-1 learning rule; see Hinton's RBM papers for the derivation of the update equations
%First compute the increments
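% CD-1 approximates the log-likelihood gradient of the weights by
% (<v'h>_data - <v'h>_recon)/numcases, which is (posprods-negprods)/numcases
% below; an L2 weight-decay term -weightcost*vishid and a momentum term
% momentum*vishidinc are added. The bias gradients are the mini-batch
% averages of (poshidact-neghidact) and (posvisact-negvisact).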
vishidinc = momentum*vishidinc + ...
epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);
%Then apply them to the parameters
vishid = vishid + vishidinc;
visbiases = visbiases + visbiasinc;
hidbiases = hidbiases + hidbiasinc;
%%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
end % End of the mini-batch loop
fprintf(1, 'epoch %4i error %6.1f \n', epoch, errsum);
end; % End of the epoch loop; pre-training of this layer is done
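After training, batchposhidprobs holds the data-driven hidden probabilities for every mini-batch; when RBMs are stacked greedily, they become the visible data of the next layer (this is the hand-off performed by the driver script in Hinton's package). A sketch of that step, where the saved parameter names and the size of the next hidden layer are illustrative:

% Sketch of greedy layer-wise stacking: the hidden probabilities of the
% trained RBM become the training data for the next RBM.
vishid1 = vishid; hidbiases1 = hidbiases; visbiases1 = visbiases; % keep this layer's parameters (illustrative names)
batchdata = batchposhidprobs; % hidden probabilities become the new visible data
numhid = 250; % illustrative size of the next hidden layer
restart = 1; % reinitialize for the new RBM
rbm; % train the next layer in the stack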