cnn_mnist_init.m (r20)
function net = cnn_mnist_init(varargin)
% MNIST network initialization function; a useful starting point for
% training your own network.
% CNN_MNIST_INIT Initialize a LeNet-style CNN for MNIST
opts.batchNormalization = true ;
opts.networkType = 'simplenn' ;
opts = vl_argparse(opts, varargin) ;
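% Example call showing the two options parsed above:
%   net = cnn_mnist_init('batchNormalization', false, 'networkType', 'dagnn') ;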
% Seed the random number generator so the weight initialization below is reproducible
rng('default');
rng(0) ;
f=1/100 ;
net.layers = {} ;
% Define the convolutional layers. The randn array is the filter bank W and the
% zeros vector is the bias b; early MatConvNet versions defined them separately,
% but both are now merged into 'weights'. f = 1/100 is simply the standard
% deviation used to scale the random initialization; that it equals 1/batchSize
% here is coincidental.
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{f*randn(5,5,1,20, 'single'), zeros(1, 20, 'single')}}, ...
'stride', 1, ...
'pad', 0) ;
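% Size check: 28x28x1 input, 5x5 filter, stride 1, pad 0 -> (28-5)/1+1 = 24, giving 24x24x20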
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
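% 2x2 max pooling at stride 2 halves the resolution: 24/2 = 12 -> 12x12x20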
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{f*randn(5,5,20,50, 'single'),zeros(1,50,'single')}}, ...
'stride', 1, ...
'pad', 0) ;
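% (12-5)/1+1 = 8 -> 8x8x50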
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
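% 8/2 = 4 -> 4x4x50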
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{f*randn(4,4,50,500, 'single'), zeros(1,500,'single')}}, ...
'stride', 1, ...
'pad', 0) ;
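% The 4x4 filter exactly covers the 4x4 input, so this conv acts as a fully
% connected layer: output 1x1x500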
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{f*randn(1,1,500,10, 'single'), zeros(1,10,'single')}}, ...
'stride', 1, ...
'pad', 0) ;
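% 1x1 conv mapping 500 channels to the 10 MNIST class scores consumed by the loss below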
net.layers{end+1} = struct('type', 'softmaxloss') ;
% optionally switch to batch normalization
% Each insertion shifts the indices of the layers after it, so inserting back to
% front would be less error-prone; the forward indices 1, 4, 7 below already
% account for the shift (after inserting at 1, the conv originally at index 3
% sits at 4; after inserting at 4, the conv originally at 5 sits at 7).
if opts.batchNormalization
net = insertBnorm(net, 1) ;
net = insertBnorm(net, 4) ;
net = insertBnorm(net, 7) ;
end
% Meta parameters
net.meta.inputSize = [28 28 1] ;
net.meta.trainOpts.learningRate = 0.001 ;
net.meta.trainOpts.numEpochs = 20 ;
net.meta.trainOpts.batchSize = 100 ;
% Fill in default values
% Mainly handles compatibility across MatConvNet versions and fills in any
% parameters omitted during initialization; it has no real effect on training.
net = vl_simplenn_tidy(net) ;
% Switch to DagNN if requested
switch lower(opts.networkType)
case 'simplenn'
% done
case 'dagnn'
% Convert the SimpleNN network to a DagNN
net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ;
% dagnn.Loss doubles as an error metric here: 'classerror' measures top-1
% classification error and 'topkerror' with topk = 5 measures top-5 error,
% both comparing 'prediction' against 'label'
net.addLayer('top1err', dagnn.Loss('loss', 'classerror'), ...
{'prediction', 'label'}, 'error') ;
net.addLayer('top5err', dagnn.Loss('loss', 'topkerror', ...
'opts', {'topk', 5}), {'prediction', 'label'}, 'top5err') ;
otherwise
assert(false) ;
end
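% A minimal training sketch, assuming MatConvNet's example trainer cnn_train is
% on the path; imdb and getBatch are hypothetical names for your image database
% struct and batch-fetching function:
%   net = cnn_mnist_init('networkType', 'simplenn') ;
%   [net, info] = cnn_train(net, imdb, getBatch, net.meta.trainOpts) ;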
% --------------------------------------------------------------------
function net = insertBnorm(net, l)
% --------------------------------------------------------------------
% Insert a batch normalization layer after layer l, which must be a layer with
% weights (a conv layer). The bnorm weights are the per-channel scale (ones)
% and offset (zeros); vl_simplenn_tidy later appends the moments, which is what
% the third learningRate entry (0.05) is for.
assert(isfield(net.layers{l}, 'weights'));
ndim = size(net.layers{l}.weights{1}, 4);
layer = struct('type', 'bnorm', ...
'weights', {{ones(ndim, 1, 'single'), zeros(ndim, 1, 'single')}}, ...
'learningRate', [1 1 0.05], ...
'weightDecay', [0 0]) ;
% Drop the conv layer's biases; the bnorm offset parameter makes them redundant
% (the bias lives in weights{2} now, not in the legacy 'biases' field)
net.layers{l}.weights{2} = [] ;
% Splice the new layer into the cell array; simple and direct
net.layers = horzcat(net.layers(1:l), layer, net.layers(l+1:end)) ;
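% e.g. with l = 1 this yields horzcat(net.layers(1:1), layer, net.layers(2:end)),
% placing the bnorm right after the first conv layer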