【无标题】

weixin_49241211

已于 2023-05-19 14:48:27 修改

阅读量182

点赞数 1

文章标签：深度学习人工智能

于 2023-05-19 14:32:28 首次发布

本文链接：https://blog.csdn.net/weixin_49241211/article/details/130766066

版权

spdnet_afew.m

function [net, info] = spdnet_afew(varargin)
%set up the path
confPath;
%parameter setting
opts.dataDir = fullfile('./data/afew') ;
opts.imdbPathtrain = fullfile(opts.dataDir, 'spddb_afew_train_spd400_int_histeq.mat');
opts.batchSize = 30 ;
opts.test.batchSize = 1;
opts.numEpochs = 500 ;
opts.gpus = [] ;
opts.learningRate = 0.01*ones(1,opts.numEpochs);
opts.weightDecay = 0.0005 ;
opts.continue = 1;
%spdnet initialization
net = spdnet_init_afew() ;
%loading metadata 
load(opts.imdbPathtrain) ;
%spdnet training
[net, info] = spdnet_train_afew(net, spd_train, opts);

spdnet_init_afew.m

function net = spdnet_init_afew(varargin)
% spdnet_init initializes a spdnet

rng('default');
rng(0) ;

opts.layernum = 3;

Winit = cell(opts.layernum,1);
opts.datadim = [400, 200, 100, 50];


for iw = 1 : opts.layernum
    A = rand(opts.datadim(iw));
    [U1, S1, V1] = svd(A * A');
    Winit{iw} = U1(:,1:opts.datadim(iw+1));
end

f=1/100 ;
classNum = 7;
fdim = size(Winit{iw},2)*size(Winit{iw},2);
theta = f*randn(fdim, classNum, 'single');
Winit{end+1} = theta;

net.layers = {} ;
net.layers{end+1} = struct('type', 'bfc',...
                          'weight', Winit{1}) ;
net.layers{end+1} = struct('type', 'rec') ;
net.layers{end+1} = struct('type', 'bfc',...
                          'weight', Winit{2}) ;
net.layers{end+1} = struct('type', 'rec') ;
net.layers{end+1} = struct('type', 'bfc',...
                          'weight', Winit{3}) ;
net.layers{end+1} = struct('type', 'log') ;
net.layers{end+1} = struct('type', 'fc', ...
                           'weight', Winit{end}) ;
net.layers{end+1} = struct('type', 'softmaxloss') ;

spdnet_train_afew.m

function [net, info] = spdnet_train_afew(net, spd_train, opts)

opts.errorLabels = {'top1e'};
opts.train = find(spd_train.spd.set==1) ;
opts.val = find(spd_train.spd.set==2) ;
    
for epoch=1:opts.numEpochs
    learningRate = opts.learningRate(epoch);
    
     % fast-forward to last checkpoint
     modelPath = @(ep) fullfile(opts.dataDir, sprintf('net-epoch-%d.mat', ep));
     modelFigPath = fullfile(opts.dataDir, 'net-train.pdf') ;
     if opts.continue
         if exist(modelPath(epoch),'file')
             if epoch == opts.numEpochs
                 load(modelPath(epoch), 'net', 'info') ;
             end
             continue ;
         end
         if epoch > 1
             fprintf('resuming by loading epoch %d\n', epoch-1) ;
             load(modelPath(epoch-1), 'net', 'info') ;
         end
     end
    
    train = opts.train(randperm(length(opts.train))) ; % shuffle
    val = opts.val;

    [net,stats.train] = process_epoch(opts, epoch, spd_train, train, learningRate, net) ;
    [net,stats.val] = process_epoch(opts, epoch, spd_train, val, 0, net) ;

   
    % save
    evaluateMode = 0;
    
    if evaluateMode, sets = {'train'} ; else sets = {'train', 'val'} ; end

    for f = sets
        f = char(f) ;
        n = numel(eval(f)) ; %
        info.(f).objective(epoch) = stats.(f)(2) / n ;
        info.(f).error(:,epoch) = stats.(f)(3:end) / n ;
    end
    if ~evaluateMode, save(modelPath(epoch), 'net', 'info') ; end

    figure(1) ; clf ;
    hasError = 1 ;
    subplot(1,1+hasError,1) ;
    if ~evaluateMode
        semilogy(1:epoch, info.train.objective, '.-', 'linewidth', 2) ;
        hold on ;
    end
    semilogy(1:epoch, info.val.objective, '.--') ;
    xlabel('training epoch') ; ylabel('energy') ;
    grid on ;
    h=legend(sets) ;
    set(h,'color','none');
    title('objective') ;
    if hasError
        subplot(1,2,2) ; leg = {} ;
        if ~evaluateMode
            plot(1:epoch, info.train.error', '.-', 'linewidth', 2) ;
            hold on ;
            leg = horzcat(leg, strcat('train ', opts.errorLabels)) ;
        end
        plot(1:epoch, info.val.error', '.--') ;
        leg = horzcat(leg, strcat('val ', opts.errorLabels)) ;
        set(legend(leg{:}),'color','none') ;
        grid on ;
        xlabel('training epoch') ; ylabel('error') ;
        title('error') ;
    end
    drawnow ;
    print(1, modelFigPath, '-dpdf') ;
end
    
    
    
function [net,stats] = process_epoch(opts, epoch, spd_train, trainInd, learningRate, net)

training = learningRate > 0 ;
if training, mode = 'training' ; else mode = 'validation' ; end

stats = [0 ; 0 ; 0] ;
numGpus = numel(opts.gpus) ;
if numGpus >= 1
    one = gpuArray(single(1)) ;
else
    one = single(1) ;
end

batchSize = opts.batchSize;
errors = 0;
numDone = 0 ;


for ib = 1 : batchSize : length(trainInd)
    fprintf('%s: epoch %02d: batch %3d/%3d:', mode, epoch, ib,length(trainInd)) ;
    batchTime = tic ;
    res = [];
    if (ib+batchSize> length(trainInd))
        batchSize_r = length(trainInd)-ib+1;
    else
        batchSize_r = batchSize;
    end
    spd_data = cell(batchSize_r,1);    
    spd_label = zeros(batchSize_r,1);
    for ib_r = 1 : batchSize_r
        spdPath = [spd_train.spdDir '\' spd_train.spd.name{trainInd(ib+ib_r-1)}];
        load(spdPath); spd_data{ib_r} = Y1;
        spd_label(ib_r) = spd_train.spd.label(trainInd(ib+ib_r-1));

    end
    net.layers{end}.class = spd_label ;

    %forward/backward spdnet
    if training, dzdy = one; else dzdy = [] ; end
    res = vl_myforbackward(net, spd_data, dzdy, res) ;
   
    %accumulating graidents
    if numGpus <= 1
      [net,res] = accumulate_gradients(opts, learningRate, batchSize_r, net, res) ;
    else
      if isempty(mmap)
        mmap = map_gradients(opts.memoryMapFile, net, res, numGpus) ;
      end
      write_gradients(mmap, net, res) ;
      labBarrier() ;
      [net,res] = accumulate_gradients(opts, learningRate, batchSize_r, net, res, mmap) ;
    end
          
    % accumulate training errors
    predictions = gather(res(end-1).x) ;
    [~,pre_label] = sort(predictions, 'descend') ;
    error = sum(~bsxfun(@eq, pre_label(1,:)', spd_label)) ;

    numDone = numDone + batchSize_r ;
    errors = errors+error;
    batchTime = toc(batchTime) ;
    speed = batchSize/batchTime ;
    stats = stats+[batchTime ; res(end).x ; error]; % works even when stats=[]
    
    fprintf(' %.2f s (%.1f data/s)', batchTime, speed) ;

    fprintf(' error: %.5f', stats(3)/numDone) ;
    fprintf(' obj: %.5f', stats(2)/numDone) ;

    fprintf(' [%d/%d]', numDone, batchSize_r);
    fprintf('\n') ;
    
end


% -------------------------------------------------------------------------
function [net,res] = accumulate_gradients(opts, lr, batchSize, net, res, mmap)
% -------------------------------------------------------------------------
for l=numel(net.layers):-1:1
  if isempty(res(l).dzdw)==0
    if ~isfield(net.layers{l}, 'learningRate')
       net.layers{l}.learningRate = 1 ;
    end
    if ~isfield(net.layers{l}, 'weightDecay')
       net.layers{l}.weightDecay = 1;
    end
    thisLR = lr * net.layers{l}.learningRate ;

    if isfield(net.layers{l}, 'weight')
        if strcmp(net.layers{l}.type,'bfc')==1
            W1=net.layers{l}.weight;
            W1grad = (1/batchSize)*res(l).dzdw;
            %gradient update on Stiefel manifolds
            problemW1.M = stiefelfactory(size(W1,1), size(W1,2));
            W1Rgrad = (problemW1.M.egrad2rgrad(W1, W1grad));
            net.layers{l}.weight = (problemW1.M.retr(W1, -thisLR*W1Rgrad)); %%!!!NOTE
        else
            net.layers{l}.weight = net.layers{l}.weight - thisLR * (1/batchSize)* res(l).dzdw ;
        end
    
    end
  end
end

confPath.m

% Add folders to path.
addpath(pwd);

cd manopt;
addpath(genpath(pwd));
cd ..;

cd utils;
addpath(genpath(pwd));
cd ..;

cd spdnet;
addpath(genpath(pwd));
cd ..;

vl_mybfc.m

function [Y, Y_w] = vl_mybfc(X, W, dzdy)
%[DZDX, DZDF, DZDB] = VL_MYBFC(X, F, B, DZDY)
%BiMap layer

Y = cell(length(X),1);
for ix = 1  : length(X)
    Y{ix} = W'*X{ix}*W;
end
if nargin == 3
    [dim_ori, dim_tar] = size(W);
    Y_w = zeros(dim_ori,dim_tar);
    for ix = 1  : length(X)
        if iscell(dzdy)==1
            d_t = dzdy{ix};
        else
            d_t = dzdy(:,ix);
            d_t = reshape(d_t,[dim_tar dim_tar]);
        end
        Y{ix} = W*d_t*W';
        Y_w = Y_w+2*X{ix}*W*d_t;
    end
end

vl_myfc.m

function [Y, Y_w] = vl_myfc(X, W, dzdy)
%[DZDX, DZDF, DZDB] = vl_myconv(X, F, B, DZDY)
%regular fully connected layer

X_t = zeros(size(X{1},1)^2,length(X));
for ix = 1 : length(X)
    x_t = X{ix};
    X_t(:,ix) = x_t(:);
end
if nargin < 3
    Y = W'*X_t;
else
    Y = W * dzdy;
    Y_w = X_t * dzdy';
end

vl_myforbackward.m

function res = vl_myforbackward(net, x, dzdy, res, varargin)
% vl_myforbackward  evaluates a simple SPDNet

opts.res = [] ;
opts.conserveMemory = false ;
opts.sync = false ;
opts.disableDropout = false ;
opts.freezeDropout = false ;
opts.accumulate = false ;
opts.cudnn = true ;
opts.skipForward = false;
opts.backPropDepth = +inf ;
opts.epsilon = 1e-4;

% opts = vl_argparse(opts, varargin);

n = numel(net.layers) ;

if (nargin <= 2) || isempty(dzdy)
  doder = false ;
else
  doder = true ;
end

if opts.cudnn
  cudnn = {'CuDNN'} ;
else
  cudnn = {'NoCuDNN'} ;
end

gpuMode = isa(x, 'gpuArray') ;

if nargin <= 3 || isempty(res)
  res = struct(...
    'x', cell(1,n+1), ...
    'dzdx', cell(1,n+1), ...
    'dzdw', cell(1,n+1), ...
    'aux', cell(1,n+1), ...
    'time', num2cell(zeros(1,n+1)), ...
    'backwardTime', num2cell(zeros(1,n+1))) ;
end
if ~opts.skipForward
  res(1).x = x ;
end


% -------------------------------------------------------------------------
%                                                              Forward pass
% -------------------------------------------------------------------------

for i=1:n
  if opts.skipForward, break; end;
  l = net.layers{i} ;
  res(i).time = tic ;
  switch l.type
    case 'bfc'
      res(i+1).x = vl_mybfc(res(i).x, l.weight) ; 
    case 'fc'
      res(i+1).x = vl_myfc(res(i).x, l.weight) ; 
    case 'rec'
      res(i+1).x = vl_myrec(res(i).x, opts.epsilon) ; 
    case 'log'
      res(i+1).x = vl_mylog(res(i).x) ;
    case 'softmaxloss'
      res(i+1).x = vl_mysoftmaxloss(res(i).x, l.class) ; 

    case 'custom'
      res(i+1) = l.forward(l, res(i), res(i+1)) ;
    otherwise
      error('Unknown layer type %s', l.type) ;
  end
  % optionally forget intermediate results
  forget = opts.conserveMemory ;
  forget = forget & (~doder || strcmp(l.type, 'relu')) ;
  forget = forget & ~(strcmp(l.type, 'loss') || strcmp(l.type, 'softmaxloss')) ;
  forget = forget & (~isfield(l, 'rememberOutput') || ~l.rememberOutput) ;
  if forget
    res(i).x = [] ;
  end
  if gpuMode & opts.sync
    % This should make things slower, but on MATLAB 2014a it is necessary
    % for any decent performance.
    wait(gpuDevice) ;
  end
  res(i).time = toc(res(i).time) ;
end

% -------------------------------------------------------------------------
%                                                             Backward pass
% -------------------------------------------------------------------------

if doder
  res(n+1).dzdx = dzdy ;
  for i=n:-1:max(1, n-opts.backPropDepth+1)
    l = net.layers{i} ;
    res(i).backwardTime = tic ;
    switch l.type
      case 'bfc'
        [res(i).dzdx, res(i).dzdw] = ...
             vl_mybfc(res(i).x, l.weight, res(i+1).dzdx) ;
      case 'fc'
        [res(i).dzdx, res(i).dzdw]  = ...
              vl_myfc(res(i).x, l.weight, res(i+1).dzdx) ; 
      case 'rec'
        res(i).dzdx = vl_myrec(res(i).x, opts.epsilon, res(i+1).dzdx) ;
      case 'log'
        res(i).dzdx = vl_mylog(res(i).x, res(i+1).dzdx) ;
      case 'softmaxloss'
        res(i).dzdx = vl_mysoftmaxloss(res(i).x, l.class, res(i+1).dzdx) ;
      case 'custom'
        res(i) = l.backward(l, res(i), res(i+1)) ;
    end
    if opts.conserveMemory
      res(i+1).dzdx = [] ;
    end
    if gpuMode & opts.sync
      wait(gpuDevice) ;
    end
    res(i).backwardTime = toc(res(i).backwardTime) ;
  end
end

vl_mylog.m

function Y = vl_mylog(X, dzdy)
%Y = VL_MYLOG(X, DZDY)
%LogEig layer

Us = cell(length(X),1);
Ss = cell(length(X),1);
Vs = cell(length(X),1);

for ix = 1 : length(X)
    [Us{ix},Ss{ix},Vs{ix}] = svd(X{ix});
end


D = size(Ss{1},2);
Y = cell(length(X),1);

if nargin < 2
    for ix = 1:length(X)
        Y{ix} = Us{ix}*diag(log(diag(Ss{ix})))*Us{ix}';
    end
else
    for ix = 1:length(X)
        U = Us{ix}; S = Ss{ix}; V = Vs{ix};
        diagS = diag(S);
        ind =diagS >(D*eps(max(diagS)));
        Dmin = (min(find(ind,1,'last'),D));
        
        S = S(:,ind); U = U(:,ind);
        dLdC = double(reshape(dzdy(:,ix),[D D])); dLdC = symmetric(dLdC);
        

        dLdV = 2*dLdC*U*diagLog(S,0);
        dLdS = diagInv(S)*(U'*dLdC*U);
        if sum(ind) == 1 % diag behaves badly when there is only 1d
            K = 1./(S(1)*ones(1,Dmin)-(S(1)*ones(1,Dmin))'); 
            K(eye(size(K,1))>0)=0;
        else
            K = 1./(diag(S)*ones(1,Dmin)-(diag(S)*ones(1,Dmin))');
            K(eye(size(K,1))>0)=0;
            K(find(isinf(K)==1))=0; 
        end
        if all(diagS==1)
            dzdx = zeros(D,D);
        else
            dzdx = U*(symmetric(K'.*(U'*dLdV))+dDiag(dLdS))*U';

        end
        Y{ix} =  dzdx; %warning('no normalization');        
    end
end

vl_myrec.m

function Y = vl_myrec(X, epsilon, dzdy)
% Y = VL_MYREC (X, EPSILON, DZDY)
% ReEig layer

Us = cell(length(X),1);
Ss = cell(length(X),1);
Vs = cell(length(X),1);

for ix = 1 : length(X)
    [Us{ix},Ss{ix},Vs{ix}] = svd(X{ix});
end

D = size(Ss{1},2);
Y = cell(length(X),1);

if nargin < 3
    for ix = 1:length(X)
        [max_S, ~]=max_eig(Ss{ix},epsilon);
        Y{ix} = Us{ix}*max_S*Us{ix}';
    end
else
    for ix = 1:length(X)
        U = Us{ix}; S = Ss{ix}; V = Vs{ix};

        Dmin = D;
        
        dLdC = double(dzdy{ix}); dLdC = symmetric(dLdC);
        
        [max_S, max_I]=max_eig(Ss{ix},epsilon);
        dLdV = 2*dLdC*U*max_S;
        dLdS = (diag(not(max_I)))*U'*dLdC*U;
        
        
        K = 1./(diag(S)*ones(1,Dmin)-(diag(S)*ones(1,Dmin))');
        K(eye(size(K,1))>0)=0;
        K(find(isinf(K)==1))=0; 
        
        dzdx = U*(symmetric(K'.*(U'*dLdV))+dDiag(dLdS))*U';
        
        Y{ix} =  dzdx; %warning('no normalization');
    end
end

vl_mysoftmaxloss.m

function Y = vl_mysoftmaxloss(X,c,dzdy)
% Softmax layer

% class c = 0 skips a spatial location
mass = single(c > 0) ;
mass = mass';

% convert to indexes
c_ = c - 1 ;
for ic = 1  : length(c)
    c_(ic) = c(ic)+(ic-1)*size(X,1);
end

% compute softmaxloss
Xmax = max(X,[],1) ;
ex = exp(bsxfun(@minus, X, Xmax)) ;

% s = bsxfun(@minus, X, Xmax);
% ex = exp(s) ;
% y = ex./repmat(sum(ex,1),[size(X,1) 1]);

%n = sz(1)*sz(2) ;
if nargin < 3
  t = Xmax + log(sum(ex,1)) - reshape(X(c_), [1 size(X,2)]) ;
  Y = sum(sum(mass .* t,1)) ;
else
  Y = bsxfun(@rdivide, ex, sum(ex,1)) ;
  Y(c_) = Y(c_) - 1;
  Y = bsxfun(@times, Y, bsxfun(@times, mass, dzdy)) ;
end