function [res] = vl_myforbackward(net, x, dzdy, res, epoch, count1, varargin)
% vl_myforbackward evaluates a simple SPDNet
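%
% A minimal usage sketch (an assumed call pattern; the surrounding training
% loop is not part of this file):
%   res = vl_myforbackward(net, x, [], [], epoch, count1);        % forward only
%   res = vl_myforbackward(net, x, single(1), [], epoch, count1); % forward + backward
% The third argument is the derivative seed for the loss output; when it is
% empty or omitted, only the forward pass is run.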
opts.res = [];
opts.conserveMemory = false;
opts.sync = false;
opts.disableDropout = false;
opts.freezeDropout = false;
opts.accumulate = false;
opts.cudnn = true;
opts.skipForward = false;
opts.backPropDepth = +inf;
opts.epsilon = 1e-5; % this parameter is used in the ReEig layer
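% Sketch of the eigen-based SPD operations referenced below (assumed to follow
% the standard SPDNet formulation; the actual implementations live in vl_myrec,
% vl_mylog and vl_myexp): with X = U*diag(s)*U' an eigendecomposition of an SPD
% input,
%   ReEig:  U * diag(max(s, opts.epsilon)) * U'   % rectify small eigenvalues
%   LogEig: U * diag(log(s)) * U'                 % matrix logarithm
%   ExpEig: U * diag(exp(s)) * U'                 % matrix exponential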
% dev_ml = cell(1,30);
% dev_re = cell(1,30);
% for p =1:30
% dev_ml{p}= zeros(50,50);
% dev_re{p}= zeros(400,400);
% end
n = numel(net.layers); % count the number of layers
if (nargin <= 2) || isempty(dzdy)
doder = false;
else
doder = true; % this variable controls whether the derivatives are computed
end
if opts.cudnn
cudnn = {'CuDNN'};
else
cudnn = {'NoCuDNN'};
end
gpuMode = isa(x, 'gpuArray');
if nargin <= 3 || isempty(res)
res = struct(...
'x', cell(1,n+1), ...
'dzdx', cell(1,n+1), ... % this gradient is necessary for computing the gradients in the layers below and updating their parameters
'dzdw', cell(1,n+1), ... % this gradient is required for updating W
'aux', cell(1,n+1), ...
'time', num2cell(zeros(1,n+1)), ...
'backwardTime', num2cell(zeros(1,n+1)));
end
if ~opts.skipForward
res(1).x = x ;
end
% -------------------------------------------------------------------------
% Forward pass
% -------------------------------------------------------------------------
% res.SS speeds up training: it is analogous to PyTorch's ctx context, i.e.
% data shared between the forward and backward passes.
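% Caching-pattern sketch (an assumption based on how SS is used below): the
% forward call of an eigen-based layer returns the decomposition it computed,
% roughly
%   [U, S, V] = svd(X{k});   % done inside vl_myrec / vl_mylog / vl_myexp
%   SS{k} = {U, S, V};       % cached in res(i+1).SS
% so that the backward call can reuse these factors instead of re-decomposing X.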
for i = 1:n
if opts.skipForward
break;
end
l = net.layers{i}; % each net layer stores two components: (1) the layer type and (2) its weights
res(i).time = tic; % measure the time spent in each layer
switch l.type
case 'bfc'
res(i+1).x = vl_mybfc(res(i).x, l.weight, i, res); % the output data of each layer is stored in the x field
case 'fc'
res(i+1).x = vl_myfc(res(i).x, l.weight);
case 'rec'
if doder
alt = doder;
else
alt = doder + 1;
end
[res(i+1).x, SS] = vl_myrec(res(i).x, opts.epsilon, alt, []);
res(i+1).SS = SS;
%res(i+1).recov = recov;
case 'add'
% The earlier x is used because it comes from the symmetric (paired) branch:
% the log layers appear in pairs, and this layer takes the weighted average of
% the two log-mapped outputs.
sc = res(i-1).x;
res(i+1).x = vl_myadd(res(i).x, sc);
case 'rec_relu'
res(i+1).x = vl_myrec_relu(res(i).x, opts.epsilon);
case 'marginloss'
res(i+1).obj = 0.0;
res(i+1).x = res(i).x;
case 'reconstructionloss'
res(i+1).obj = vl_myreconstructionloss(res(i).x, res(1).x, epoch);
res(i+1).x = res(7).x;
case 'log'
if doder
alt = doder;
else
alt = doder + 1;
end
% For layers 10 and 16, the log is taken of the corresponding SPD matrix
% produced earlier in the network rather than of the previous layer's output.
if i == 10
sc = res(i-5).x;
elseif i == 16
sc = res(i-13).x;
else
sc = res(i).x;
end
% SS is the SVD decomposition of sc
[res(i+1).x, SS] = vl_mylog(sc, alt, []);
res(i+1).SS = SS;
case 'exp' % alt controls whether the call runs the forward or the backward computation
if doder
alt = doder;
else
alt = doder + 1;
end
[res(i+1).x, SS] = vl_myexp(res(i).x, alt, []);
res(i+1).SS = SS; % shared state between this layer's forward and backward passes,
% analogous to PyTorch's ctx
case 'softmaxloss'
res(i+1).x = vl_mysoftmaxloss(res(i).x, l.class);
case 'custom'
res(i+1) = l.forward(l, res(i), res(i+1));
otherwise
error('Unknown layer type %s', l.type);
end
% optionally forget intermediate results
forget = opts.conserveMemory;
forget = forget & (~doder || strcmp(l.type, 'relu'));
forget = forget & ~(strcmp(l.type, 'loss') || strcmp(l.type, 'softmaxloss'));
forget = forget & (~isfield(l, 'rememberOutput') || ~l.rememberOutput);
if forget
res(i).x = [];
end
if gpuMode & opts.sync
% This should make things slower, but on MATLAB 2014a it is necessary
% for any decent performance.
wait(gpuDevice);
end
res(i).time = toc(res(i).time);
end
% -------------------------------------------------------------------------
% Backward pass
% -------------------------------------------------------------------------
if doder
res(n+1).dzdx = dzdy; % the first right-hand factor of eq. 6 in SPDNet; here its value is 1
for i = n:-1:max(1, n-opts.backPropDepth+1) % compute the derivatives in reverse order
l = net.layers{i};
res(i).backwardTime = tic ;
switch l.type
case 'bfc'
% dzdw is returned because the bfc weights must be updated; every layer with
% weights (e.g. fc) returns dzdw
[res(i).dzdx, res(i).dzdw] = ... % all the data in a given batch share the same weight
    vl_mybfc(res(i).x, l.weight, i, res, res(i+1).dzdx);
case 'fc'
[res(i).dzdx, res(i).dzdw] = ...
    vl_myfc(res(i).x, l.weight, res(i+1).dzdx);
case 'rec'
temp = res(i).x;
% The rec layer at depth 4 feeds the log layer at depth 10, so it receives the
% dzdx of layer 10; likewise the rec layer at depth 2 receives the dzdx of layer 16.
if i == 4
dev_sc = res(i+6).dzdx;
elseif i == 2
dev_sc = res(i+14).dzdx;
else
ZM = zeros(size(temp{1},1), size(temp{1},2));
for num = 1:length(temp)
dev_sc{num} = ZM;
end
end
alt = doder;
alt = alt - 1;
[res(i).dzdx, SS] = vl_myrec(res(i).x, opts.epsilon, alt, res(i+1).SS, res(i+1).dzdx, dev_sc);
%[res(i).dzdx, recov] = vl_myrec(res(i).x, opts.epsilon, res(i+1).dzdx);
case 'add'
% sc is the output of the previous log layer and res(i).x is the output of this
% log layer; the forward pass adds the two log results as a weighted average.
% (I verified that sc and res(i).x have the same size, so this reading holds.)
% Passing these two inputs has no real effect on the backward computation; this
% call is where the backward step happens, and the add backward formula is
% explained inside vl_myadd.
sc = res(i-1).x;
res(i).dzdx = vl_myadd(res(i).x, sc, res(i+1).dzdx);
case 'rec_relu'
res(i).dzdx = vl_myrec_relu(res(i).x, opts.epsilon, res(i+1).dzdx);
case 'marginloss'
dev_ml_trans = cell(length(res(i).x), 1);
dzdx_recon = res(i+1).dzdx;
dzdx_log = res(i+15).dzdx;
for ii = 1:length(res(i).x)
dev_ml_trans{ii} = dzdx_recon{ii} + dzdx_log{ii};
end
res(i).dzdx = dev_ml_trans;
case 'reconstructionloss'
% Although res(i+1).dzdx carries the gradient propagated back from the later
% log/fc/softmaxloss layers, it is not actually used: reconstructionloss is a
% terminal loss and should not consume the upstream dzdx. Inside the function
% the passed-in dzdy is re-initialised to zero before the computed derivative
% is added.
res(i).dzdx = vl_myreconstructionloss(res(i).x, res(1).x, epoch, res(i+1).dzdx); % dev_re
case 'exp'
alt = doder;
alt = alt - 1;
[res(i).dzdx, SS] = vl_myexp(res(i).x, alt, res(i+1).SS, res(i+1).dzdx);
case 'log'
alt = doder;
alt = alt - 1;
% Layers 9 and 15 are the first and third of the five log layers, i.e. the
% first member of each of the first two log pairs. The difference between i+1
% and i+2 is that i+1 is the derivative from the next layer while i+2 is the
% derivative from the layer after that, the add layer; since add simply sums
% the two log-mapped outputs, taking the add layer's derivative directly is
% also correct for layers 9 and 15 (the data flows backwards here).
if i == 9 || i == 15
dev_sc = res(i+2).dzdx;
else
dev_sc = res(i+1).dzdx;
end
if i == 10
sc = res(i-5).x;
elseif i == 16
% res(3).x is the output of the third bfc layer before the log map; as in
% SPDNet, the pre-log x is used here, for the same reason the paper uses the
% original x.
sc = res(i-13).x;
else
sc = res(i).x;
end
% The following also covers the final log layer (the log in log-fc-softmaxloss):
% sc is this layer's input, res(i+1).SS is the shared data cached by this
% layer's forward pass, and dev_sc is the dzdx from the next layer. The line
% itself is ordinary, but the resulting dzdx is ultimately consumed by the
% marginloss backward pass, because in the forward pass vl_mylog follows the
% marginloss layer.
[res(i).dzdx, SS] = vl_mylog(sc, alt, res(i+1).SS, dev_sc);
case 'softmaxloss'
res(i).dzdx = vl_mysoftmaxloss(res(i).x, l.class, res(i+1).dzdx);
case 'custom'
res(i) = l.backward(l, res(i), res(i+1));
end
if opts.conserveMemory
res(i+1).dzdx =[];
end
if gpuMode & opts.sync
wait(gpuDevice);
end
res(i).backwardTime = toc(res(i).backwardTime);
end
end
function Y = vl_myreconstructionloss(X, X_ori, epoch, dzdy)
% this function implements the decoder term (the reconstruction loss)
% Date:
% Author:
% Copyright:
% Note: to make the code run correctly, I adjusted lines 96-107 of steifelfactory.m
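%
% Call pattern (as used in the forward/backward passes above):
%   obj  = vl_myreconstructionloss(X, X_ori, epoch)        % forward: scalar objective
%   dzdx = vl_myreconstructionloss(X, X_ori, epoch, dzdy)  % backward: cell array of derivative matrices, one per sample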
for m = 1:length(X)
dzdy{m} = single(zeros(size(X{1},1), size(X{1},1))); % 400 x 400
end
dzdy_l3 = single(1); % dzdy_l3 is 1
gamma = 0.01; % needs to be adjusted to 1e-4 for EWD
count = epoch;
%gamma = 0.8^floor(epoch / 20) * gamma;
% 1 x 30
dist_sum = zeros(1, length(X)); % save each pair's distance
% Y is a 30 x 1 cell. For the forward pass the objective is a single scalar
% that overwrites this initialisation below, so there this line has no effect;
% it exists for the backward pass.
Y = cell(length(X), 1); % save obj or dev
% 1 x 30
dev_term = cell(1, length(X)); % save each pair's derivative
for i = 1 : length(X)
temp = X{i} - X_ori{i}; % the last layer's matrix minus the first layer's matrix (51 x 51)
dev_term{i} = 2 * temp; % d((x1-x2)^2)/dx1 = 2*(x1-x2)
dist_sum(i) = norm(temp,'fro') * norm(temp,'fro'); % sum of the squares of all elements (squared Frobenius norm)
end
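% In formula form (read directly off the code above): with N = length(X) pairs,
% the forward objective is
%   obj = gamma * (1/N) * sum_i ||X{i} - X_ori{i}||_F^2
% and the backward pass returns, per sample,
%   Y{i} = gamma * 2*(X{i} - X_ori{i}) + dzdy{i}
% where dzdy{i} has just been reset to zero above.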
if nargin < 4
% the 30 squared distances above are summed, averaged, and scaled by gamma
Y = gamma * (sum(dist_sum) / length(X)); % the obj of this loss function
else
for j = 1:length(X)
% bsxfun broadcasts its arguments: the inner call bsxfun(@times, ones(size(X{1},1)), dzdy_l3)
% multiplies a 51 x 51 all-ones matrix by the scalar dzdy_l3 = 1 and returns an
% all-ones matrix; the outer call then multiplies dev_term{j} (51 x 51)
% element-wise by that all-ones matrix, which leaves it unchanged. The result is
% the derivative matrix, equivalent to dev_term{j} * dzdy_l3 for scalar dzdy_l3.
dev_l3 = bsxfun(@times, dev_term{j}, bsxfun(@times, ones(size(X{1},1)), dzdy_l3));
Y{j} = gamma * dev_l3 + dzdy{j}; % the sum of the reconstruction term and the softmax term
end
end
end