FFN(mlpack)

FFN

Constructor

The main constructor, as declared in the header:

/**
 * Implementation of a standard feed forward network.
 *
 * @tparam OutputLayerType The output layer type used to evaluate the network.
 * @tparam InitializationRuleType Rule used to initialize the weight matrix.
 * @tparam CustomLayers Any set of custom layers that could be a part of the
 *         feed forward network.
 */
template<
  typename OutputLayerType = NegativeLogLikelihood<>,
  typename InitializationRuleType = RandomInitialization,
  typename... CustomLayers
>
class FFN
{
   
 public:
  //! Convenience typedef for the internal model construction.
  using NetworkType = FFN<OutputLayerType, InitializationRuleType>;

  /**
   * Create the FFN object.
   *
   * Optionally, specify which initialize rule and performance function should
   * be used.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @param outputLayer Output layer used to evaluate the network.
   * @param initializeRule Optional instantiated InitializationRule object
   *        for initializing the network parameter.
   */
  FFN(OutputLayerType outputLayer = OutputLayerType(),
      InitializationRuleType initializeRule = InitializationRuleType());

Implementation:

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
FFN<OutputLayerType, InitializationRuleType, CustomLayers...>::FFN(
    OutputLayerType outputLayer, InitializationRuleType initializeRule) :
    outputLayer(std::move(outputLayer)),
    initializeRule(std::move(initializeRule)),
    width(0),
    height(0),
    reset(false),
    numFunctions(0),
    deterministic(false)
{
   
  /* Nothing to do here. */
}
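
As a quick illustration (a minimal sketch, assuming the mlpack 3.x headers and the mlpack::ann namespace; the bounds are made up), the two constructor arguments can also be supplied explicitly, with std::move avoiding the extra copy as the documentation comment above suggests:

#include <mlpack/methods/ann/ffn.hpp>

using namespace mlpack::ann;

NegativeLogLikelihood<> loss;
RandomInitialization init(-0.5, 0.5);

// Hand both objects over to the network; after the moves, loss and init
// should no longer be used.
FFN<NegativeLogLikelihood<>, RandomInitialization> model(std::move(loss),
                                                         std::move(init));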

The constructor has two main template parameters, OutputLayerType and InitializationRuleType. Let's look at their default implementations.

The NegativeLogLikelihood header:

/**
 * Implementation of the negative log likelihood layer. The negative log
 * likelihood layer expectes that the input contains log-probabilities for each
 * class. The layer also expects a class index, in the range between 1 and the
 * number of classes, as target when calling the Forward function.
 *
 * @tparam InputDataType Type of the input data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 * @tparam OutputDataType Type of the output data (arma::colvec, arma::mat,
 *         arma::sp_mat or arma::cube).
 */
template <
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat
>
class NegativeLogLikelihood
{
   
 public:
  /**
   * Create the NegativeLogLikelihoodLayer object.
   */
  NegativeLogLikelihood();

  /**
   * Computes the Negative log likelihood.
   *
   * @param input Input data used for evaluating the specified function.
   * @param target The target vector, that contains the class index in the range
   *        between 1 and the number of classes.
   */
  template<typename InputType, typename TargetType>
  typename InputType::elem_type Forward(const InputType& input,
                                        const TargetType& target);

  /**
   * Ordinary feed backward pass of a neural network. The negative log
   * likelihood layer expects that the input contains log-probabilities for
   * each class. The layer also expects a class index, in the range between 1
   * and the number of classes, as target when calling the Forward function.
   *
   * @param input The propagated input activation.
   * @param target The target vector, that contains the class index in the range
   *        between 1 and the number of classes.
   * @param output The calculated error.
   */
  template<typename InputType, typename TargetType, typename OutputType>
  void Backward(const InputType& input,
                const TargetType& target,
                OutputType& output);

  //! Get the input parameter.
  InputDataType& InputParameter() const { return inputParameter; }
  //! Modify the input parameter.
  InputDataType& InputParameter() { return inputParameter; }

  //! Get the output parameter.
  OutputDataType& OutputParameter() const { return outputParameter; }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return outputParameter; }

  //! Get the delta.
  OutputDataType& Delta() const { return delta; }
  //! Modify the delta.
  OutputDataType& Delta() { return delta; }

  /**
   * Serialize the layer
   */
  template<typename Archive>
  void serialize(Archive& /* ar */, const unsigned int /* version */);

 private:
  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored input parameter object.
  InputDataType inputParameter;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;
}; // class NegativeLogLikelihood

Implementation:

template<typename InputDataType, typename OutputDataType>
NegativeLogLikelihood<InputDataType, OutputDataType>::NegativeLogLikelihood()
{
   
  // Nothing to do here.
}

template<typename InputDataType, typename OutputDataType>
template<typename InputType, typename TargetType>
typename InputType::elem_type
NegativeLogLikelihood<InputDataType, OutputDataType>::Forward(
    const InputType& input,
    const TargetType& target)
{
   
  typedef typename InputType::elem_type ElemType;
  ElemType output = 0;
  for (size_t i = 0; i < input.n_cols; ++i)
  {
   
    size_t currentTarget = target(i) - 1;
    Log::Assert(currentTarget < input.n_rows,
        "Target class out of range.");

    output -= input(currentTarget, i);
  }

  return output;
}

template<typename InputDataType, typename OutputDataType>
template<typename InputType, typename TargetType, typename OutputType>
void NegativeLogLikelihood<InputDataType, OutputDataType>::Backward(
      const InputType& input,
      const TargetType& target,
      OutputType& output)
{
   
  output = arma::zeros<OutputType>(input.n_rows, input.n_cols);
  for (size_t i = 0; i < input.n_cols; ++i)
  {
   
    size_t currentTarget = target(i) - 1;
    Log::Assert(currentTarget < input.n_rows,
        "Target class out of range.");

    output(currentTarget, i) = -1;
  }
}

template<typename InputDataType, typename OutputDataType>
template<typename Archive>
void NegativeLogLikelihood<InputDataType, OutputDataType>::serialize(
    Archive& /* ar */,
    const unsigned int /* version */)
{
   
  // Nothing to do here.
}

The heart of the negative log likelihood loss is the pair of methods Forward and Backward. Let us introduce some notation:

$$
\begin{aligned}
&input: (X_1, \cdots, X_N)\,, \quad X_i \in \mathbb{R}^n \ \ \forall\, i \in [1, N] \\
&\Rightarrow
\begin{bmatrix}
x_{11} & x_{12} & \cdots & x_{1N} \\
\vdots &        &        & \vdots \\
x_{n1} & x_{n2} & \cdots & x_{nN}
\end{bmatrix} \\
&target: (y_1, \cdots, y_N)\,, \quad y_i \in [1, m]
\end{aligned}
$$

Therefore:

Forward:

$$
output = -\sum_{i=1}^{N} x_{(y_i,\, i)}\,, \quad y_i \leqslant n
$$

Backward:

$$
(n \times N): \quad output_{(j,\, i)} =
\begin{cases}
-1\,, & j = y_i \ \ (y_i \leqslant n) \\
0\,, & \text{otherwise}
\end{cases}
$$
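
As a sanity check of these two formulas, here is a minimal sketch (assuming mlpack 3.x; the numbers are made up) with n = 3 classes and N = 2 samples:

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/layer/negative_log_likelihood.hpp>

using namespace mlpack::ann;

int main()
{
  // Log-probabilities: 3 classes (rows) x 2 samples (columns).
  arma::mat input = { { -0.1, -2.0 },
                      { -1.5, -0.3 },
                      { -2.3, -1.2 } };
  // Class indices in [1, 3]: sample 1 -> class 1, sample 2 -> class 2.
  arma::mat target = { 1, 2 };

  NegativeLogLikelihood<> nll;

  // Forward: -(input(0, 0) + input(1, 1)) = 0.1 + 0.3 = 0.4.
  double loss = nll.Forward(input, target);

  // Backward: a 3x2 matrix with -1 at (0, 0) and (1, 1), zeros elsewhere.
  arma::mat gradient;
  nll.Backward(input, target, gradient);

  std::cout << "loss = " << loss << "\n" << gradient << std::endl;
}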

RandomInitialization:

/**
 * This class is used to initialize randomly the weight matrix.
 */
class RandomInitialization
{
   
 public:
  /**
   * Initialize the random initialization rule with the given lower bound and
   * upper bound.
   *
   * @param lowerBound The number used as lower bound.
   * @param upperBound The number used as upper bound.
   */
  RandomInitialization(const double lowerBound = -1,
                       const double upperBound = 1) :
      lowerBound(lowerBound), upperBound(upperBound) { }

  /**
   * Initialize the random initialization rule with the given bound.
   * Using the negative of the bound as lower bound and the positive bound as
   * upper bound.
   *
   * @param bound The number used as lower bound
   */
  RandomInitialization(const double bound) :
      lowerBound(-std::abs(bound)), upperBound(std::abs(bound)) { }

  /**
   * Initialize randomly the elements of the specified weight matrix.
   *
   * @param W Weight matrix to initialize.
   * @param rows Number of rows.
   * @param cols Number of columns.
   */
  template<typename eT>
  void Initialize(arma::Mat<eT>& W, const size_t rows, const size_t cols)
  {
   
    if (W.is_empty())
      W.set_size(rows, cols);

    W.randu();
    W *= (upperBound - lowerBound);
    W += lowerBound;
  }

  /**
   * Initialize randomly the elements of the specified weight matrix.
   *
   * @param W Weight matrix to initialize.
   */
  template<typename eT>
  void Initialize(arma::Mat<eT>& W)
  {
   
    if (W.is_empty())
      Log::Fatal << "Cannot initialize an empty matrix." << std::endl;

    W.randu();
    W *= (upperBound - lowerBound);
    W += lowerBound;
  }

  /**
   * Initialize randomly the elements of the specified weight 3rd order tensor.
   *
   * @param W Weight matrix to initialize.
   * @param rows Number of rows.
   * @param cols Number of columns.
   * @param slices Number of slices.
   */
  template<typename eT>
  void Initialize(arma::Cube<eT>& W,
                  const size_t rows,
                  const size_t cols,
                  const size_t slices)
  {
   
    if (W.is_empty())
      W.set_size(rows, cols, slices);

    for (size_t i = 0; i < slices; ++i)
      Initialize(W.slice(i), rows, cols);
  }

  /**
   * Initialize randomly the elements of the specified weight 3rd order tensor.
   *
   * @param W Weight matrix to initialize.
   */
  template<typename eT>
  void Initialize(arma::Cube<eT>& W)
  {
   
    if (W.is_empty())
      Log::Fatal << "Cannot initialize an empty cube." << std::endl;

    for (size_t i = 0; i < W.n_slices; ++i)
      Initialize(W.slice(i));
  }

 private:
  //! The number used as lower bound.
  double lowerBound;

  //! The number used as upper bound.
  double upperBound;
}; // class RandomInitialization

The official Armadillo documentation describes .randu() as follows:

.randu() uses a uniform distribution in the [0,1] interval

Thus, this initialization rule first draws values from $U(0, 1)$, multiplies them by $(upperBound - lowerBound)$, and then adds $lowerBound$, so the weights are uniformly distributed on $[lowerBound, upperBound]$.
Hence:

$$
E(W) = \frac{upperBound + lowerBound}{2}\,, \qquad
D(W) = \frac{(upperBound - lowerBound)^2}{12}
$$
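
A quick empirical check of these two formulas (a minimal sketch, assuming mlpack 3.x; with bounds -1 and 1 we expect a mean of 0 and a variance of 4/12 ≈ 0.333):

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/init_rules/random_init.hpp>

using namespace mlpack::ann;

int main()
{
  RandomInitialization init(-1.0, 1.0);

  // Fill a large matrix so the sample mean/variance are close to E(W), D(W).
  arma::mat W;
  init.Initialize(W, 1000, 1000);

  std::cout << "mean = " << arma::mean(arma::vectorise(W))
            << ", var = " << arma::var(arma::vectorise(W)) << std::endl;
}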

Train

The Train header:

  /**
   * Train the feedforward network on the given input data using the given
   * optimizer.
   *
   * This will use the existing model parameters as a starting point for the
   * optimization. If this is not what you want, then you should access the
   * parameters vector directly with Parameters() and modify it as desired.
   *
   * If you want to pass in a parameter and discard the original parameter
   * object, be sure to use std::move to avoid unnecessary copy.
   *
   * @tparam OptimizerType Type of optimizer to use to train the model.
   * @tparam CallbackTypes Types of Callback Functions.
   * @param predictors Input training variables.
   * @param responses Outputs results from input training variables.
   * @param optimizer Instantiated optimizer used to train the model.
   * @param callbacks Callback function for ensmallen optimizer `OptimizerType`.
   *      See https://www.ensmallen.org/docs.html#callback-documentation.
   * @return The final objective of the trained model (NaN or Inf on error).
   */
  template<typename OptimizerType, typename... CallbackTypes>
  double Train(arma::mat predictors,
               arma::mat responses,
               OptimizerType& optimizer,
               CallbackTypes&&... callbacks);

Implementation:

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
template<typename OptimizerType, typename... CallbackTypes>
double FFN<OutputLayerType, InitializationRuleType, CustomLayers...>::Train(
      arma::mat predictors,
      arma::mat responses,
      OptimizerType& optimizer,
      CallbackTypes&&... callbacks)
{
   
  ResetData(std::move(predictors), std::move(responses));

  WarnMessageMaxIterations<OptimizerType>(optimizer, this->predictors.n_cols);

  // Train the model.
  Timer::Start("ffn_optimization");
  const double out = optimizer.Optimize(*this, parameter, callbacks...);
  Timer::Stop("ffn_optimization");

  Log::Info << "FFN::FFN(): final objective of trained model is " << out
      << "." << std::endl;
  return out;
}

Once the model has been constructed, it is trained on the given data set and labels. From the implementation this is easy to follow:
an optimizer from ensmallen is used, the network passes itself (*this) as the objective function to be optimized, and the member matrix parameter is passed in as the variable being optimized.
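
From the user's side this looks roughly as follows (a minimal sketch, assuming mlpack 3.x with ensmallen; the layer sizes and the random data are made up):

#include <mlpack/core.hpp>
#include <mlpack/methods/ann/ffn.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <ensmallen.hpp>

using namespace mlpack::ann;

int main()
{
  // Fake data: 10-dimensional inputs, 100 samples, 3 classes.
  arma::mat predictors(10, 100, arma::fill::randu);
  arma::mat responses = arma::floor(arma::randu<arma::mat>(1, 100) * 3) + 1;

  // Default template arguments: NegativeLogLikelihood<> and
  // RandomInitialization, i.e. exactly the defaults discussed above.
  FFN<> model;
  model.Add<Linear<>>(10, 16);
  model.Add<ReLULayer<>>();
  model.Add<Linear<>>(16, 3);
  model.Add<LogSoftMax<>>();

  // The optimizer repeatedly calls model.Evaluate() / model.Gradient().
  ens::Adam optimizer(0.01, 32, 0.9, 0.999, 1e-8, 5000);
  model.Train(predictors, responses, optimizer);
}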

Recalling the Adam optimization algorithm introduced earlier, one can guess that the model must expose Evaluate and Gradient functions.

Sure enough:

Evaluate

The Evaluate header:

  /**
   * Evaluate the feedforward network with the given parameters. This function
   * is usually called by the optimizer to train the model.
   *
   * @param parameters Matrix model parameters.
   */
  double Evaluate(const arma::mat& parameters);

   /**
   * Evaluate the feedforward network with the given parameters, but using only
   * a number of data points. This is useful for optimizers such as SGD, which
   * require a separable objective function.
   *
   * @param parameters Matrix model parameters.
   * @param begin Index of the starting point to use for objective function
   *        evaluation.
   * @param batchSize Number of points to be passed at a time to use for
   *        objective function evaluation.
   * @param deterministic Whether or not to train or test the model. Note some
   *        layer act differently in training or testing mode.
   */
  double Evaluate(const arma::mat& parameters,
                  const size_t begin,
                  const size_t batchSize,
                  const bool deterministic);

Implementation:

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
double FFN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& parameters)
{
   
  double res = 0;
  for (size_t i = 0; i < predictors.n_cols; ++i)
    res += Evaluate(parameters, i, 1, true);

  return res;
}

template<typename OutputLayerType, typename InitializationRuleType,
         typename... CustomLayers>
double FFN<OutputLayerType, InitializationRuleType, CustomLayers...>::Evaluate(
    const arma::mat& /* parameters */,
    const size_t begin,
    const size_t batchSize,
    const bool deterministic)
{
   
  if (parameter.is_empty())
    ResetParameters();

  if (deterministic != this->deterministic)
  {
   
    this->deterministic = deterministic;
    ResetDeterministic();
  }

  Forward(predictors.cols(begin, begin + batchSize - 1));
  double res = outputLayer.Forward(
      boost::apply_visitor(outputParameterVisitor, network.back()),
      responses.cols(begin, begin + batchSize - 1));

  for (size_t i = 0; i < network.size(); ++i)
  {
   
    res += boost::apply_visitor(lossVisitor, network[i]);
  }

  return res;
}
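
After training, the separable overload can also be called directly, which is what SGD-style optimizers in ensmallen do internally (a minimal sketch, reusing the model from the training example above):

// Objective on the mini-batch of 32 points starting at column 0, with the
// network in deterministic (test) mode.
double batchLoss = model.Evaluate(model.Parameters(), 0, 32, true);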

Let's first take a look at the two Reset methods:

Reset

ResetDeterministic

template<typename 