Notes on the solver in Caffe

// NOTE

// Update the next available ID when you add a new SolverParameter field.

//

// SolverParameter next available ID: 44 (last added: plateau_winsize)

message SolverParameter {

 //

  // Specifying the train and test networks

  //

  // Exactly one train net must be specified using one of the following fields:

  //     train_net_param, train_net, net_param, net (exactly one train net is required)

  // One or more test nets may be specified using any of the following fields:

  //     test_net_param, test_net, net_param, net (multiple test nets are allowed)

  // If more than one test net field is specified (e.g., both net and

  // test_net are specified), they will be evaluated in the field order given

  // above: (1) test_net_param, (2) test_net, (3) net_param/net.

  // A test_iter must be specified for each test_net.

  // A test_level and/or a test_stage may also be specified for each test_net.

 //

  // The network parameters and how they are evaluated

  // Proto filename for the train net, possibly combined with one or more

  // test nets.

 optional string net = 24;

  // Inline train net param, possibly combined with one or more test nets.

 optional NetParameter net_param = 25;

 

 optional string train_net = 1; // Proto filename for the train net.

 repeated string test_net = 2; // Proto filenames for the test nets.

 optional NetParameter train_net_param = 21; // Inline train net params.

 repeated NetParameter test_net_param = 22; // Inline test net params.

 

  // The states for the train/test nets. Must be unspecified or

  // specified once per net.

  //

  // By default, all states will have solver = true;

  // train_state will have phase = TRAIN,

  // and all test_state's will have phase = TEST.

  // Other defaults are set according to the NetState defaults.

  optional NetState train_state = 26;

  repeated NetState test_state = 27;

 

  // Evaluation type.

 optional string eval_type = 41 [default = "classification"];

  // ap_version: different ways of computing Average Precision.

  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.

  //    11point: the 11-point interpolated average precision. Used in VOC2007.

  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.

  //    Integral: the natural integral of the precision-recall curve.

 optional string ap_version = 42 [default = "Integral"];

  // If true, display per-class results.

 optional bool show_per_class_result = 44 [default = false];

  // The number of iterations for each test net.

 repeated int32 test_iter = 3;

  // The number of iterations between two testing phases.

 optional int32 test_interval = 4 [default = 0];
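  // Example (illustrative numbers, not from the original post): with a test
  // batch size of 25 and a test set of 10,000 images, test_iter: 400 covers
  // the whole test set once, and test_interval: 1000 runs that test pass
  // every 1000 training iterations.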

 optional bool test_compute_loss = 19 [default = false];

  // If true, run an initial test pass before the first iteration,

  // ensuring memory availability and printing the starting value of the loss.

 optional bool test_initialization = 32 [default = true];

  optional float base_lr = 5; // The base learning rate (used together with the decay policy below)

  // The number of iterations between displaying info. If display = 0, no info

  // will be displayed.

 optional int32 display = 6;

  // Display the loss averaged over the last average_loss iterations

 optional int32 average_loss = 33 [default = 1];

  optional int32 max_iter = 7; // the maximum number of iterations

  // Accumulate gradients over `iter_size` x `batch_size` instances.

 optional int32 iter_size = 36 [default = 1];
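  // Example (illustrative numbers): with batch_size: 32 in the net and
  // iter_size: 4 here, gradients are accumulated over 4 forward/backward
  // passes before each weight update, for an effective batch size of 128.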

  // The learning rate decay policy. The currently implemented learning rate

  // policies are as follows:

 //    - fixed: always return base_lr.

 //    - step: return base_lr * gamma ^ (floor(iter / step))

 //    - exp: return base_lr * gamma ^ iter

 //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)

 //    - multistep: similar to step but it allows non-uniform steps defined by

 //      stepvalue

 //    - poly: the effective learning rate follows a polynomial decay, to be

 //      zero by the max_iter. return base_lr * (1 - iter/max_iter) ^ (power)

 //    - sigmoid: the effective learning rate follows a sigmoid decay

 //      return base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))

 //    - plateau: decreases lr

 //              if the minimum loss isn't updated for 'plateau_winsize' iters

  //

  // where base_lr, max_iter, gamma, step, stepvalue and power are defined

  // in the solver parameter protocol buffer, and iter is the current iteration.

 optional string lr_policy = 8;
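  // Example (illustrative numbers): with base_lr: 0.01, lr_policy: "step",
  // gamma: 0.1 and stepsize: 10000, the learning rate is 0.01 for iterations
  // 0-9999, 0.001 for 10000-19999, 0.0001 for 20000-29999, and so on.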

 optional float gamma = 9; // The parameter to compute the learning rate.

 optional float power = 10; // The parameter to compute the learning rate.

  optional float momentum = 11; // The momentum value.

  optional float weight_decay = 12; // The weight decay.

  // regularization types supported: L1 and L2

  // controlled by weight_decay

  optional string regularization_type = 29 [default = "L2"];

  // the stepsize for learning rate policy "step"

 optional int32 stepsize = 13;

  // the step values for learning rate policy "multistep"

 repeated int32 stepvalue = 34;

  // the window sizes for learning rate policy "plateau"

 repeated int32 plateau_winsize = 43;

 

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,

  // whenever their actual L2 norm is larger.

  optional float clip_gradients = 35 [default = -1];
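  // Example (illustrative): clip_gradients: 10 rescales the entire gradient
  // vector so that its L2 norm never exceeds 10 before the update is applied.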

  // Snapshot-related settings

 optional int32 snapshot = 14 [default = 0]; // The snapshot interval

 optional string snapshot_prefix = 15; // The prefix for the snapshot.

  // whether to snapshot diff in the results or not. Snapshotting diff will help

  // debugging but the final protocol buffer size will be much larger.

 optional bool snapshot_diff = 16 [default = false];

 enum SnapshotFormat {

   HDF5 = 0;

   BINARYPROTO = 1;

  }

 optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];

  // the mode the solver will use: 0 for CPU and 1 for GPU. GPU is the default.

 enum SolverMode {

   CPU = 0;

   GPU = 1;

  }

  // The optimization mode.

 optional SolverMode solver_mode = 17 [default = GPU];

  // the device_id that will be used in GPU mode. device_id = 0 is the default.

 optional int32 device_id = 18 [default = 0];

  // If non-negative, the seed with which the Solver will initialize the Caffe

  // random number generator -- useful for reproducible results. Otherwise,

  // (and by default) initialize using a seed derived from the system clock.

 optional int64 random_seed = 20 [default = -1];

 

  // type of the solver

 optional string type = 40 [default = "SGD"];

 

  // numerical stability for RMSProp, AdaGrad, AdaDelta and Adam

 optional float delta = 31 [default = 1e-8];

  // parameters for the Adam solver

 optional float momentum2 = 39 [default = 0.999];

 

  // RMSProp decay value

  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)

 optional float rms_decay = 38 [default = 0.99];

 

  // If true, print information about the state of the net that may help with

  // debugging learning problems.

 optional bool debug_info = 23 [default = false];

 

  // If false, don't save a snapshot after training finishes.

 optional bool snapshot_after_train = 28 [default = true];

 

  // DEPRECATED: old solver enum types, use string instead

 enum SolverType {

   SGD = 0;

   NESTEROV = 1;

   ADAGRAD = 2;

   RMSPROP = 3;

   ADADELTA = 4;

   ADAM = 5;

  }

  // DEPRECATED: use type instead of solver_type

 optional SolverType solver_type = 30 [default = SGD];

}
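To make the fields above concrete, here is a minimal sketch of a solver.prototxt that exercises the most common ones. The file paths and all numeric values below are illustrative placeholders, not settings taken from any particular model:

net: "models/example/train_val.prototxt"   # hypothetical net definition
test_iter: 400             # batches per testing phase
test_interval: 1000        # test every 1000 training iterations
base_lr: 0.01              # starting learning rate
lr_policy: "step"          # multiply lr by gamma every stepsize iterations
gamma: 0.1
stepsize: 10000
momentum: 0.9
weight_decay: 0.0005
display: 100               # log training info every 100 iterations
max_iter: 45000
snapshot: 5000             # save model + solver state every 5000 iterations
snapshot_prefix: "models/example/example"  # hypothetical prefix
solver_mode: GPU
type: "SGD"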

Addendum: a few notes on SGD

Stochastic gradient descent: the "stochastic" here actually refers to randomness in the samples, i.e., gradient descent computed over mini-batches. The parameter update is: new parameter = old parameter - learning rate * gradient.

Momentum can make SGD-based deep learning methods more stable and faster. According to the paper "ImageNet Classification with Deep Convolutional Neural Networks", each weight update retains a fraction of the previous update, and this does have a stabilizing effect; as for why it is also faster, I still don't understand.
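For reference, the momentum update that Caffe's SGD solver implements can be written out as follows (this is the form given in the Caffe SGD documentation; mu is the momentum field and alpha the learning rate from the solver parameters above):

V(t+1) = mu * V(t) - alpha * grad L(W(t))
W(t+1) = W(t) + V(t+1)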

That paper also discusses the role of weight_decay; see Section 5 of the paper.

Online discussions seem to describe it more often as a regularization penalty term.
