1 #include "caffe2/sgd/lars_op.h" 6 void LarsOp<float, CPUContext>::ComputeLearningRate(
18 val = (*trust) / (*dX_norm / *X_norm + (*wd) + offset);
20 *lr_rescaled = fmaxf(fminf(val, *lr_max), lr_min);
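// Worked example (illustrative values, not from the original source): with
// norm(X) = 4, norm(dX) = 2, wd = 0.01, trust = 0.001, and offset = 1e-5,
//   val = 0.001 / (2 / 4 + 0.01 + 1e-5) ~= 0.00196,
// which the fmaxf/fminf pair above then clips into [lr_min, lr_max].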
REGISTER_CPU_OPERATOR(Lars, LarsOp<float, CPUContext>);
OPERATOR_SCHEMA(Lars)
    .NumInputs(5)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Implement Layer-wise Adaptive Rate Scaling (LARS) with clipping. Before adding
weight decay, given a parameter tensor X and its gradient dX, the local learning
rate for X is computed as

    local_lr = trust * norm(X) / ( norm(dX) + wd * norm(X) + offset * norm(X) )

             = trust / ( norm(dX) / norm(X) + wd + offset ),

where offset is a preset hyper-parameter to avoid numerical issues and trust
indicates how much we trust the layer to change its parameters during one update.
In this implementation we use the L2 norm, and the computed local learning rate
is clipped to the upper bound lr_max and the lower bound lr_min:

    local_lr = min(local_lr, lr_max) and local_lr = max(local_lr, lr_min).
)DOC")
    .Input(0, "X", "Parameter tensor")
    .Input(1, "dX", "Gradient tensor")
    .Input(2, "wd", "Weight decay")
    .Input(3, "trust", "Trust")
    .Input(4, "lr_max", "Upper bound of learning rate")
    .Output(0, "lr_rescaled", "Rescaled local learning rate")
    .Arg("offset", "Rescaling offset parameter")
    .Arg("lr_min", "Minimum learning rate for clipping");
SHOULD_NOT_DO_GRADIENT(Lars);

} // namespace caffe2
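// Usage sketch (illustrative, not part of the original file): the registered
// operator can be driven through Caffe2's generic C++ operator API. The blob
// names ("X", "dX", "wd", "trust", "lr_max", "lr_rescaled") are placeholders
// chosen for this sketch.
//
//   caffe2::Workspace ws;
//   // ... create and fill the five input blobs as CPU tensors ...
//   caffe2::OperatorDef def;
//   def.set_type("Lars");
//   for (const char* name : {"X", "dX", "wd", "trust", "lr_max"}) {
//     def.add_input(name);
//   }
//   def.add_output("lr_rescaled");
//   auto* offset_arg = def.add_arg();
//   offset_arg->set_name("offset");
//   offset_arg->set_f(1e-5f);
//   auto op = caffe2::CreateOperator(def, &ws);
//   op->Run();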