// Registers LearningRateOp<float, CPUContext> under the operator name LearningRate for CPU.
1 #include "caffe2/sgd/learning_rate_op.h" 4 REGISTER_CPU_OPERATOR(LearningRate, LearningRateOp<float, CPUContext>);
// Schema for the LearningRate operator: user-facing documentation text,
// per-argument descriptions, one input (index 0), one output (index 0) and a
// device-inference function.
6 OPERATOR_SCHEMA(LearningRate)
10 Learning rate is a decreasing function of time. With low learning rates the 11 improvements will be linear. With high learning rates they will start to look 12 more exponential. Learning rate is controlled by the following arguments: 17 `base_lr`: base learning rate 18 `policy`: this controls how the learning rate is applied, options are: 20 `step`: uses `stepsize`, `gamma` 22 `inv`: uses `gamma`, `power` 23 `linearWarmup`: uses `start_multiplier`, `num_iter` 24 `constantWarmup`: uses `multiplier`, `num_iter` 25 `alter`: uses `active_first`, `active_period`, `inactive_period` 26 `hill`: uses those in both `linearWarmup` and `inv`, plus `end_multiplier` 27 `composite`: uses `sub_policy_num_iters` and additional args with format 28 sub_policy_{sub_policy_index}_{sub_policy_arg}, for example: 29 sub_policy_0_policy: "exp", sub_policy_0_gamma: 0.99, 30 sub_policy_0_lr_scale: 1.2 31 sub_policy_0_policy: "fixed", sub_policy_0_lr_scale: 1.0 32 sub_policy_num_iters: [1000, 1000] 35 `stepsize`: defaults to 0 36 `gamma`: defaults to 0 37 `power`: defaults to 0 38 `num_iter`: defaults to 0 39 `start_multiplier`: defaults to 0 40 `multiplier`: defaults to 0.5 44 train_net.LearningRate(*iterations*, "*label*", base_lr=*float*, 45 policy="policy_name", stepsize=*int*, gamma=*float*) 49 train_net.LearningRate(200, "LR", base_lr=-0.1, 50 policy="step", stepsize=20, gamma=0.9) 52 .Arg("base_lr",
"(float, required) base learning rate")
// NOTE(review): this description says float with default 1.0, but the doc
// text above lists named policies (step, inv, linearWarmup, ...) and the
// example passes policy=step as a string. The type and default given here
// look wrong; confirm against the operator implementation before relying on it.
53 .Arg(
"policy",
"(float, default 1.0) strategy for gamma enforcement")
// NOTE(review): description says default 1.0 while the doc text above says
// `power` defaults to 0; confirm which one matches the implementation.
54 .Arg(
"power",
"(float, default 1.0) used only for inv policy type")
// NOTE(review): description says default 1.0 while the doc text above says
// `gamma` defaults to 0; the doc text also lists gamma as used by the step
// and inv policies rather than as a momentum term. Confirm.
55 .Arg(
"gamma",
"(float, default 1.0) momentum of change")
// NOTE(review): description says float with default 1.0, while the doc text
// above says `stepsize` defaults to 0 and the usage line shows stepsize=*int*.
// Confirm the real type and default.
56 .Arg(
"stepsize",
"(float, default 1.0) sampling rate on iterations")
// The next three arguments are documented as belonging to the alter policy.
57 .Arg(
"active_first",
"(boolean, default True) in alter policy")
58 .Arg(
"active_period",
"(int64_t, required) in alter policy")
59 .Arg(
"inactive_period",
"(int64_t, required) in alter policy")
62 "(int, default -1) maximum iterations in this training run")
65 "(int, default 0) number of iterations over which to warmup lr")
68 "(float, default 0) starting multiplier for learning rate")
71 "(float, default 0) end multiplier for learning rate")
74 "(float, default 0.5) constant multiplier for learning rate")
76 "sub_policy_num_iters",
77 "(int array, default empty) number of iterations for each sub learning rate policy in composite policy")
// NOTE(review): placeholder description. The usage text passes *iterations*
// as the first argument, so input 0 is presumably the iteration counter;
// confirm and replace the placeholder.
78 .Input(0,
"input",
"description needed")
// NOTE(review): placeholder description. Presumably output 0 is the computed
// learning rate; confirm and replace the placeholder.
79 .Output(0,
"output",
"description needed")
// Device inference: returns a pair of DeviceOption vectors. The first holds a
// single default-constructed DeviceOption; the second holds the device_option()
// taken from the operator definition.
// NOTE(review): presumably first = input devices and second = output devices,
// i.e. the input is treated as living on the default device; confirm.
80 .DeviceInferenceFunction([](
const OperatorDef& def) {
81 return std::make_pair(
82 std::vector<DeviceOption>{DeviceOption()},
83 std::vector<DeviceOption>{def.device_option()});
// Applies NO_GRADIENT to LearningRate; presumably this records that the
// operator has no gradient operator (confirm against the macro definition).
86 NO_GRADIENT(LearningRate);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...