Caffe2 - C++ API
A deep learning, cross platform ML framework
learning_rate_functors.h
1 
17 #ifndef CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
18 #define CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
19 
20 #include "caffe2/core/context.h"
21 #include "caffe2/core/operator.h"
22 
23 namespace caffe2 {
24 
// LearningRateFunctor is the abstract interface for a learning-rate policy:
// a functor that, when fed an iteration number, produces the learning rate
// for that iteration. Concrete policies override operator().
//
// T is the arithmetic type of the value that is returned.
template <typename T>
class LearningRateFunctor {
 public:
  // Virtual so that a concrete policy can be destroyed through a pointer or
  // reference to this base type.
  virtual ~LearningRateFunctor() = default;

  // Returns the learning-rate value for iteration `iter`.
  virtual T operator()(const int64_t iter) const = 0;
};
33 
34 // Fixed: not changing the learning rate at all.
35 template <typename T>
37  public:
38  T operator()(const int64_t /*iter*/) const override {
39  return 1.;
40  }
41 };
42 
43 // Alter: alternatate learning rate with active_period and inactive_period.
44 // update for for a duration of active_period and then stop for a duration of
45 // inactive_period if active_first, and vice versa
46 template <typename T>
48  public:
50  const int64_t active_period,
51  const int64_t inactive_period,
52  const bool active_first)
53  : active_period_(active_period),
54  inactive_period_(inactive_period),
55  active_first_(active_first) {}
56  T operator()(const int64_t iter) const override {
57  if (iter % (active_period_ + inactive_period_) <
58  (active_first_ ? active_period_ : inactive_period_)) {
59  return active_first_ ? 1. : 0.;
60  } else {
61  return active_first_ ? 0. : 1.;
62  };
63  };
64 
65  int64_t active_period_;
66  int64_t inactive_period_;
67  bool active_first_;
68 };
69 
70 // Step: return gamma ^ (floor(iter / step))
71 template <typename T>
73  public:
74  StepLearningRate(const int stepsize, const T gamma)
75  : stepsize_(stepsize), gamma_(gamma) {}
76  T operator()(const int64_t iter) const override {
77  return std::pow(gamma_, static_cast<T>(iter / stepsize_));
78  }
79 
80  int stepsize_;
81  T gamma_;
82 };
83 
84 // Exp: return gamma ^ iter
85 template <typename T>
87  public:
88  explicit ExpLearningRate(const T gamma) : gamma_(gamma) {}
89  T operator()(const int64_t iter) const override {
90  return std::pow(gamma_, static_cast<T>(iter));
91  }
92 
93  T gamma_;
94 };
95 
96 // Inv: return (1 + gamma * iter) ^ (-power)
97 template <typename T>
99  public:
100  InvLearningRate(const T gamma, const T power)
101  : gamma_(gamma), power_(power) {}
102  T operator()(const int64_t iter) const override {
103  return std::pow(T(1) + gamma_ * iter, -power_);
104  }
105  T gamma_;
106  T power_;
107 };
108 
109 // Poly: return (1 - iter/max_iter) ^ (power)
110 template <typename T>
112  public:
113  PolyLearningRate(const T power, const int64_t max_iter)
114  : power_(power), max_iter_(max_iter) {}
115  T operator()(const int64_t iter) const override {
116  return std::pow(1 - T(iter) / T(max_iter_), power_);
117  }
118  T power_;
119  uint64_t max_iter_;
120 };
121 
122 // LinearWarmup: return max(iter/num_iter, 1)
123 template <typename T>
125  public:
126  LinearWarmupLearningRate(const T start_multiplier, const int64_t num_iter)
127  : start_multiplier_(start_multiplier), num_iter_(num_iter) {}
128  T operator()(const int64_t iter) const override {
129  if (iter >= num_iter_) {
130  return 1.;
131  }
132  return start_multiplier_ + (1. - start_multiplier_) * T(iter) / T(num_iter_);
133  }
134  T start_multiplier_;
135  uint64_t num_iter_;
136 };
137 
138 // ConstantWarmup: return scale when iter < num_iter, and 1 otherwise
139 template <typename T>
141  public:
142  ConstantWarmupLearningRate(const T multiplier, const int64_t num_iter)
143  : multiplier_(multiplier), num_iter_(num_iter) {}
144  T operator()(const int64_t iter) const override {
145  if (iter >= num_iter_) {
146  return 1.;
147  }
148  return T(multiplier_);
149  }
150  T multiplier_;
151  uint64_t num_iter_;
152 };
153 
154 // hill: the learning rate changes according to following 3 stages
155 // 1) linear warmup (increasing) at first num_iter steps from start_multiplier
156 // 2) inverse shrink (decreasing) afterwards (gamma, power)
157 // 3) lower bounded by end_multiplier
158 template <typename T>
160  public:
162  const int64_t num_iter,
163  const T start_multiplier,
164  const T gamma,
165  const T power,
166  const T end_multiplier)
167  : linear_warmup_lr_(start_multiplier, num_iter),
168  inv_lr_(gamma, power),
169  num_iter_(num_iter),
170  end_multiplier_(end_multiplier) {}
171  T operator()(const int64_t iter) const override {
172  if (iter < num_iter_) {
173  return linear_warmup_lr_(iter);
174  } else {
175  return std::max(end_multiplier_, inv_lr_(iter - num_iter_));
176  }
177  }
178  LinearWarmupLearningRate<T> linear_warmup_lr_;
179  InvLearningRate<T> inv_lr_;
180  int64_t num_iter_;
181  T end_multiplier_;
182 };
183 
184 } // namespace caffe2
185 
186 #endif // CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
Copyright (c) 2016-present, Facebook, Inc.