Caffe2 - C++ API
A deep-learning, cross-platform ML framework.
File: learning_rate_functors.h
1 #ifndef CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
2 #define CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
3 
4 #include <list>
5 #include <map>
6 
7 #include "caffe2/core/context.h"
8 #include "caffe2/core/operator.h"
9 
10 namespace caffe2 {
11 
// LearningRateFunctor is a functor that, when fed with an iter number,
// produces the learning-rate multiplier for the corresponding iteration.
// Concrete policies (fixed, step, exp, ...) derive from this interface.
template <typename T>
class LearningRateFunctor {
 public:
  virtual ~LearningRateFunctor() {}
  // Returns the learning-rate multiplier for iteration `iter`.
  virtual T operator()(const int64_t iter) const = 0;
};
20 
21 // Fixed: not changing the learning rate at all.
22 template <typename T>
24  public:
25  T operator()(const int64_t /*iter*/) const override {
26  return 1.;
27  }
28 };
29 
30 // Alter: alternatate learning rate with active_period and inactive_period.
31 // update for for a duration of active_period and then stop for a duration of
32 // inactive_period if active_first, and vice versa
33 template <typename T>
35  public:
37  const int64_t active_period,
38  const int64_t inactive_period,
39  const bool active_first)
40  : active_period_(active_period),
41  inactive_period_(inactive_period),
42  active_first_(active_first) {}
43  T operator()(const int64_t iter) const override {
44  if (iter % (active_period_ + inactive_period_) <
45  (active_first_ ? active_period_ : inactive_period_)) {
46  return active_first_ ? 1. : 0.;
47  } else {
48  return active_first_ ? 0. : 1.;
49  };
50  };
51 
52  int64_t active_period_;
53  int64_t inactive_period_;
54  bool active_first_;
55 };
56 
57 // Step: return gamma ^ (floor(iter / step))
58 template <typename T>
60  public:
61  StepLearningRate(const int stepsize, const T gamma)
62  : stepsize_(stepsize), gamma_(gamma) {}
63  T operator()(const int64_t iter) const override {
64  return std::pow(gamma_, static_cast<T>(iter / stepsize_));
65  }
66 
67  int stepsize_;
68  T gamma_;
69 };
70 
71 // Exp: return gamma ^ iter
72 template <typename T>
74  public:
75  explicit ExpLearningRate(const T gamma) : gamma_(gamma) {}
76  T operator()(const int64_t iter) const override {
77  return std::pow(gamma_, static_cast<T>(iter));
78  }
79 
80  T gamma_;
81 };
82 
83 // Inv: return (1 + gamma * iter) ^ (-power)
84 template <typename T>
86  public:
87  InvLearningRate(const T gamma, const T power)
88  : gamma_(gamma), power_(power) {}
89  T operator()(const int64_t iter) const override {
90  return std::pow(T(1) + gamma_ * iter, -power_);
91  }
92  T gamma_;
93  T power_;
94 };
95 
96 // Poly: return (1 - iter/max_iter) ^ (power)
97 template <typename T>
99  public:
100  PolyLearningRate(const T power, const int64_t max_iter)
101  : power_(power), max_iter_(max_iter) {}
102  T operator()(const int64_t iter) const override {
103  return std::pow(1 - T(iter) / T(max_iter_), power_);
104  }
105  T power_;
106  uint64_t max_iter_;
107 };
108 
109 // LinearWarmup: return max(iter/num_iter, 1)
110 template <typename T>
112  public:
113  LinearWarmupLearningRate(const T start_multiplier, const int64_t num_iter)
114  : start_multiplier_(start_multiplier), num_iter_(num_iter) {}
115  T operator()(const int64_t iter) const override {
116  if (iter >= num_iter_) {
117  return 1.;
118  }
119  return start_multiplier_ + (1. - start_multiplier_) * T(iter) / T(num_iter_);
120  }
121  T start_multiplier_;
122  uint64_t num_iter_;
123 };
124 
125 // ConstantWarmup: return scale when iter < num_iter, and 1 otherwise
126 template <typename T>
128  public:
129  ConstantWarmupLearningRate(const T multiplier, const int64_t num_iter)
130  : multiplier_(multiplier), num_iter_(num_iter) {}
131  T operator()(const int64_t iter) const override {
132  if (iter >= num_iter_) {
133  return 1.;
134  }
135  return T(multiplier_);
136  }
137  T multiplier_;
138  uint64_t num_iter_;
139 };
140 
141 // hill: the learning rate changes according to following 3 stages
142 // 1) linear warmup (increasing) at first num_iter steps from start_multiplier
143 // 2) inverse shrink (decreasing) afterwards (gamma, power)
144 // 3) lower bounded by end_multiplier
145 template <typename T>
147  public:
149  const int64_t num_iter,
150  const T start_multiplier,
151  const T gamma,
152  const T power,
153  const T end_multiplier)
154  : linear_warmup_lr_(start_multiplier, num_iter),
155  inv_lr_(gamma, power),
156  num_iter_(num_iter),
157  end_multiplier_(end_multiplier) {}
158  T operator()(const int64_t iter) const override {
159  if (iter < num_iter_) {
160  return linear_warmup_lr_(iter);
161  } else {
162  return std::max(end_multiplier_, inv_lr_(iter - num_iter_));
163  }
164  }
165  LinearWarmupLearningRate<T> linear_warmup_lr_;
166  InvLearningRate<T> inv_lr_;
167  int64_t num_iter_;
168  T end_multiplier_;
169 };
170 
171 template <typename T>
173  public:
174  CompositeLearningRateItem(int64_t num_iter, LearningRateFunctor<T>* policy)
175  : num_iter_(num_iter), policy_(policy) {}
176  int64_t num_iter_;
177  LearningRateFunctor<T>* policy_;
178 };
179 
180 // composite: the learning policy changes according to current iteration #
181 template <typename T>
183  public:
185  const std::list<CompositeLearningRateItem<T>>& sub_policies) {
186  DCHECK_GT(sub_policies.size(), 0);
187  int64_t num_iter_start = 1;
188  for (auto it = sub_policies.begin(); it != sub_policies.end(); ++it) {
189  DCHECK_GT(it->num_iter_, 0);
190  sub_policies_[num_iter_start].reset(it->policy_);
191  num_iter_start += it->num_iter_;
192  }
193  }
194  T operator()(const int64_t iter) const override {
195  auto sub_policy = sub_policies_.upper_bound(iter);
196  DCHECK(sub_policy != sub_policies_.begin());
197  --sub_policy;
198  return (*sub_policy->second)(iter);
199  }
200 
201  private:
202  std::map<int64_t, std::unique_ptr<LearningRateFunctor<T>>> sub_policies_;
203 };
204 
205 } // namespace caffe2
206 
207 #endif // CAFFE2_SGD_LEARNING_RATE_FUNCTORS_H_
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13