Caffe2 - C++ API
A deep learning, cross platform ML framework
adagrad_op.h
1 
17 #pragma once
18 
19 #include "caffe2/core/operator.h"
20 
21 namespace caffe2 {
22 
/**
 * Applies one dense Adagrad step to N consecutive float elements.
 *
 * For every element k in [0, N):
 *   nh[k] = decay * h[k] + g[k] * g[k]
 *   nw[k] = w[k] + lr[0] * g[k] / (sqrt(nh[k]) + epsilon)
 *
 * Note that the scaled gradient is *added* to the weight, so the sign of the
 * step is determined by the sign of lr[0] and the gradient.
 *
 * @param N        number of elements to process
 * @param w        current weights (read)
 * @param g        gradients (read)
 * @param h        current accumulated squared-gradient history (read)
 * @param nw       updated weights (written)
 * @param nh       updated history (written)
 * @param epsilon  constant added to the square root in the denominator
 * @param decay    multiplier applied to the old history before accumulation
 * @param lr       pointer to the learning rate; only lr[0] is read
 * @param context  unused
 */
template <typename Context>
void adagrad_update(
    int N,
    const float* w,
    const float* g,
    const float* h,
    float* nw,
    float* nh,
    float epsilon,
    float decay,
    const float* lr,
    Context* /*context*/) {
  int k = 0;
  while (k < N) {
    const float grad = g[k];
    // Store the new history first, then read it back so the weight update
    // uses exactly the value that was written.
    nh[k] = decay * h[k] + grad * grad;
    const float accumulated = nh[k];
    const float denominator = std::sqrt(accumulated) + epsilon;
    nw[k] = w[k] + lr[0] * grad / denominator;
    ++k;
  }
}
41 
42 template <typename T, class Context>
43 class AdagradOp final : public Operator<Context> {
44  public:
45  USE_OPERATOR_CONTEXT_FUNCTIONS;
46  AdagradOp(const OperatorDef& operator_def, Workspace* ws)
47  : Operator<Context>(operator_def, ws),
48  epsilon_(OperatorBase::GetSingleArgument<T>("epsilon", 1e-5f)),
49  decay_(OperatorBase::GetSingleArgument<T>("decay", 1.0f)) {}
50 
51  bool RunOnDevice() override {
52  CAFFE_ENFORCE(Input(GRAD).size() == Input(MOMENT_1).size());
53  CAFFE_ENFORCE(Input(GRAD).size() == Input(PARAM).size());
54  Output(OUTPUT_PARAM)->ResizeLike(Input(PARAM));
55  Output(OUTPUT_MOMENT_1)->ResizeLike(Input(MOMENT_1));
56  adagrad_update<Context>(
57  Input(GRAD).size(),
58  Input(PARAM).template data<T>(),
59  Input(GRAD).template data<T>(),
60  Input(MOMENT_1).template data<T>(),
61  Output(OUTPUT_PARAM)->template mutable_data<T>(),
62  Output(OUTPUT_MOMENT_1)->template mutable_data<T>(),
63  epsilon_,
64  decay_,
65  Input(LR).template data<T>(),
66  &context_);
67  return true;
68  }
69 
70  protected:
71  T epsilon_;
72  T decay_;
73  INPUT_TAGS(PARAM, MOMENT_1, GRAD, LR);
74  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
75 };
76 
77 template <typename T, class Context>
78 class SparseAdagradOp final : public Operator<Context> {
79  public:
80  USE_OPERATOR_CONTEXT_FUNCTIONS;
81  SparseAdagradOp(const OperatorDef& operator_def, Workspace* ws)
82  : Operator<Context>(operator_def, ws),
83  epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
84 
85  bool RunOnDevice() override {
86  // Enforce shapes
87  CAFFE_ENFORCE_EQ(Input(PARAM).size(), Input(MOMENT_1).size());
88  CAFFE_ENFORCE_EQ(Input(LR).size(), 1);
89  CAFFE_ENFORCE_EQ(
90  Input(PARAM).size_from_dim(1),
91  Input(GRAD).size_from_dim(Input(INDICES).ndim()));
92 
94  this, Input(INDICES));
95  }
96 
97  template <typename SIndex>
98  bool DoRunWithType() {
99  const auto* lr = Input(LR).template data<T>();
100  const auto* indices = Input(INDICES).template data<SIndex>();
101  const auto* gradIn = Input(GRAD).template data<T>();
102  const auto* paramIn = Input(PARAM).template data<T>();
103  const auto* momentIn = Input(MOMENT_1).template data<T>();
104  auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();
105  auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();
106 
107  auto n = Input(INDICES).size();
108  if (n == 0) {
109  return true;
110  }
111 
112  auto block_size = Input(GRAD).size() / n;
113  for (auto i = 0; i < n; ++i) {
114  auto idx = indices[i];
115  if (block_size == 1) {
116  float gi = gradIn[i];
117  float hi = momentOut[idx] = momentIn[idx] + gi * gi;
118  paramOut[idx] = paramIn[idx] + lr[0] * gi / (std::sqrt(hi) + epsilon_);
119  } else {
120  auto offsetI = i * block_size;
121  auto offsetIdx = idx * block_size;
122 
123 #ifndef NDEBUG
124  CAFFE_ENFORCE_GE(
125  Input(PARAM).size(),
126  block_size + offsetIdx,
127  this->debug_def().input(PARAM),
128  ", out of bound, idx:",
129  idx,
130  " for input i:",
131  i,
132  " and block size:",
133  block_size);
134  CAFFE_ENFORCE_GE(
135  Input(GRAD).size(),
136  block_size + offsetI,
137  this->debug_def().input(GRAD),
138  ", out of bound idx, idx:",
139  idx,
140  " for input i:",
141  i);
142 #endif
143  adagrad_update(
144  block_size,
145  paramIn + offsetIdx,
146  gradIn + offsetI,
147  momentIn + offsetIdx,
148  paramOut + offsetIdx,
149  momentOut + offsetIdx,
150  epsilon_,
151  1.0f,
152  lr,
153  &context_);
154  }
155  }
156  return true;
157  }
158 
159  protected:
160  T epsilon_;
161  INPUT_TAGS(PARAM, MOMENT_1, INDICES, GRAD, LR);
162  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
163 };
164 
165 template <typename T, class Context>
166 class RowWiseSparseAdagradOp final : public Operator<Context> {
167  public:
168  USE_OPERATOR_CONTEXT_FUNCTIONS;
169  RowWiseSparseAdagradOp(const OperatorDef& operator_def, Workspace* ws)
170  : Operator<Context>(operator_def, ws),
171  epsilon_(OperatorBase::GetSingleArgument<float>("epsilon", 1e-5f)) {}
172 
173  bool RunOnDevice() override {
174  // Enforce shapes
175  CAFFE_ENFORCE_EQ(Input(PARAM).dims()[0], Input(MOMENT_1).size());
176  CAFFE_ENFORCE_EQ(Input(LR).size(), 1);
177  CAFFE_ENFORCE_EQ(
178  Input(PARAM).size_from_dim(1),
179  Input(GRAD).size_from_dim(Input(INDICES).ndim()));
180 
182  this, Input(INDICES));
183  }
184 
185  template <typename SIndex>
186  bool DoRunWithType() {
187  const auto* lr = Input(LR).template data<T>();
188  const auto* indices = Input(INDICES).template data<SIndex>();
189  const auto* gradIn = Input(GRAD).template data<T>();
190  const auto* paramIn = Input(PARAM).template data<T>();
191  const auto* momentIn = Input(MOMENT_1).template data<T>();
192  auto* paramOut = Output(OUTPUT_PARAM)->template mutable_data<T>();
193  auto* momentOut = Output(OUTPUT_MOMENT_1)->template mutable_data<T>();
194 
195  auto n = Input(INDICES).size();
196  if (n == 0) {
197  return true;
198  }
199 
200  auto block_size = Input(GRAD).size() / n;
201 
202  for (auto i = 0; i < n; ++i) {
203  auto idx = indices[i];
204  if (block_size == 1) {
205  float gi = gradIn[i];
206  float hi = momentOut[idx] = momentIn[idx] + gi * gi;
207  paramOut[idx] = paramIn[idx] + lr[0] * gi / (std::sqrt(hi) + epsilon_);
208  } else {
209  auto offsetI = i * block_size;
210  auto offsetIdx = idx * block_size;
211 
212 #ifndef NDEBUG
213  CAFFE_ENFORCE_GE(
214  Input(PARAM).size(),
215  block_size + offsetIdx,
216  this->debug_def().input(PARAM),
217  ", out of bound, idx:",
218  idx,
219  " for input i:",
220  i,
221  " and block size:",
222  block_size);
223  CAFFE_ENFORCE_GE(
224  Input(GRAD).size(),
225  block_size + offsetI,
226  this->debug_def().input(GRAD),
227  ", out of bound idx, idx:",
228  idx,
229  " for input i:",
230  i);
231 #endif
232 
233  const float* w = paramIn + offsetIdx;
234  const float* g = gradIn + offsetI;
235  const float* h = momentIn + idx;
236  float* nw = paramOut + offsetIdx;
237  float* nh = momentOut + idx;
238  float hs = 0.;
239  for (auto j = 0; j < block_size; ++j) {
240  float gj = g[j];
241  hs += gj * gj;
242  }
243  float hi = nh[0] = h[0] + hs / block_size;
244  float step = lr[0] / (std::sqrt(hi) + epsilon_);
245  for (auto j = 0; j < block_size; ++j) {
246  nw[j] = w[j] + g[j] * step;
247  }
248  }
249  }
250  return true;
251  }
252 
253  protected:
254  T epsilon_;
255  INPUT_TAGS(PARAM, MOMENT_1, INDICES, GRAD, LR);
256  OUTPUT_TAGS(OUTPUT_PARAM, OUTPUT_MOMENT_1);
257 };
258 }
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:63
Copyright (c) 2016-present, Facebook, Inc.