Caffe2 - C++ API
A deep learning, cross platform ML framework
operator_gradient.h
1 
17 #ifndef CAFFE2_CORE_OPERATOR_GRADIENT_H_
18 #define CAFFE2_CORE_OPERATOR_GRADIENT_H_
19 
20 #include "caffe2/core/operator_schema.h"
21 #include "caffe2/core/registry.h"
22 #include "caffe2/proto/caffe2.pb.h"
23 #include "caffe2/utils/proto_utils.h"
24 
25 namespace caffe2 {
26 
27 /* @brief A struct that abstracts on top of dense and sparse blobs.
28  *
29  * For a dense blob, its gradient name should be written into dense_, and for
30  * a sparse blob, its gradient name should be written into indice_ for
31  * the sparse indices and value_ for the values.
32  */
34  string dense_;
35  string indices_;
36  string values_;
37 
38  inline bool IsDense() const {
39  return (dense_.size() != 0);
40  }
41  inline bool IsSparse() const {
42  return (indices_.size() != 0 || values_.size() != 0);
43  }
44  inline bool IsEmpty() const {
45  return (!IsDense() && !IsSparse());
46  }
47 };
48 
53  vector<OperatorDef> ops_;
54  vector<GradientWrapper> g_input_;
55 
 // Default-constructed meta: no gradient ops and no input gradients.
 GradientOpsMeta() {}
58  const vector<OperatorDef>& ops,
59  const vector<GradientWrapper>& v)
60  : ops_(ops), g_input_(v) {}
61 };
62 
64  public:
66  const OperatorDef& def,
67  const vector<GradientWrapper>& g_output)
68  : def_(def), g_output_(g_output), g_input_(def.input_size()){};
 // Virtual destructor: subclasses are created and destroyed polymorphically
 // (see GradientRegistry below).
 virtual ~GradientMakerBase() {}
 // Whether generated gradient ops should inherit the forward op's device
 // option. Subclasses may override to opt out.
 virtual bool CopyDeviceOption() const {
 return true;
 }
 // Whether generated gradient ops should inherit the forward op's engine.
 // Subclasses may override to opt out.
 virtual bool CopyEngine() const {
 return true;
 }
 // Whether generated gradient ops should inherit the forward op's arguments.
 // Subclasses may override to opt out.
 virtual bool CopyArguments() const {
 return true;
 }
79 
80  virtual void VerifyOp() const {
81  auto* schema = OpSchemaRegistry::Schema(def_.type());
82  if (schema) {
83  CAFFE_ENFORCE(
84  schema->Verify(def_),
85  "(GradientMaker) Operator def did not pass schema checking: ",
86  ProtoDebugString(def_));
87  }
88  }
89 
101  virtual GradientOpsMeta Get() {
102  VerifyOp();
103  vector<OperatorDef> new_defs = GetGradientDefs();
104  for (auto& opdef : new_defs) {
105  opdef.set_is_gradient_op(true);
106  }
107  return GradientOpsMeta(new_defs, g_input_);
108  };
109 
 // Read-only access to the forward operator def whose gradient is being
 // generated.
 const OperatorDef& Def() const {
 return def_;
 }
113 
114  protected:
 // Subclasses override this to emit the actual gradient operator defs.
 // The base implementation aborts via CAFFE_NOT_IMPLEMENTED; makers that
 // override Get() directly never reach it.
 virtual vector<OperatorDef> GetGradientDefs() {
 CAFFE_NOT_IMPLEMENTED;
 }
118 
119  // Helper functions to return names for the gradient computation.
120  // I(idx), O(idx): return the input and output names.
121  // GO(idx): return the name of the gradient for output idx.
122  // GI(idx), GI_I(idx), GI_V(idx): return the name of the gradient for
123  // input idx, and also registers that name into the gradient
124  // registry to be returned.
125  string I(const int i) {
126  CAFFE_ENFORCE((i >= 0) && (i < def_.input().size()));
127  return def_.input(i);
128  }
129  string O(const int i) {
130  CAFFE_ENFORCE((i >= 0) && (i < def_.output().size()));
131  return def_.output(i);
132  }
133  string GI(const int i) {
134  CAFFE_ENFORCE(
135  !g_input_.at(i).IsSparse(),
136  "Input ",
137  def_.input(i),
138  " already set to sparse.");
139  g_input_.at(i).dense_ = GradientName(def_.input(i));
140  return GradientName(def_.input(i));
141  }
142  string GI_I(const int i) {
143  CAFFE_ENFORCE(
144  !g_input_.at(i).IsDense(),
145  "Input ",
146  def_.input(i),
147  " already set to dense.");
148  g_input_.at(i).indices_ = GradientSliceIndices(def_.input(i));
149  return GradientSliceIndices(def_.input(i));
150  }
151  string GI_V(const int i) {
152  CAFFE_ENFORCE(
153  !g_input_.at(i).IsDense(),
154  "Input ",
155  def_.input(i),
156  " already set to dense.");
157  g_input_.at(i).values_ = GradientSliceValues(def_.input(i));
158  return GradientSliceValues(def_.input(i));
159  }
160  string GO(const int i) {
161  CAFFE_ENFORCE(
162  g_output_.at(i).IsDense(),
163  "Gradient of output ",
164  def_.output(i),
165  (g_output_.at(i).IsSparse() ? " is sparse (expected dense)."
166  : " is not provided!"));
167  return g_output_.at(i).dense_;
168  }
169  string GO_I(const int i) {
170  CAFFE_ENFORCE(
171  g_output_.at(i).IsSparse(),
172  "Gradient of output ",
173  def_.output(i),
174  (g_output_.at(i).IsDense() ? " is dense (expected sparse)."
175  : " is not provided!"));
176  return g_output_.at(i).indices_;
177  }
178  string GO_V(const int i) {
179  CAFFE_ENFORCE(
180  g_output_.at(i).IsSparse(),
181  "Gradient of output ",
182  def_.output(i),
183  (g_output_.at(i).IsDense() ? " is dense (expected sparse)."
184  : " is not provided!"));
185  return g_output_.at(i).values_;
186  }
 // Read-only access to the gradient wrapper of output i (dense or sparse).
 const GradientWrapper& GradOut(int i) {
 return g_output_.at(i);
 }
190 
191  // Function to add a gradient pair to map.
192  void SetDense(const int i, const string& name) {
193  CAFFE_ENFORCE(
194  !g_input_.at(i).IsSparse(),
195  "Input ",
196  def_.input(i),
197  " already set to sparse.");
198  g_input_.at(i).dense_ = name;
199  }
200  void SetSparse(const int i, const string& indices, const string& values) {
201  CAFFE_ENFORCE(
202  !g_input_.at(i).IsDense(),
203  "Input ",
204  def_.input(i),
205  " already set to dense.");
206  g_input_.at(i).indices_ = indices;
207  g_input_.at(i).values_ = values;
208  }
209 
214  template <class... Args>
215  inline static vector<OperatorDef> SingleGradientDef(const Args&... args) {
216  return vector<OperatorDef>{CreateOperatorDef(args...)};
217  }
218 
219  public:
223  static CaffeMap<string, string> MatchGradsToParams(const OperatorDef& op) {
224  // NOTE: how to go beyond string-matching?
225  CaffeMap<string, string> m;
226  for (auto& out : op.output()) {
227  if (IsGradientBlob(out)) {
228  m[out] = out.substr(0, out.length() - 5);
229  }
230  }
231  return m;
232  }
233 
234  private:
235  // Utility functions for gradient name computation. We don't expose them
236  // in order to discourage the use of such names explicitly.
237  static string GradientName(const string& name) {
238  return name + "_grad";
239  }
240 
241  static bool IsGradientBlob(const string& name) {
242  return name.length() > 5 && name.find("_grad") == name.length() - 5;
243  }
244 
 // Inverse of GradientName(): strips the trailing "_grad" (5 chars) to
 // recover the parameter name. CHECK-fails if `name` is not a gradient
 // blob name.
 static string GradientNameToParam(const string& name) {
 CHECK(IsGradientBlob(name));
 return name.substr(0, name.length() - 5);
 }
249 
250  static string GradientSliceIndices(const string& name) {
251  return name + "_grad_indices";
252  }
253 
254  static string GradientSliceValues(const string& name) {
255  return name + "_grad_values";
256  }
257 
258  protected:
259  // We make the member variables protected in case someone wants to write
260  // a fully custom Get() function.
261  const OperatorDef& def_;
262  const vector<GradientWrapper>& g_output_;
263  vector<GradientWrapper> g_input_;
264 };
265 
276  using GradientMakerBase::GradientMakerBase;
277  vector<OperatorDef> GetGradientDefs() override {
278  return vector<OperatorDef>();
279  }
280 };
281 
289  using GradientMakerBase::GradientMakerBase;
 // Returns the gradient ops meta. Always throws: this maker is registered
 // for operators whose gradient must never be requested. No return
 // statement is needed because CAFFE_ENFORCE(false, ...) always throws.
 GradientOpsMeta Get() override {
 CAFFE_ENFORCE(
 false, "One should not call gradient for operator ", def_.type(), ".");
 }
294 };
295 
304  using GradientMakerBase::GradientMakerBase;
 // Returns the gradient ops meta. Always throws: the operator should have
 // a gradient, but it has not been implemented yet. No return statement is
 // needed because CAFFE_ENFORCE(false, ...) always throws.
 GradientOpsMeta Get() override {
 CAFFE_ENFORCE(
 false,
 "Operator ",
 def_.type(),
 " should have a gradient but is not implemented yet.");
 }
312 };
313 
314 CAFFE_DECLARE_REGISTRY(
315  GradientRegistry,
317  const OperatorDef&,
318  const vector<GradientWrapper>&);
319 
320 #define REGISTER_GRADIENT(name, ...) \
321  CAFFE_REGISTER_CLASS(GradientRegistry, name, __VA_ARGS__)
322 #define REGISTER_GRADIENT_STR(str_name, ...) \
323  CAFFE_REGISTER_TYPED_CLASS(GradientRegistry, str_name, __VA_ARGS__)
324 
325 // NO_GRADIENT means that the operator does not need any gradient computation.
326 #define NO_GRADIENT(name) REGISTER_GRADIENT(name, NoGradient)
327 
328 // SHOULD_NOT_DO_GRADIENT means that the operator is not designed to have
329 // gradient operators. If you attempt to call the gradient, a log fatal will
330 // occur.
331 #define SHOULD_NOT_DO_GRADIENT(name) \
332  REGISTER_GRADIENT(name, ThrowInTheTowelIfGradientIsCalled)
333 
334 #define GRADIENT_NOT_IMPLEMENTED_YET(name) \
335  REGISTER_GRADIENT(name, GradientNotImplementedYet)
336 
341  const OperatorDef& def,
342  const vector<GradientWrapper>& g_output);
343 
344 } // namespace caffe2
345 
346 #endif // CAFFE2_CORE_OPERATOR_GRADIENT_H_
A helper class to indicate that the gradient mechanism is not ready.
static CaffeMap< string, string > MatchGradsToParams(const OperatorDef &op)
Returns map that returns the parameters that the gradients are for.
A struct that holds the gradient operators and related gradient maps.
A helper class to indicate that the operator should have no gradient.
Copyright (c) 2016-present, Facebook, Inc.
static vector< OperatorDef > SingleGradientDef(const Args &...args)
a helper function to allow one to create one single operator def, which is usually the case for many ...
virtual GradientOpsMeta Get()
Returns the gradient ops meta.
GradientOpsMeta GetGradientForOp(const OperatorDef &def, const vector< GradientWrapper > &g_output)
Gets the GradientOpsMeta for the given operator def.
Definition: operator.cc:314
GradientOpsMeta Get() override
Returns the gradient ops meta.
A helper class to indicate that the operator does not need gradient computation.
GradientOpsMeta Get() override
Returns the gradient ops meta.