Caffe2 - C++ API
A deep learning, cross-platform ML framework
elementwise_op_schema.cc
#include "caffe2/core/operator_gradient.h"
#include "caffe2/operators/elementwise_op.h"
#include "caffe2/utils/proto_utils.h"

namespace caffe2 {

const char* kBroadcastDoc = R"DOC(
If necessary, the right-hand-side argument will be broadcast to match the
shape of the left-hand-side argument. When broadcasting is specified, the
second tensor can either be of size 1 (a scalar value) or have a shape that
is a contiguous subset of the first tensor's shape. The start of the mutually
equal shape is specified by the argument "axis"; if it is not set, suffix
matching is assumed. 1-dim expansion doesn't work yet.

For example, the following tensor shapes are supported (with broadcast=1):

  shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar
  shape(A) = (2, 3, 4, 5), shape(B) = (5,)
  shape(A) = (2, 3, 4, 5), shape(B) = (4, 5)
  shape(A) = (2, 3, 4, 5), shape(B) = (3, 4), with axis=1
  shape(A) = (2, 3, 4, 5), shape(B) = (2,), with axis=0

Argument `broadcast=1` needs to be passed to enable broadcasting.
)DOC";
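
// Illustrative sketch (not part of the Caffe2 sources): the shape rule above,
// expressed as a standalone predicate. `BroadcastCompatible` is a hypothetical
// helper; axis = -1 stands for the schema's default suffix matching.
//
//   #include <vector>
//
//   bool BroadcastCompatible(
//       const std::vector<int>& a, const std::vector<int>& b, int axis = -1) {
//     if (b.empty()) return true;  // B is a scalar.
//     if (b.size() > a.size()) return false;
//     // Align B against A's trailing dims (suffix matching) or at `axis`.
//     const size_t start = axis >= 0 ? axis : a.size() - b.size();
//     if (start + b.size() > a.size()) return false;
//     for (size_t i = 0; i < b.size(); ++i) {
//       // Dims must match exactly; 1-dim expansion is not supported yet.
//       if (a[start + i] != b[i]) return false;
//     }
//     return true;
//   }
//
//   // BroadcastCompatible({2, 3, 4, 5}, {4, 5})    == true   (suffix match)
//   // BroadcastCompatible({2, 3, 4, 5}, {3, 4}, 1) == true   (axis = 1)
//   // BroadcastCompatible({2, 3, 4, 5}, {3, 5})    == false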

std::function<void(OpSchema&)> MathDocGenerator(const char* name) {
  return [=](OpSchema& schema) {
    string doc = R"DOC(
Performs element-wise binary {name} (with limited broadcast support).
{broadcast_doc})DOC";
    ReplaceAll(doc, "{name}", name);
    ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
    schema.SetDoc(doc);
    schema.Arg("broadcast", "Pass 1 to enable broadcasting");
    schema.Arg(
        "axis",
        "If set, defines the broadcast dimensions. See doc for details.");
    schema.Input(
        0,
        "A",
        "First operand, should share the type with the second operand.");
    schema.Input(
        1,
        "B",
        "Second operand. With broadcasting can be of smaller size than A. "
        "If broadcasting is disabled it should be of the same size.");
    schema.Output(0, "C", "Result, has same dimensions and type as A");
  };
}

OPERATOR_SCHEMA(Add)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}, {1, 0}})
    .CostInferenceFunction(PointwiseCostInference<1>)
    .IdenticalTypeAndShapeOfInput(0)
    .FillUsing(MathDocGenerator("addition"));
OPERATOR_SCHEMA(Sub)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}, {1, 0}})
    .CostInferenceFunction(PointwiseCostInference<1>)
    .IdenticalTypeAndShapeOfInput(0)
    .FillUsing(MathDocGenerator("subtraction"));
OPERATOR_SCHEMA(Mul)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}, {1, 0}})
    .CostInferenceFunction(PointwiseCostInference<1>)
    .IdenticalTypeAndShapeOfInput(0)
    .FillUsing(MathDocGenerator("multiplication"));
OPERATOR_SCHEMA(Div)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .CostInferenceFunction(PointwiseCostInference<1>)
    .IdenticalTypeAndShapeOfInput(0)
    .FillUsing(MathDocGenerator("division"));
OPERATOR_SCHEMA(DivGradient).NumInputs(3).NumOutputs(2).AllowInplace({{0, 0}});
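
// Illustrative usage sketch (not part of this file): running Add with
// broadcast=1 through a Workspace. The blob and tensor calls below assume the
// Caffe2 CPU tensor API of this era (Blob::GetMutable<TensorCPU>,
// Tensor::Resize / mutable_data); treat this as a sketch, not a reference.
//
//   Workspace ws;
//   auto* a = ws.CreateBlob("A")->GetMutable<TensorCPU>();
//   a->Resize(2, 3);
//   for (int i = 0; i < a->size(); ++i) {
//     a->mutable_data<float>()[i] = static_cast<float>(i);
//   }
//   auto* b = ws.CreateBlob("B")->GetMutable<TensorCPU>();
//   b->Resize(3);  // shape (3,) suffix-matches A's shape (2, 3)
//   for (int i = 0; i < 3; ++i) {
//     b->mutable_data<float>()[i] = 1.0f;
//   }
//   OperatorDef def = CreateOperatorDef(
//       "Add",
//       "",
//       vector<string>{"A", "B"},
//       vector<string>{"C"},
//       vector<Argument>{MakeArgument<int>("broadcast", 1)});
//   CAFFE_ENFORCE(ws.RunOperatorOnce(def));
//   // C has A's shape (2, 3); each row of A gets B added to it.
//   const auto& c = ws.GetBlob("C")->Get<TensorCPU>();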

OPERATOR_SCHEMA(SumReduceLike)
    .NumInputs(2)
    .NumOutputs(1)
    .IdenticalTypeAndShapeOfInput(1)
    .SetDoc(R"DOC(
SumReduceLike operator takes 2 tensors as input. It performs a reduce sum on
the first input so that the output has the same shape as the second one.
It assumes that the first input has more dimensions than the second, and that
the dimensions of the second input are a contiguous subset of the dimensions
of the first. For example, the following tensor shapes are supported:

  shape(A) = (2, 3, 4, 5), shape(B) = (4, 5)
  shape(A) = (2, 3, 4, 5), shape(B) = (,), i.e. B is a scalar
  shape(A) = (2, 3, 4, 5), shape(B) = (3, 4), with axis=1
  shape(A) = (2, 3, 2, 5), shape(B) = (2,), with axis=0
)DOC")
    .Arg(
        "axis",
        "If set, defines the starting dimension for reduction. Args `axis` and "
        "`axis_str` cannot be used simultaneously.")
    .Arg(
        "axis_str",
        "If set, it can only be N, C, H or W. The `order` arg should also be "
        "provided. It defines the reduction dimensions on NCHW or NHWC. Args "
        "`axis` and `axis_str` cannot be used simultaneously.")
    .Arg("order", "Either NHWC or NCHW")
    .Input(
        0,
        "A",
        "First operand, should share the type with the second operand.")
    .Input(
        1,
        "B",
        "Second operand. With broadcasting can be of smaller size than A. "
        "If broadcasting is disabled it should be of the same size.")
    .Output(0, "C", "Result, has same dimensions and type as B");
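
// What SumReduceLike computes, as a standalone sketch (not part of Caffe2):
// every dimension of A that B lacks is summed away, so the output takes B's
// shape. For shape(A) = (2, 3, 4, 5) and shape(B) = (4, 5), in plain C++:
//
//   float A[2][3][4][5] = {};  // imagine this filled with data
//   float C[4][5] = {};
//   for (int i = 0; i < 2; ++i)
//     for (int j = 0; j < 3; ++j)
//       for (int k = 0; k < 4; ++k)
//         for (int l = 0; l < 5; ++l)
//           C[k][l] += A[i][j][k][l];
//
// With axis=1 and shape(B) = (3, 4), dims 0 and 3 of A would be the ones
// reduced instead.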

class GetAddGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    if (!ArgumentHelper::HasArgument(Def(), "broadcast")) {
      SetDense(0, GO(0));
      SetDense(1, GO(0));
      return vector<OperatorDef>();
    }
    SetDense(0, GO(0));

    return SingleGradientDef(
        "SumReduceLike",
        "",
        vector<string>{GO(0), I(1)},
        vector<string>{GI(1)});
  }
};
REGISTER_GRADIENT(Add, GetAddGradient);
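
// Why SumReduceLike is the right gradient for the broadcast operand: each
// element of B is added into many elements of C, so by the chain rule dL/dB
// accumulates dL/dC over every position that element reached. Scalar sketch:
// if C[i] = A[i] + b for all i, then dL/db = sum_i dL/dC[i], which is exactly
// SumReduceLike(GO(0), I(1)). dL/dA is dL/dC unchanged, hence SetDense(0, GO(0)).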

// TODO(jiayq): although the Sub gradient is implemented here, the Negative
// unary operator it depends on still needs to be implemented.
class GetSubGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    if (!ArgumentHelper::HasArgument(Def(), "broadcast")) {
      SetDense(0, GO(0));
      return SingleGradientDef(
          "Negative", "", vector<string>{GO(0)}, vector<string>{GI(1)});
    } else {
      SetDense(0, GO(0));
      vector<OperatorDef> grad_ops;
      grad_ops.push_back(CreateOperatorDef(
          "Negative",
          "",
          vector<string>{GO(0)},
          vector<string>{GI(1) + "_autogen_pre_red"}));

      Argument axis, axis_str, order;
      if (ArgumentHelper::HasArgument(Def(), "axis")) {
        axis = GetArgument(Def(), "axis");
      } else {
        axis = MakeArgument<int>("axis", -1);
      }
      if (ArgumentHelper::HasArgument(Def(), "axis_str")) {
        axis_str = GetArgument(Def(), "axis_str");
      } else {
        axis_str = MakeArgument<string>("axis_str", "");
      }
      if (ArgumentHelper::HasArgument(Def(), "order")) {
        order = GetArgument(Def(), "order");
      } else {
        order = MakeArgument<string>("order", "NCHW");
      }
      grad_ops.push_back(CreateOperatorDef(
          "SumReduceLike",
          "",
          vector<string>{GI(1) + "_autogen_pre_red", I(1)},
          vector<string>{GI(1)},
          vector<Argument>{axis, axis_str, order}));

      return grad_ops;
    }
  }
  // Make sure the broadcast argument is not copied over.
  bool CopyArguments() const override {
    return false;
  }
};
REGISTER_GRADIENT(Sub, GetSubGradient);

class GetMulGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    CAFFE_ENFORCE(
        Def().input(0) != Def().output(0) && Def().input(1) != Def().output(0),
        "Gradient computation cannot be carried out if Mul uses in-place "
        "computation: ",
        ProtoDebugString(Def()));
    if (!ArgumentHelper::HasArgument(Def(), "broadcast")) {
      return vector<OperatorDef>{
          CreateOperatorDef(
              "Mul", "", vector<string>{GO(0), I(1)}, vector<string>{GI(0)}),
          CreateOperatorDef(
              "Mul", "", vector<string>{GO(0), I(0)}, vector<string>{GI(1)})};
    } else {
      Argument broadcast, axis, axis_str, order;
      if (ArgumentHelper::HasArgument(Def(), "broadcast")) {
        broadcast = GetArgument(Def(), "broadcast");
      } else {
        broadcast = MakeArgument<int>("broadcast", 0);
      }
      if (ArgumentHelper::HasArgument(Def(), "axis")) {
        axis = GetArgument(Def(), "axis");
      } else {
        axis = MakeArgument<int>("axis", -1);
      }
      if (ArgumentHelper::HasArgument(Def(), "axis_str")) {
        axis_str = GetArgument(Def(), "axis_str");
      } else {
        axis_str = MakeArgument<string>("axis_str", "");
      }
      if (ArgumentHelper::HasArgument(Def(), "order")) {
        order = GetArgument(Def(), "order");
      } else {
        order = MakeArgument<string>("order", "NCHW");
      }

      vector<OperatorDef> grad_ops;
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "mul_grad_1st_op",
          vector<string>{GO(0), I(1)},
          vector<string>{GI(0)},
          vector<Argument>{broadcast, axis, axis_str, order}));
      grad_ops.push_back(CreateOperatorDef(
          "Mul",
          "mul_gradient_2nd_op",
          vector<string>{GO(0), I(0)},
          vector<string>{GI(1) + "_autogen_pre_red"}));

      grad_ops.push_back(CreateOperatorDef(
          "SumReduceLike",
          "mul_with_broadcast_grad_3",
          vector<string>{GI(1) + "_autogen_pre_red", I(1)},
          vector<string>{GI(1)},
          vector<Argument>{axis, axis_str, order}));

      return grad_ops;
    }
  }

  // Make sure the broadcast argument is not copied over.
  bool CopyArguments() const override {
    return false;
  }
};
REGISTER_GRADIENT(Mul, GetMulGradient);
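
// Dataflow of the broadcast Mul gradient above, as a sketch (blob names are
// the ones the maker generates):
//
//   GI(0)                 = GO(0) * I(1)   // broadcast Mul, A's shape
//   GI(1)_autogen_pre_red = GO(0) * I(0)   // full-size product, A's shape
//   GI(1)                 = SumReduceLike(GI(1)_autogen_pre_red, I(1))
//
// The intermediate blob exists because GO(0) * I(0) has A's shape while
// dL/dB must have B's (smaller) shape; SumReduceLike folds the broadcast
// dimensions back down, mirroring the Add and Sub gradients.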

class GetDivGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    CAFFE_ENFORCE(
        !ArgumentHelper::HasArgument(Def(), "broadcast"),
        "Gradient not ready yet for Div with broadcasting.");
    return SingleGradientDef(
        "DivGradient",
        "",
        vector<string>{I(1), O(0), GO(0)},
        vector<string>{GI(0), GI(1)});
  }
};
REGISTER_GRADIENT(Div, GetDivGradient);
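
// The math DivGradient implements, as a sketch: for C = A / B,
//
//   dL/dA = dL/dC * (1 / B)
//   dL/dB = dL/dC * (-A / B^2) = -(dL/dC) * C / B
//
// Rewriting -A / B^2 as -C / B is why the gradient takes {I(1), O(0), GO(0)},
// i.e. {B, C, dC}: reusing the forward output avoids recomputing A / B^2.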

std::function<void(OpSchema&)> ComparisonDocGenerator(
    const char* name,
    const char* desc) {
  return [=](OpSchema& schema) {
    string doc = R"DOC(
Performs element-wise {desc} comparison `{name}` (with limited broadcast support).
{broadcast_doc})DOC";
    ReplaceAll(doc, "{name}", name);
    ReplaceAll(doc, "{desc}", desc);
    ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
    schema.SetDoc(doc);
    schema.Arg("broadcast", "Pass 1 to enable broadcasting");
    schema.Arg(
        "axis",
        "If set, defines the broadcast dimensions. See doc for details.");
    schema.Input(
        0,
        "A",
        "First operand, should share the type with the second operand.");
    schema.Input(
        1,
        "B",
        "Second operand. With broadcasting can be of smaller size than A. "
        "If broadcasting is disabled it should be of the same size.");
    schema.Output(0, "C", "Result, has same dimensions as A and type `bool`");
  };
}

#define CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(name, symbol, desc) \
  OPERATOR_SCHEMA(name).NumInputs(2).NumOutputs(1).FillUsing(      \
      ComparisonDocGenerator(symbol, desc));                       \
  SHOULD_NOT_DO_GRADIENT(name)

CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LT, "<", "less than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(LE, "<=", "less than or equal to");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GT, ">", "greater than");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(GE, ">=", "greater than or equal to");
CAFFE2_SCHEMA_FOR_BINARY_COMPARISON_OP(EQ, "==", "equality");

std::function<void(OpSchema&)> LogicalDocGenerator(const char* name) {
  return [=](OpSchema& schema) {
    string doc = R"DOC(
Performs element-wise logical operation `{name}` (with limited broadcast support).
Both input operands should be of type `bool`.
{broadcast_doc})DOC";
    ReplaceAll(doc, "{name}", name);
    ReplaceAll(doc, "{broadcast_doc}", kBroadcastDoc);
    schema.SetDoc(doc);
    schema.Arg("broadcast", "Pass 1 to enable broadcasting");
    schema.Arg(
        "axis",
        "If set, defines the broadcast dimensions. See doc for details.");
    schema.Input(0, "A", "First operand.");
    schema.Input(
        1,
        "B",
        "Second operand. With broadcasting can be of smaller size than A. "
        "If broadcasting is disabled it should be of the same size.");
    schema.Output(0, "C", "Result, has same dimensions as A and type `bool`");
  };
}

#define CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(name, symbol) \
  OPERATOR_SCHEMA(name)                                   \
      .NumInputs(2)                                       \
      .NumOutputs(1)                                      \
      .AllowInplace({{0, 0}})                             \
      .FillUsing(LogicalDocGenerator(symbol));            \
  SHOULD_NOT_DO_GRADIENT(name)

CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Or, "or");
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(And, "and");
CAFFE2_SCHEMA_FOR_BINARY_LOGICAL_OP(Xor, "xor");

OPERATOR_SCHEMA(Not)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(Performs element-wise logical negation.)DOC")
    .Input(0, "X", "Input tensor of type `bool`.")
    .Output(0, "Y", "Output tensor of type `bool`.");
SHOULD_NOT_DO_GRADIENT(Not);

} // namespace caffe2