Caffe2 - C++ API
A deep learning, cross-platform ML framework
elementwise_op.h
#ifndef CAFFE2_OPERATORS_ELEMENTWISE_OP_H_
#define CAFFE2_OPERATORS_ELEMENTWISE_OP_H_

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/utils/math.h"

#include <tuple>

namespace caffe2 {

using NumericTypes = TensorTypes<int32_t, int64_t, float, double>;
using IntTypes = TensorTypes<int32_t, int64_t>;
using BoolTypes = TensorTypes<bool>;
using IntBoolTypes = TensorTypes<int32_t, int64_t, bool>; // discrete types

// TypeMap that keeps the output element type identical to the input type.
struct SameTypeAsInput {
  template <typename T>
  using type = T;
};

// TypeMap that maps every input type to the fixed output type R.
template <typename R>
struct FixedType {
  template <typename T>
  using type = R;
};

// Applies Functor elementwise to the single input and writes to the single
// output. The functor is constructed from the operator itself, so it can read
// operator arguments; TypeMap decides the output element type.
template <
    typename InputTypes,
    class Context,
    class Functor,
    class TypeMap = SameTypeAsInput>
class UnaryElementwiseWithArgsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  UnaryElementwiseWithArgsOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws), functor_(*this) {}

  bool RunOnDevice() override {
    return DispatchHelper<InputTypes>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& input = Input(0);
    auto* output = Output(0);
    output->ResizeLike(input);
    using R = typename TypeMap::template type<T>;
    functor_(
        input.size(),
        input.template data<T>(),
        output->template mutable_data<R>(),
        &context_);
    return true;
  }

 private:
  Functor functor_;
};
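// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this header): a hypothetical functor that
// reads an operator argument in its constructor, as UnaryElementwiseWithArgsOp
// above expects. The names "ScaleByArgCPUFunctor", "ScaleByArg" and the
// "value" argument are assumptions made up for this example.
//
//   struct ScaleByArgCPUFunctor {
//     explicit ScaleByArgCPUFunctor(OperatorBase& op)
//         : value_(op.GetSingleArgument<float>("value", 1.0f)) {}
//     template <typename T>
//     void operator()(const int n, const T* x, T* y, CPUContext* /*ctx*/) {
//       for (int i = 0; i < n; ++i) {
//         y[i] = x[i] * static_cast<T>(value_);
//       }
//     }
//     float value_;
//   };
//
//   REGISTER_CPU_OPERATOR(
//       ScaleByArg,
//       UnaryElementwiseWithArgsOp<NumericTypes, CPUContext,
//                                  ScaleByArgCPUFunctor>);
// ---------------------------------------------------------------------------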

// WithDefaultConstructor is a functor that can be used as the functor of an
// UnaryElementwiseWithArgsOp: it ignores the operator passed to its
// constructor and simply default-constructs the wrapped Functor on each call.
template <typename Functor>
struct WithDefaultConstructor {
  explicit WithDefaultConstructor(OperatorBase& /*op*/) {}

  template <typename In, typename Out, typename Context>
  void operator()(int n, const In* in, Out* out, Context* c) {
    Functor()(n, in, out, c);
  }
};

// Convenience alias for unary functors that do not need access to operator
// arguments; the functor is wrapped in WithDefaultConstructor.
template <
    typename InputTypes,
    class Context,
    class Functor,
    class OutputType = SameTypeAsInput>
using UnaryElementwiseOp = UnaryElementwiseWithArgsOp<
    InputTypes,
    Context,
    WithDefaultConstructor<Functor>,
    OutputType>;
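// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this header): a hypothetical "Negate"
// operator built on the UnaryElementwiseOp alias. The functor and operator
// names are assumptions used only to show the expected functor shape;
// Caffe2's real unary operators are registered in their own source files.
//
//   struct NegateCPUFunctor {
//     template <typename T>
//     void operator()(const int n, const T* x, T* y, CPUContext* /*ctx*/) {
//       for (int i = 0; i < n; ++i) {
//         y[i] = -x[i];
//       }
//     }
//   };
//
//   REGISTER_CPU_OPERATOR(
//       Negate,
//       UnaryElementwiseOp<NumericTypes, CPUContext, NegateCPUFunctor>);
// ---------------------------------------------------------------------------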

// Decomposes broadcasting B against A at `axis` into a (pre, n, post) triple:
// after stripping B's leading and trailing singleton dimensions, A is treated
// as a [pre, n, post] tensor and B as an [n] tensor.
template <typename Context>
std::tuple<size_t, size_t, size_t> calculate_broadcast_sizes(
    const Tensor<Context>& A,
    const Tensor<Context>& B,
    int axis) {
  CAFFE_ENFORCE_GE(
      A.ndim(),
      B.ndim(),
      "If you are doing broadcasting, input1 should have "
      "a smaller or equal number of dimensions.");
  if (axis == -1) {
    axis = A.ndim() - B.ndim();
  }
  CAFFE_ENFORCE(
      axis >= 0 && axis <= A.ndim() - B.ndim(),
      "Broadcast axis should be in the range of "
      "[0, A.ndim() - B.ndim()], but axis = ",
      axis);

  int b_dim_start = 0;
  while (b_dim_start < B.ndim() && B.dim(b_dim_start) == 1) {
    ++b_dim_start;
  }
  int b_dim_end = B.ndim() - 1;
  while (b_dim_end >= b_dim_start && B.dim(b_dim_end) == 1) {
    --b_dim_end;
  }
  size_t pre = 1, n = 1, post = 1;
  for (int i = 0; i < axis + b_dim_start; ++i) {
    pre *= A.dim(i);
  }
  for (int i = b_dim_start; i <= b_dim_end; ++i) {
    CAFFE_ENFORCE_EQ(
        A.dim(i + axis), B.dim(i), "Broadcast dimension mismatch.");
    n *= B.dim(i);
  }
  for (int i = axis + b_dim_end + 1; i < A.ndim(); ++i) {
    post *= A.dim(i);
  }
  return std::make_tuple(pre, n, post);
}
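// Worked examples (illustration only, not in the original header):
//   * A of shape [2, 5, 3, 4], B of shape [3, 4], axis = -1: the axis
//     resolves to A.ndim() - B.ndim() = 2, so
//     (pre, n, post) = (2 * 5, 3 * 4, 1) = (10, 12, 1).
//   * A of shape [N, C, H, W] = [2, 3, 4, 5], B of shape [C] = [3], axis = 1:
//     (pre, n, post) = (2, 3, 4 * 5) = (2, 3, 20).
// BinaryElementwiseOp below uses RunWithBroadcast when post == 1 and
// RunWithBroadcast2 otherwise.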

// Performs a binary elementwise operation (e.g. Add, Sub, Mul, Div) on inputs
// A and B, with optional broadcasting of B against a contiguous block of A's
// dimensions, controlled by the "broadcast", "axis", "axis_str" and "order"
// arguments. Functor supplies the actual computation.
template <
    typename InputTypes,
    class Context,
    class Functor,
    class TypeMap = SameTypeAsInput>
class BinaryElementwiseOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  BinaryElementwiseOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(bool, "broadcast", enable_broadcast_, 0),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW"),
        functor_() {
    // Figure out the correct axis to use.
    if (enable_broadcast_) {
      if (axis_ != -1) {
        // Get axis from an explicit axis argument.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(),
            0,
            "Args axis and axis_str cannot be used simultaneously.");
      } else if (axis_str_.size()) {
        // Get the axis index semantically.
        CAFFE_ENFORCE_EQ(
            axis_str_.size(), 1, "Unsupported axis string ", axis_str_);
        size_t semantic_axis_ = order_.find(axis_str_);
        CAFFE_ENFORCE_NE(
            semantic_axis_,
            string::npos,
            "Unrecognizable axis string ",
            axis_str_,
            " from order string ",
            order_);
        axis_ = semantic_axis_;
      }
    } else {
      CAFFE_ENFORCE(
          axis_ == -1 && axis_str_.size() == 0,
          "Do not specify axis or axis_str if broadcast is not enabled.");
    }
  }

  bool RunOnDevice() override {
    return DispatchHelper<InputTypes>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& A = Input(0);
    const auto& B = Input(1);
    auto* C = Output(0);
    CAFFE_ENFORCE(
        &B != C || !enable_broadcast_,
        "In-place is allowed only with the first tensor when broadcasting");
    C->ResizeLike(A);
    const T* Adata = A.template data<T>();
    const T* Bdata = B.template data<T>();
    auto* Cdata =
        C->template mutable_data<typename TypeMap::template type<T>>();
    if (!enable_broadcast_) {
      CAFFE_ENFORCE_EQ(
          A.dims(),
          B.dims(),
          "Dimension mismatch - did you forget to set broadcast=1?");
      functor_.template Run<false>(A.size(), Adata, Bdata, Cdata, &context_);
    } else if (B.size() == 1) {
      functor_.template Run<true>(A.size(), Adata, Bdata, Cdata, &context_);
    } else {
      size_t pre, n, post;
      std::tie(pre, n, post) = calculate_broadcast_sizes(A, B, axis_);
      if (post == 1) {
        functor_.RunWithBroadcast(Adata, Bdata, Cdata, pre, n, &context_);
      } else {
        functor_.RunWithBroadcast2(
            Adata, Bdata, Cdata, pre, n, post, &context_);
      }
    }
    return true;
  }

 private:
  bool enable_broadcast_;
  int axis_;
  string axis_str_;
  string order_;
  Functor functor_;
};
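// Example (illustration only, not in the original header): with the default
// order "NCHW" and the argument axis_str = "C", the constructor above resolves
// axis_ = order_.find("C") = 1, i.e. B is broadcast along the channel
// dimension; passing axis = 1 directly is equivalent.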

// Wraps a plain binary functor that only implements Run(n, a, b, out, ctx) so
// it can be used with BinaryElementwiseOp; every broadcast code path is
// rejected.
template <typename Functor>
struct WithoutBroadcast {
  template <bool b_is_scalar, typename T, typename R, typename Context>
  inline void Run(size_t n, const T* a, const T* b, R* out, Context* c) {
    if (b_is_scalar) {
      CAFFE_THROW("Broadcast not supported.");
    } else {
      Functor().Run(n, a, b, out, c);
    }
  }
  template <typename T, typename R, typename Context>
  inline void RunWithBroadcast(
      const T* /*a*/,
      const T* /*b*/,
      R* /*out*/,
      size_t /*pre*/,
      size_t /*n*/,
      Context*) {
    CAFFE_NOT_IMPLEMENTED;
  }
  template <typename T, typename R, typename Context>
  inline void RunWithBroadcast2(
      const T* /*a*/,
      const T* /*b*/,
      R* /*out*/,
      size_t /*pre*/,
      size_t /*n*/,
      size_t /*post*/,
      Context*) {
    CAFFE_NOT_IMPLEMENTED;
  }
};
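// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this header): a hypothetical non-broadcast
// binary operator. "ElemMax"/"ElemMaxFunctor" are made-up names; the point is
// only that a functor with a single Run method can be plugged into
// BinaryElementwiseOp through the WithoutBroadcast adapter above.
//
//   struct ElemMaxFunctor {
//     template <typename T, typename Context>
//     void Run(size_t n, const T* a, const T* b, T* out, Context* /*ctx*/) {
//       for (size_t i = 0; i < n; ++i) {
//         out[i] = a[i] > b[i] ? a[i] : b[i];
//       }
//     }
//   };
//
//   REGISTER_CPU_OPERATOR(
//       ElemMax,
//       BinaryElementwiseOp<
//           NumericTypes,
//           CPUContext,
//           WithoutBroadcast<ElemMaxFunctor>>);
// ---------------------------------------------------------------------------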

// Gradient operator for elementwise division.
template <class Context>
class DivGradientOp final : public Operator<Context> {
 public:
  USE_SIMPLE_CTOR_DTOR(DivGradientOp);
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override;
};

// Reduction helpers used by SumReduceLikeOp (declared here for CPUContext and
// defined out of line).
namespace SRLHelper {

template <typename T>
void sum2one(const T* a, T* y, size_t n);

template <typename T>
void RunWithBroadcastFront(const T* a, T* y, size_t pre, size_t n, CPUContext*);

template <typename T>
void RunWithBroadcastBack(const T* a, T* y, size_t post, size_t n, CPUContext*);

template <typename T>
void RunWithBroadcast2(
    const T* a,
    T* y,
    size_t pre,
    size_t n,
    size_t post,
    CPUContext*);

} // namespace SRLHelper
// Sum reduction operator that is used for computing the gradient in cases
// where the forward op is in broadcast mode.
template <class Context>
class SumReduceLikeOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  SumReduceLikeOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        OP_SINGLE_ARG(int, "axis", axis_, -1),
        OP_SINGLE_ARG(string, "axis_str", axis_str_, ""),
        OP_SINGLE_ARG(string, "order", order_, "NCHW") {
    if (axis_ != -1) {
      // Get axis from an explicit axis argument.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(),
          0,
          "Args axis and axis_str cannot be used simultaneously.");
    } else if (axis_str_.size()) {
      // Get the axis index semantically.
      CAFFE_ENFORCE_EQ(
          axis_str_.size(), 1, "Unsupported axis string ", axis_str_);
      size_t semantic_axis = order_.find(axis_str_);
      CAFFE_ENFORCE_NE(
          semantic_axis,
          string::npos,
          "Unrecognizable axis string ",
          axis_str_,
          " from order string ",
          order_);
      axis_ = semantic_axis;
    }
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType();

 private:
  int axis_;
  string axis_str_;
  string order_;
  Tensor<Context> ones_;
  Tensor<Context> sum_buffer_;
};
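// Example (illustration only, not in the original header): if the forward op
// computed C = A + B with broadcast, A of shape [N, C, H, W] and B of shape
// [C] at axis 1, then dB is obtained by summing dC over the N, H and W
// dimensions down to B's shape; that is the kind of reduction this operator
// is intended to perform, given the tensor to reduce and a tensor with the
// target shape as inputs.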

template <class Context>
bool DivGradientOp<Context>::RunOnDevice() {
  auto& Y = Input(0);
  auto& Z = Input(1);
  auto& dZ = Input(2);
  auto* dX = Output(0);
  auto* dY = Output(1);
  CAFFE_ENFORCE_GT(Y.size(), 0);
  CAFFE_ENFORCE_GT(Z.size(), 0);
  dX->ResizeLike(Y);
  dY->ResizeLike(Y);

  const float* Ydata = Y.template data<float>();
  const float* Zdata = Z.template data<float>();
  const float* dZdata = dZ.template data<float>();
  float* dXdata = dX->template mutable_data<float>();
  float* dYdata = dY->template mutable_data<float>();

  ElementWiseDivide(context_, Y.size(), dXdata, dYdata, dZdata, Ydata, Zdata);
  return true;
}
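// Note (illustration only, not in the original header): for a forward
// division Z = X / Y, the chain rule gives
//   dX = dZ / Y
//   dY = -dZ * X / (Y * Y) = -dZ * Z / Y
// so the gradient can be computed from (dZ, Y, Z) alone, which is what the
// per-device ElementWiseDivide overloads called above are expected to do.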

// For arithmetic operators, Eigen provides a good way to vectorize even
// when broadcasting.
#define EIGEN_FUNCTOR(name, eigen_op, input_type, output_type) \
  struct Eigen##name##Functor { \
    template <int b_is_scalar, typename T, typename R> \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      if (b_is_scalar) { \
        EigenVectorArrayMap<R>(out, n) = \
            eigen_op((ConstEigenVectorArrayMap<T>(a, n)), (b[0])); \
      } else { \
        EigenVectorArrayMap<R>(out, n) = eigen_op( \
            (ConstEigenVectorArrayMap<T>(a, n)), \
            (ConstEigenVectorArrayMap<T>(b, n))); \
      } \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        CPUContext*) { \
      EigenArrayMap<R>(out, n, pre) = eigen_op( \
          (ConstEigenArrayMap<T>(a, n, pre).colwise()), \
          (ConstEigenVectorArrayMap<T>(b, n))); \
    } \
    template <typename T, typename R> \
    void RunWithBroadcast2( \
        const T* a, \
        const T* b, \
        R* out, \
        size_t pre, \
        size_t n, \
        size_t post, \
        CPUContext*) { \
      for (int i = 0; i < pre; ++i) { \
        EigenArrayMap<R>(out + i * n * post, post, n) = eigen_op( \
            (ConstEigenArrayMap<T>(a + i * n * post, post, n).rowwise()), \
            (Eigen::Map<const Eigen::Array<T, 1, Eigen::Dynamic>>(b, n))); \
      } \
    } \
  }; \
  REGISTER_CPU_OPERATOR( \
      name, \
      BinaryElementwiseOp< \
          input_type, \
          CPUContext, \
          Eigen##name##Functor, \
          output_type>)
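// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this header): how EIGEN_FUNCTOR is
// typically invoked from an operator's .cc file. Whether an "Add" operator is
// actually defined this way in the current code base is an assumption; the
// snippet only shows the intended expansion, which defines EigenAddFunctor
// and registers the CPU operator in one go.
//
//   #define EIGEN_ADD(x, y) ((x) + (y))
//   EIGEN_FUNCTOR(Add, EIGEN_ADD, NumericTypes, SameTypeAsInput);
//   #undef EIGEN_ADD
// ---------------------------------------------------------------------------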

} // namespace caffe2

#endif // CAFFE2_OPERATORS_ELEMENTWISE_OP_H_