Caffe2 - C++ API
A deep learning, cross-platform ML framework
elementwise_op.cc
1 
17 #include "caffe2/operators/elementwise_op.h"
18 
19 namespace caffe2 {
20 
21 // For some comparison and logical operators, Eigen does not have vectorized
22 // math, so we need to improvise.
// NAIVE_FUNCTOR(name, op, input_type, output_type)
//
// Defines `Naive<name>Functor`, a plain-loop binary functor, and registers the
// CPU operator `name` as
// BinaryElementwiseOp<input_type, CPUContext, Naive<name>Functor, output_type>.
//
//   name        - operator name; also pasted into the functor's struct name.
//   op          - function-like macro or callable, applied as op(lhs, rhs).
//   input_type  - forwarded to BinaryElementwiseOp as its input type argument.
//   output_type - forwarded to BinaryElementwiseOp as its output type argument.
//
// The generated functor has three entry points:
//   Run<b_is_scalar>(n, a, b, out, ctx)
//       out[i] = op(a[i], b[i]) for i in [0, n); when b_is_scalar is non-zero
//       every element of `a` is paired with b[0] instead.
//   RunWithBroadcast(a, b, out, pre, n, ctx)
//       a/out are indexed as [pre][n] and b as [n]:
//       out[i][j] = op(a[i][j], b[j]).
//   RunWithBroadcast2(a, b, out, pre, n, post, ctx)
//       a/out are indexed as [pre][n][post] and b as [n]:
//       out[i][j][k] = op(a[i][j][k], b[j]).
// The CPUContext* argument is unnamed and unused in all three.
//
// Loop counters are size_t so that they match the size_t extents: the loop
// conditions no longer mix signed and unsigned operands, and a counter can no
// longer overflow when an extent exceeds INT_MAX.
//
// Keep comments out of the macro body: a `//` comment followed by a line
// continuation would swallow the next line.
#define NAIVE_FUNCTOR(name, op, input_type, output_type)                     \
  struct Naive##name##Functor {                                              \
    template <int b_is_scalar, typename T, typename R>                       \
    inline void Run(size_t n, const T* a, const T* b, R* out, CPUContext*) { \
      for (size_t i = 0; i < n; ++i) {                                       \
        out[i] = op(a[i], b[b_is_scalar ? 0 : i]);                           \
      }                                                                      \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast(                                                   \
        const T* a,                                                          \
        const T* b,                                                          \
        R* out,                                                              \
        size_t pre,                                                          \
        size_t n,                                                            \
        CPUContext*) {                                                       \
      for (size_t i = 0; i < pre; ++i) {                                     \
        const size_t row = i * n;                                            \
        for (size_t j = 0; j < n; ++j) {                                     \
          out[row + j] = op(a[row + j], b[j]);                               \
        }                                                                    \
      }                                                                      \
    }                                                                        \
    template <typename T, typename R>                                        \
    void RunWithBroadcast2(                                                  \
        const T* a,                                                          \
        const T* b,                                                          \
        R* out,                                                              \
        size_t pre,                                                          \
        size_t n,                                                            \
        size_t post,                                                         \
        CPUContext*) {                                                       \
      for (size_t i = 0; i < pre; ++i) {                                     \
        for (size_t j = 0; j < n; ++j) {                                     \
          const size_t base = (i * n + j) * post;                            \
          for (size_t k = 0; k < post; ++k) {                                \
            out[base + k] = op(a[base + k], b[j]);                           \
          }                                                                  \
        }                                                                    \
      }                                                                      \
    }                                                                        \
  };                                                                         \
  REGISTER_CPU_OPERATOR(                                                     \
      name,                                                                  \
      BinaryElementwiseOp<                                                   \
          input_type,                                                        \
          CPUContext,                                                        \
          Naive##name##Functor,                                              \
          output_type>)
70 
71 #define NAIVE_LT(x, y) ((x) < (y))
72 NAIVE_FUNCTOR(LT, NAIVE_LT, NumericTypes, FixedType<bool>);
73 #undef NAIVE_LT
74 #define NAIVE_LE(x, y) ((x) <= (y))
75 NAIVE_FUNCTOR(LE, NAIVE_LE, NumericTypes, FixedType<bool>);
76 #undef NAIVE_LE
77 #define NAIVE_GT(x, y) ((x) > (y))
78 NAIVE_FUNCTOR(GT, NAIVE_GT, NumericTypes, FixedType<bool>);
79 #undef NAIVE_GT
80 #define NAIVE_GE(x, y) ((x) >= (y))
81 NAIVE_FUNCTOR(GE, NAIVE_GE, NumericTypes, FixedType<bool>);
82 #undef NAIVE_GE
83 #define NAIVE_EQ(x, y) ((x) == (y))
84 NAIVE_FUNCTOR(EQ, NAIVE_EQ, IntBoolTypes, FixedType<bool>);
85 #undef NAIVE_EQ
86 #define NAIVE_AND(x, y) ((x) & (y))
87 NAIVE_FUNCTOR(And, NAIVE_AND, BoolTypes, FixedType<bool>);
88 #undef NAIVE_AND
89 #define NAIVE_OR(x, y) ((x) | (y))
90 NAIVE_FUNCTOR(Or, NAIVE_OR, BoolTypes, FixedType<bool>);
91 #undef NAIVE_OR
92 #define NAIVE_XOR(x, y) ((x) ^ (y))
93 NAIVE_FUNCTOR(Xor, NAIVE_XOR, BoolTypes, FixedType<bool>);
94 #undef NAIVE_XOR
95 
96 struct NotFunctor {
97  inline void operator()(const int n, const bool* x, bool* y, CPUContext*) {
98  for (int i = 0; i < n; ++i) {
99  y[i] = !x[i];
100  }
101  }
102 };
103 REGISTER_CPU_OPERATOR(
104  Not,
106 
107 template <typename T>
108 void SRLHelper::sum2one(const T* x, T* y, size_t n) {
109  *y = ConstEigenArrayMap<T>(x, n, 1).sum();
110 }
111 
112 template <typename T>
113 void SRLHelper::RunWithBroadcastFront(
114  const T* x,
115  T* y,
116  size_t pre,
117  size_t n,
118  CPUContext*) {
119  EigenArrayMap<T>(y, n, 1) = ConstEigenArrayMap<T>(x, n, pre).rowwise().sum();
120 }
121 
122 template <typename T>
123 void SRLHelper::RunWithBroadcastBack(
124  const T* x,
125  T* y,
126  size_t post,
127  size_t n,
128  CPUContext*) {
129  EigenArrayMap<T>(y, 1, n) = ConstEigenArrayMap<T>(x, post, n).colwise().sum();
130 }
131 
132 template <typename T>
133 void SRLHelper::RunWithBroadcast2(
134  const T* a,
135  T* y,
136  size_t pre,
137  size_t n,
138  size_t post,
139  CPUContext*) {
140  for (int i = 0; i < n; ++i) {
141  y[i] = 0;
142  for (int j = 0; j < pre; ++j) {
143  for (int k = 0; k < post; ++k) {
144  y[i] += a[(j * n + i) * post + k];
145  }
146  }
147  }
148 }
149 
// Templated member of an explicit class-template specialization (see the
// `template <>` / `template <typename T>` pair below); presumably a member of
// SumReduceLikeOp<CPUContext>, the type registered right after it — confirm.
// NOTE(review): the declarator line that should sit between
// `template <typename T>` and the first statement is absent from this view
// (the embedded listing numbers jump from 151 to 153). Restore it from the
// upstream file before compiling.
//
// Reads Input(0) as A and Input(1) as B, resizes Output(0) (C) like B, and
// fills C from A's data through the SRLHelper routines. Returns true on every
// path that reaches the end; CAFFE_ENFORCE rejects C being the same object
// as B.
150 template <>
151 template <typename T>
153  const auto& A = Input(0);
154  const auto& B = Input(1);
155  auto* C = Output(0);
// The output must not share its tensor object with B.
156  CAFFE_ENFORCE(&B != C, "In-place is not allowed.");
157  C->ResizeLike(B);
158  const T* Adata = A.template data<T>();
159  auto* Cdata = C->template mutable_data<T>();
160  if (B.size() == 1) {
// Single-element target: collapse all of A into one value.
161  auto count = A.size();
162  SRLHelper::sum2one<T>(Adata, Cdata, count);
163  } else {
// Otherwise split the work into (pre, n, post) extents computed from A, B and
// axis_, and pick a specialised routine when post or pre equals 1.
164  size_t pre, n, post;
165  std::tie(pre, n, post) = calculate_broadcast_sizes(A, B, axis_);
166  if (post == 1) {
167  SRLHelper::RunWithBroadcastFront<T>(Adata, Cdata, pre, n, &context_);
168  } else if (pre == 1) {
169  SRLHelper::RunWithBroadcastBack<T>(Adata, Cdata, post, n, &context_);
170  } else {
// General case: neither pre nor post is 1.
171  SRLHelper::RunWithBroadcast2<T>(Adata, Cdata, pre, n, post, &context_);
172  }
173  }
174  return true;
175 }
// Registers SumReduceLikeOp<CPUContext> as the CPU operator "SumReduceLike".
176 REGISTER_CPU_OPERATOR(SumReduceLike, SumReduceLikeOp<CPUContext>);
177 
178 } // namespace caffe2
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Definition: context.h:82
Copyright (c) 2016-present, Facebook, Inc.