Caffe2 - C++ API
A deep learning, cross platform ML framework
elementwise.cc
#include "caffe2/utils/math/elementwise.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>

#ifdef CAFFE2_USE_ACCELERATE
#include <Accelerate/Accelerate.h>
#endif // CAFFE2_USE_ACCELERATE

#ifdef CAFFE2_USE_MKL
#include <mkl.h>
#endif // CAFFE2_USE_MKL

#include "caffe2/core/context.h"
#include "caffe2/utils/eigen_utils.h"
16 
17 namespace caffe2 {
18 namespace math {
19 
21 // MKL VML alternatives.
22 // Depending on whether we are using MKL, we will delegate the Caffe2 math
23 // functions that are VML-related to either the VML call or the Eigen
24 // implementation. If the compiler flags (such as AVX) are set correctly for
25 // your CPU architecture, Eigen usually delivers throughput comparable to the
26 // VML functions.
28 #ifdef CAFFE2_USE_MKL
29 
30 #define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Func, MKLFunc, ...) \
31  template <> \
32  C10_EXPORT void Func<T, CPUContext>( \
33  const int N, const T* X, T* Y, CPUContext* /* context */) { \
34  MKLFunc(N, X, Y, ##__VA_ARGS__); \
35  }
36 DELEGATE_SIMPLE_UNARY_FUNCTION(
37  float,
38  Exp,
39  vmsExp,
40  VML_HA | VML_FTZDAZ_OFF | VML_ERRMODE_IGNORE)
41 DELEGATE_SIMPLE_UNARY_FUNCTION(
42  double,
43  Exp,
44  vmdExp,
45  VML_HA | VML_FTZDAZ_OFF | VML_ERRMODE_IGNORE)
46 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Log, vsLn)
47 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Log, vdLn)
48 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sin, vsSin)
49 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sin, vdSin)
50 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Asin, vsAsin)
51 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Asin, vdAsin)
52 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cos, vsCos)
53 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cos, vdCos)
54 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Acos, vsAcos)
55 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Acos, vdAcos)
56 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Tan, vsTan)
57 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Tan, vdTan)
58 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Atan, vsAtan)
59 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Atan, vdAtan)
60 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sinh, vsSinh)
61 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sinh, vdSinh)
62 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cosh, vsCosh)
63 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cosh, vdCosh)
64 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Abs, vsAbs)
65 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Abs, vdAbs)
66 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sqr, vsSqr)
67 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sqr, vdSqr)
68 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sqrt, vsSqrt)
69 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sqrt, vdSqrt)
70 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Rsqrt, vsInvSqrt)
71 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Rsqrt, vdInvSqrt)
72 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cbrt, vsCbrt)
73 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cbrt, vdCbrt)
74 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Inv, vsInv)
75 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Inv, vdInv)
76 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Erf, vsErf)
77 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Erf, vdErf)
78 #undef DELEGATE_SIMPLE_UNARY_FUNCTION
79 
80 #define DELEGATE_SINCOS(T, MKLFunc) \
81  template <> \
82  C10_EXPORT void SinCos<T, CPUContext>( \
83  const int N, const T* X, T* S, T* C, CPUContext* /* context */) { \
84  MKLFunc(N, X, S, C); \
85  }
86 DELEGATE_SINCOS(float, vsSinCos)
87 DELEGATE_SINCOS(double, vdSinCos)
88 #undef DELEGATE_SINCOS
89 
90 #define DELEGATE_POWX(T, MKLFunc) \
91  template <> \
92  C10_EXPORT void Powx<T, CPUContext>( \
93  const int N, const T* A, const T b, T* Y, CPUContext* /* context */) { \
94  MKLFunc(N, A, b, Y); \
95  }
96 DELEGATE_POWX(float, vsPowx)
97 DELEGATE_POWX(double, vdPowx)
98 #undef DELEGATE_POWX
99 
100 #define DELEGATE_SIMPLE_BINARY_FUNCTION(T, Func, MKLFunc) \
101  template <> \
102  C10_EXPORT void Func<T, CPUContext>( \
103  const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
104  MKLFunc(N, A, B, C); \
105  }
106 DELEGATE_SIMPLE_BINARY_FUNCTION(float, Add, vsAdd)
107 DELEGATE_SIMPLE_BINARY_FUNCTION(double, Add, vdAdd)
108 DELEGATE_SIMPLE_BINARY_FUNCTION(float, Sub, vsSub)
109 DELEGATE_SIMPLE_BINARY_FUNCTION(double, Sub, vdSub)
110 DELEGATE_SIMPLE_BINARY_FUNCTION(float, Mul, vsMul)
111 DELEGATE_SIMPLE_BINARY_FUNCTION(double, Mul, vdMul)
112 DELEGATE_SIMPLE_BINARY_FUNCTION(float, Div, vsDiv)
113 DELEGATE_SIMPLE_BINARY_FUNCTION(double, Div, vdDiv)
114 #undef DELEGATE_SIMPLE_BINARY_FUNCTION
115 
116 #else // CAFFE2_USE_MKL
117 
118 #define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Func, EigenFunc) \
119  template <> \
120  C10_EXPORT void Func<T, CPUContext>( \
121  const int N, const T* X, T* Y, CPUContext* /* context */) { \
122  EigenVectorArrayMap<T>(Y, N) = \
123  ConstEigenVectorArrayMap<T>(X, N).EigenFunc(); \
124  }
125 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Exp, exp)
126 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Exp, exp)
127 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Log, log)
128 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Log, log)
129 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sin, sin)
130 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sin, sin)
131 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Asin, asin)
132 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Asin, asin)
133 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cos, cos)
134 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cos, cos)
135 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Acos, acos)
136 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Acos, acos)
137 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Tan, tan)
138 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Tan, tan)
139 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Atan, atan)
140 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Atan, atan)
141 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Abs, abs)
142 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Abs, abs)
143 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sqr, square)
144 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sqr, square)
145 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sqrt, sqrt)
146 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sqrt, sqrt)
147 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Rsqrt, rsqrt)
148 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Rsqrt, rsqrt)
149 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Inv, inverse)
150 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Inv, inverse)
151 #undef DELEGATE_SIMPLE_UNARY_FUNCTION
152 
153 #define CAFFE2_SPECIALIZED_SINH(T) \
154  template <> \
155  C10_EXPORT void Sinh<T, CPUContext>( \
156  const int N, const T* X, T* Y, CPUContext* /* context */) { \
157  ConstEigenVectorArrayMap<T> X_arr(X, N); \
158  EigenVectorArrayMap<T>(Y, N) = (X_arr.exp() - (-X_arr).exp()) / T(2); \
159  }
160 CAFFE2_SPECIALIZED_SINH(float)
161 CAFFE2_SPECIALIZED_SINH(double)
162 #undef CAFFE2_SPECIALIZED_SINH
163 
164 #define CAFFE2_SPECIALIZED_COSH(T) \
165  template <> \
166  C10_EXPORT void Cosh<T, CPUContext>( \
167  const int N, const T* X, T* Y, CPUContext* /* context */) { \
168  ConstEigenVectorArrayMap<T> X_arr(X, N); \
169  EigenVectorArrayMap<T>(Y, N) = (X_arr.exp() + (-X_arr).exp()) / T(2); \
170  }
171 CAFFE2_SPECIALIZED_COSH(float)
172 CAFFE2_SPECIALIZED_COSH(double)
173 #undef CAFFE2_SPECIALIZED_COSH
174 
175 #define CAFFE2_SPECIALIZED_SINCOS(T) \
176  template <> \
177  C10_EXPORT void SinCos<T, CPUContext>( \
178  const int N, const T* X, T* S, T* C, CPUContext* /* context */) { \
179  EigenVectorArrayMap<T>(S, N) = ConstEigenVectorArrayMap<T>(X, N).sin(); \
180  EigenVectorArrayMap<T>(C, N) = ConstEigenVectorArrayMap<T>(X, N).cos(); \
181  }
182 CAFFE2_SPECIALIZED_SINCOS(float)
183 CAFFE2_SPECIALIZED_SINCOS(double)
184 #undef CAFFE2_SPECIALIZED_SINCOS
185 
186 #define CAFFE2_SPECIALIZED_POWX(T) \
187  template <> \
188  C10_EXPORT void Powx<T, CPUContext>( \
189  const int N, const T* A, const T b, T* Y, CPUContext* /* context */) { \
190  EigenVectorArrayMap<T>(Y, N) = ConstEigenVectorArrayMap<T>(A, N).pow(b); \
191  }
192 CAFFE2_SPECIALIZED_POWX(float)
193 CAFFE2_SPECIALIZED_POWX(double)
194 #undef CAFFE2_SPECIALIZED_POWX
195 
196 #define CAFFE2_SPECIALIZED_CBRT(T) \
197  template <> \
198  C10_EXPORT void Cbrt<T, CPUContext>( \
199  const int N, const T* X, T* Y, CPUContext* /* context */) { \
200  std::transform(X, X + N, Y, [](const T x) { return cbrt(x); }); \
201  }
202 CAFFE2_SPECIALIZED_CBRT(float)
203 CAFFE2_SPECIALIZED_CBRT(double)
204 #undef CAFFE2_SPECIALIZED_CBRT
205 
206 #define CAFFE2_SPECIALIZED_ERF(T) \
207  template <> \
208  C10_EXPORT void Erf<T, CPUContext>( \
209  const int N, const T* X, T* Y, CPUContext* /* context */) { \
210  std::transform(X, X + N, Y, [](const T x) { return erf(x); }); \
211  }
212 CAFFE2_SPECIALIZED_ERF(float)
213 CAFFE2_SPECIALIZED_ERF(double)
214 #undef CAFFE2_SPECIALIZED_ERF
215 
216 #define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
217  template <> \
218  C10_EXPORT void Func<T, CPUContext>( \
219  const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
220  EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
221  EigenOp ConstEigenVectorArrayMap<T>(B, N); \
222  }
223 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Add, +)
224 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Add, +)
225 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Sub, -)
226 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Sub, -)
227 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Mul, *)
228 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Mul, *)
229 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(float, Div, /)
230 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(double, Div, /)
231 #undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR
232 
233 #endif // CAFFE2_USE_MKL
234 
236 // BLAS alternatives.
237 // Depending on whether we have specified an external BLAS library or not, we
238 // will delegate the Caffe2 math functions that are BLAS-related to either the
239 // CBLAS call or the Eigen implementation.
241 #ifdef CAFFE2_USE_EIGEN_FOR_BLAS
242 
243 #define CAFFE2_SPECIALIZED_SCALE(TAlpha, TData) \
244  template <> \
245  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
246  const int N, \
247  const TAlpha alpha, \
248  const TData* X, \
249  TData* Y, \
250  CPUContext* /* context */) { \
251  if (X == Y) { \
252  EigenVectorArrayMap<TData>(Y, N) *= static_cast<TData>(alpha); \
253  } else { \
254  EigenVectorArrayMap<TData>(Y, N) = \
255  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(alpha); \
256  } \
257  } \
258  template <> \
259  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
260  const int N, \
261  const TAlpha* alpha, \
262  const TData* X, \
263  TData* Y, \
264  CPUContext* /* context */) { \
265  if (X == Y) { \
266  EigenVectorArrayMap<TData>(Y, N) *= static_cast<TData>(*alpha); \
267  } else { \
268  EigenVectorArrayMap<TData>(Y, N) = \
269  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(*alpha); \
270  } \
271  }
272 CAFFE2_SPECIALIZED_SCALE(float, float)
273 CAFFE2_SPECIALIZED_SCALE(double, double)
274 CAFFE2_SPECIALIZED_SCALE(float, double)
275 #undef CAFFE2_SPECIALIZED_SCALE
276 
277 #else // CAFFE2_USE_EIGEN_FOR_BLAS
278 
279 #ifdef CAFFE2_USE_MKL
280 
281 #define DELEGATE_SCALE(TAlpha, TData, MKLFunc1, MKLFunc2) \
282  template <> \
283  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
284  const int N, \
285  const TAlpha alpha, \
286  const TData* X, \
287  TData* Y, \
288  CPUContext* /* context */) { \
289  if (Y == X) { \
290  MKLFunc1(N, static_cast<TData>(alpha), Y, 1); \
291  } else { \
292  MKLFunc2(N, static_cast<TData>(alpha), X, 1, TData(0), Y, 1); \
293  } \
294  } \
295  template <> \
296  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
297  const int N, \
298  const TAlpha* alpha, \
299  const TData* X, \
300  TData* Y, \
301  CPUContext* /* context */) { \
302  if (Y == X) { \
303  MKLFunc1(N, static_cast<TData>(*alpha), Y, 1); \
304  } else { \
305  MKLFunc2(N, static_cast<TData>(*alpha), X, 1, TData(0), Y, 1); \
306  } \
307  }
308 DELEGATE_SCALE(float, float, cblas_sscal, cblas_saxpby)
309 DELEGATE_SCALE(double, double, cblas_dscal, cblas_daxpby)
310 DELEGATE_SCALE(float, double, cblas_dscal, cblas_daxpby)
311 #undef DELEGATE_SCALE
312 
313 #else // CAFFE2_USE_MKL
314 
315 #define DELEGATE_SCALE(TAlpha, TData, BLASFunc) \
316  template <> \
317  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
318  const int N, \
319  const TAlpha alpha, \
320  const TData* X, \
321  TData* Y, \
322  CPUContext* /* context */) { \
323  if (Y == X) { \
324  BLASFunc(N, static_cast<TData>(alpha), Y, 1); \
325  } else { \
326  EigenVectorArrayMap<TData>(Y, N) = \
327  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(alpha); \
328  } \
329  } \
330  template <> \
331  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
332  const int N, \
333  const TAlpha* alpha, \
334  const TData* X, \
335  TData* Y, \
336  CPUContext* /* context */) { \
337  if (Y == X) { \
338  BLASFunc(N, static_cast<TData>(*alpha), Y, 1); \
339  } else { \
340  EigenVectorArrayMap<TData>(Y, N) = \
341  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(*alpha); \
342  } \
343  }
344 DELEGATE_SCALE(float, float, cblas_sscal)
345 DELEGATE_SCALE(double, double, cblas_dscal)
346 DELEGATE_SCALE(float, double, cblas_dscal)
347 #undef DELEGATE_SCALE
348 
349 #endif // CAFFE2_USE_MKL
350 
351 #endif // CAFFE2_USE_EIGEN_FOR_BLAS
352 
354 // Common math functions used in Caffe2 that do not have a BLAS or MKL
355 // equivalent. All of these functions are implemented either via Eigen or via
356 // custom code.
358 
359 #define CAFFE2_SPECIALIZED_SET(T) \
360  template <> \
361  C10_EXPORT void Set<T, CPUContext>( \
362  const std::int64_t N, const T alpha, T* Y, CPUContext* /* context */) { \
363  if (N == 0) { \
364  return; \
365  } \
366  if (alpha == T(0)) { \
367  std::memset(Y, 0, N * sizeof(T)); \
368  } else { \
369  EigenVectorArrayMap<T>(Y, N).setConstant(alpha); \
370  } \
371  }
372 CAFFE2_SPECIALIZED_SET(float)
373 CAFFE2_SPECIALIZED_SET(double)
374 CAFFE2_SPECIALIZED_SET(int)
375 CAFFE2_SPECIALIZED_SET(std::int8_t)
376 CAFFE2_SPECIALIZED_SET(std::int16_t)
377 CAFFE2_SPECIALIZED_SET(std::int64_t)
378 CAFFE2_SPECIALIZED_SET(bool)
379 CAFFE2_SPECIALIZED_SET(char)
380 CAFFE2_SPECIALIZED_SET(std::uint8_t)
381 CAFFE2_SPECIALIZED_SET(std::uint16_t)
382 #undef CAFFE2_SPECIALIZED_SET
383 
384 #define DELEGATE_SIMPLE_UNARY_FUNCTION(T, Func, EigenFunc) \
385  template <> \
386  C10_EXPORT void Func<T, CPUContext>( \
387  const int N, const T* X, T* Y, CPUContext* /* context */) { \
388  EigenVectorArrayMap<T>(Y, N) = \
389  ConstEigenVectorArrayMap<T>(X, N).EigenFunc(); \
390  }
391 // Eigen's Tanh implementation is faster than MKL, so use Eigen here.
392 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Tanh, tanh)
393 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Tanh, tanh)
394 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Sign, sign)
395 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Sign, sign)
396 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Sign, sign)
397 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Sign, sign)
398 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Abs, abs)
399 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Abs, abs)
400 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int32_t, Cube, cube)
401 DELEGATE_SIMPLE_UNARY_FUNCTION(std::int64_t, Cube, cube)
402 DELEGATE_SIMPLE_UNARY_FUNCTION(float, Cube, cube)
403 DELEGATE_SIMPLE_UNARY_FUNCTION(double, Cube, cube)
404 #undef DELEGATE_SIMPLE_UNARY_FUNCTION
405 
406 #define CAFFE2_SPECIALIZED_NEG(T) \
407  template <> \
408  C10_EXPORT void Neg<T, CPUContext>( \
409  const int N, const T* X, T* Y, CPUContext* /* context */) { \
410  EigenVectorArrayMap<T>(Y, N) = -ConstEigenVectorArrayMap<T>(X, N); \
411  }
412 CAFFE2_SPECIALIZED_NEG(std::int32_t)
413 CAFFE2_SPECIALIZED_NEG(std::int64_t)
414 CAFFE2_SPECIALIZED_NEG(float)
415 CAFFE2_SPECIALIZED_NEG(double)
416 #undef CAFFE2_SPECIALIZED_NEG
417 
418 #define CAFFE2_SPECIALIZED_SCALE(TAlpha, TData) \
419  template <> \
420  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
421  const int N, \
422  const TAlpha alpha, \
423  const TData* X, \
424  TData* Y, \
425  CPUContext* /* context */) { \
426  if (X == Y) { \
427  EigenVectorArrayMap<TData>(Y, N) *= static_cast<TData>(alpha); \
428  } else { \
429  EigenVectorArrayMap<TData>(Y, N) = \
430  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(alpha); \
431  } \
432  } \
433  template <> \
434  C10_EXPORT void Scale<TAlpha, TData, CPUContext>( \
435  const int N, \
436  const TAlpha* alpha, \
437  const TData* X, \
438  TData* Y, \
439  CPUContext* /* context */) { \
440  if (X == Y) { \
441  EigenVectorArrayMap<TData>(Y, N) *= static_cast<TData>(*alpha); \
442  } else { \
443  EigenVectorArrayMap<TData>(Y, N) = \
444  ConstEigenVectorArrayMap<TData>(X, N) * static_cast<TData>(*alpha); \
445  } \
446  }
447 CAFFE2_SPECIALIZED_SCALE(std::int32_t, std::int32_t)
448 CAFFE2_SPECIALIZED_SCALE(std::int64_t, std::int64_t)
449 #undef CAFFE2_SPECIALIZED_SCALE
450 
451 #define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
452  template <> \
453  C10_EXPORT void Func<T, CPUContext>( \
454  const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
455  EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
456  EigenOp ConstEigenVectorArrayMap<T>(B, N); \
457  }
458 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Add, +)
459 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Add, +)
460 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Sub, -)
461 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Sub, -)
462 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Mul, *)
463 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Mul, *)
464 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, Div, /)
465 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, Div, /)
466 #undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_OPERATOR
467 
468 #define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(T, Func, EigenFunc) \
469  template <> \
470  C10_EXPORT void Func<T, CPUContext>( \
471  const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
472  EigenVectorMap<T>(C, N) = ConstEigenVectorArrayMap<T>(A, N).EigenFunc( \
473  ConstEigenVectorArrayMap<T>(B, N)); \
474  }
475 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(std::int32_t, Min, min)
476 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(std::int64_t, Min, min)
477 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(float, Min, min)
478 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(double, Min, min)
479 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(std::int32_t, Max, max)
480 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(std::int64_t, Max, max)
481 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(float, Max, max)
482 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION(double, Max, max)
483 #undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_EIGEN_FUNCTION
484 
485 #define DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(T, Func, StdFunc) \
486  template <> \
487  C10_EXPORT void Func<T, CPUContext>( \
488  const int N, const T* A, const T* B, T* C, CPUContext* /* context */) { \
489  std::transform(A, A + N, B, C, StdFunc); \
490  }
491 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
492  bool,
493  And,
494  std::logical_and<bool>())
495 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
496  bool,
497  Or,
498  std::logical_or<bool>())
499 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(bool, Xor, std::bit_xor<bool>())
500 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
501  bool,
502  BitwiseAnd,
503  std::bit_and<bool>())
504 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
505  std::int32_t,
506  BitwiseAnd,
507  std::bit_and<std::int32_t>())
508 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
509  std::int64_t,
510  BitwiseAnd,
511  std::bit_and<std::int64_t>())
512 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
513  bool,
514  BitwiseOr,
515  std::bit_or<bool>())
516 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
517  std::int32_t,
518  BitwiseOr,
519  std::bit_or<std::int32_t>())
520 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
521  std::int64_t,
522  BitwiseOr,
523  std::bit_or<std::int64_t>())
524 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
525  bool,
526  BitwiseXor,
527  std::bit_xor<bool>())
528 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
529  std::int32_t,
530  BitwiseXor,
531  std::bit_xor<std::int32_t>())
532 DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION(
533  std::int64_t,
534  BitwiseXor,
535  std::bit_xor<std::int64_t>())
536 #undef DELEGATE_SIMPLE_BINARY_FUNCTION_BY_STD_FUNCTION
537 
538 #define DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(T, Func, EigenOp) \
539  template <> \
540  C10_EXPORT void Func<T, CPUContext>( \
541  const int N, \
542  const T* A, \
543  const T* B, \
544  bool* C, \
545  CPUContext* /* context */) { \
546  EigenVectorArrayMap<bool>(C, N) = ConstEigenVectorArrayMap<T>(A, N) \
547  EigenOp ConstEigenVectorArrayMap<T>(B, N); \
548  }
549 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, EQ, ==)
550 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, EQ, ==)
551 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, EQ, ==)
552 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, EQ, ==)
553 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, EQ, ==)
554 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, NE, !=)
555 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, NE, !=)
556 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, NE, !=)
557 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, NE, !=)
558 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, NE, !=)
559 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, LT, <)
560 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, LT, <)
561 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, LT, <)
562 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, LT, <)
563 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, LT, <)
564 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, LE, <=)
565 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, LE, <=)
566 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, LE, <=)
567 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, LE, <=)
568 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, LE, <=)
569 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, GT, >)
570 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, GT, >)
571 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, GT, >)
572 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, GT, >)
573 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, GT, >)
574 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(bool, GE, >=)
575 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int32_t, GE, >=)
576 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(std::int64_t, GE, >=)
577 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(float, GE, >=)
578 DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR(double, GE, >=)
579 #undef DELEGATE_SIMPLE_COMPARE_FUNCTION_BY_EIGEN_OPERATOR
580 
581 } // namespace math
582 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: OpClasses.h:659