Caffe2 - C++ API
A deep learning, cross platform ML framework
reduce.cc
1 #include "caffe2/utils/math/reduce.h"
2 
#include <algorithm>
#include <cmath>
#include <cstring>
#include <functional>
#include <limits>
#include <numeric>
#include <vector>
8 
9 #ifdef CAFFE2_USE_ACCELERATE
10 #include <Accelerate/Accelerate.h>
11 #endif // CAFFE2_USE_ACCELERATE
12 
13 #ifdef CAFFE2_USE_MKL
14 #include <mkl.h>
15 #endif // CAFFE2_USE_MKL
16 
17 #include "caffe2/core/context.h"
18 #include "caffe2/utils/eigen_utils.h"
19 #include "caffe2/utils/math/elementwise.h"
20 #include "caffe2/utils/math/utils.h"
21 
22 namespace caffe2 {
23 namespace math {
24 
25 namespace {
26 
// Generates Rowwise##Func<T>(rows, cols, alpha, X, Y, context): reduces each
// row of the row-major rows x cols matrix X with the given Eigen reduction
// and writes the result, scaled by alpha, into Y (length rows).
// X is mapped column-major as (cols, rows), so each Eigen column is one row
// of X; .colwise().EigenFunc() therefore reduces per row of X.
#define DELEGATE_ROWWISE_REDUCE_FUNCTION(Func, EigenFunc)              \
  template <typename T>                                                \
  void Rowwise##Func(                                                  \
      const int rows,                                                  \
      const int cols,                                                  \
      const T alpha,                                                   \
      const T* X,                                                      \
      T* Y,                                                            \
      CPUContext* /* context */) {                                     \
    EigenVectorMap<T>(Y, rows) = ConstEigenMatrixMap<T>(X, cols, rows) \
                                     .colwise()                        \
                                     .EigenFunc()                      \
                                     .transpose() *                    \
        alpha;                                                         \
  }
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMin, minCoeff)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMax, maxCoeff)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceSum, sum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceMean, mean)
// The "template" keyword is required to name the lpNorm member template.
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceL1, template lpNorm<1>)
DELEGATE_ROWWISE_REDUCE_FUNCTION(ReduceL2, norm)
#undef DELEGATE_ROWWISE_REDUCE_FUNCTION
49 
#ifndef CAFFE2_USE_EIGEN_FOR_BLAS

// When a real BLAS backend is available, override the rowwise L1/L2
// reductions for float/double with cblas_?asum / cblas_?nrm2 — one BLAS call
// per contiguous row (increment 1), scaled by alpha.
#define DELEGATE_ROWWISE_REDUCE_FUNCTION(T, Func, BLASFunc) \
  template <>                                               \
  void Rowwise##Func(                                       \
      const int rows,                                       \
      const int cols,                                       \
      const T alpha,                                        \
      const T* X,                                           \
      T* Y,                                                 \
      CPUContext* /* context */) {                          \
    for (int i = 0; i < rows; ++i) {                        \
      Y[i] = BLASFunc(cols, X + i * cols, 1) * alpha;       \
    }                                                       \
  }
DELEGATE_ROWWISE_REDUCE_FUNCTION(float, ReduceL1, cblas_sasum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(double, ReduceL1, cblas_dasum)
DELEGATE_ROWWISE_REDUCE_FUNCTION(float, ReduceL2, cblas_snrm2)
DELEGATE_ROWWISE_REDUCE_FUNCTION(double, ReduceL2, cblas_dnrm2)
#undef DELEGATE_ROWWISE_REDUCE_FUNCTION

#endif // CAFFE2_USE_EIGEN_FOR_BLAS
72 
// Generates Colwise##Func<T>(rows, cols, alpha, X, Y, context): reduces the
// row-major rows x cols matrix X along its rows, producing one value per
// column in Y (length cols), scaled by alpha. Y is seeded with the first row
// and then folded with each remaining row via the elementwise MathFunc
// (Min / Max / Add), which accepts Y as both input and output.
#define DELEGATE_COLWISE_REDUCE_FUNCTION(Func, MathFunc)          \
  template <typename T>                                           \
  void Colwise##Func(                                             \
      const int rows,                                             \
      const int cols,                                             \
      const T alpha,                                              \
      const T* X,                                                 \
      T* Y,                                                       \
      CPUContext* context) {                                      \
    std::memcpy(Y, X, sizeof(T) * cols);                          \
    for (int i = 1; i < rows; ++i) {                              \
      MathFunc<T, CPUContext>(cols, Y, X + i * cols, Y, context); \
    }                                                             \
    Scale<T, T, CPUContext>(cols, alpha, Y, Y, context);          \
  }
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceMin, Min)
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceMax, Max)
DELEGATE_COLWISE_REDUCE_FUNCTION(ReduceSum, Add)
#undef DELEGATE_COLWISE_REDUCE_FUNCTION
92 
93 template <typename T>
94 void ColwiseReduceMean(
95  const int rows,
96  const int cols,
97  const T alpha,
98  const T* X,
99  T* Y,
100  CPUContext* context) {
101  ColwiseReduceSum<T>(rows, cols, alpha / static_cast<T>(rows), X, Y, context);
102 }
103 
104 template <typename T>
105 void ColwiseReduceL1(
106  const int rows,
107  const int cols,
108  const T alpha,
109  const T* X,
110  T* Y,
111  CPUContext* context) {
112  ConstEigenArrayMap<T> X_arr(X, cols, rows);
113  EigenVectorArrayMap<T> Y_arr(Y, cols);
114  Y_arr = X_arr.col(0).abs();
115  for (int i = 1; i < rows; ++i) {
116  Y_arr += X_arr.col(i).abs();
117  }
118  Scale<T, T, CPUContext>(cols, alpha, Y, Y, context);
119 }
120 
121 template <typename T>
122 void ColwiseReduceL2(
123  const int rows,
124  const int cols,
125  const T alpha,
126  const T* X,
127  T* Y,
128  CPUContext* /* context */) {
129  ConstEigenArrayMap<T> X_arr(X, cols, rows);
130  EigenVectorArrayMap<T> Y_arr(Y, cols);
131  Y_arr = X_arr.col(0).square();
132  for (int i = 1; i < rows; ++i) {
133  Y_arr += X_arr.col(i).square();
134  }
135  Y_arr = Y_arr.sqrt() * alpha;
136 }
137 
// Reduces an M x N x K tensor over its outer (M) and inner (K) dimensions,
// keeping the middle one: Y[j] = alpha * min over i,k of X[i][j][k].
template <typename T>
void BothEndsReduceMin(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  // Seed with the first outer slice: per-j minimum over k. Note Y_arr
  // aliases Y, so the scalar writes below update the same buffer.
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().minCoeff();
  for (int i = 1; i < M; ++i) {
    // Slice i, mapped (K, N): column j holds X[i][j][:].
    ConstEigenArrayMap<T> X_arr(X + i * N * K, K, N);
    for (int j = 0; j < N; ++j) {
      Y[j] = std::min(Y[j], X_arr.col(j).minCoeff());
    }
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}
157 
// Reduces an M x N x K tensor over its outer (M) and inner (K) dimensions,
// keeping the middle one: Y[j] = alpha * max over i,k of X[i][j][k].
template <typename T>
void BothEndsReduceMax(
    const int M,
    const int N,
    const int K,
    const T alpha,
    const T* X,
    T* Y,
    CPUContext* context) {
  EigenVectorArrayMap<T> Y_arr(Y, N);
  // Seed with the first outer slice: per-j maximum over k. Note Y_arr
  // aliases Y, so the scalar writes below update the same buffer.
  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().maxCoeff();
  for (int i = 1; i < M; ++i) {
    // Slice i, mapped (K, N): column j holds X[i][j][:].
    ConstEigenArrayMap<T> X_arr(X + i * N * K, K, N);
    for (int j = 0; j < N; ++j) {
      Y[j] = std::max(Y[j], X_arr.col(j).maxCoeff());
    }
  }
  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
}
177 
178 template <typename T>
179 void BothEndsReduceSum(
180  const int M,
181  const int N,
182  const int K,
183  const T alpha,
184  const T* X,
185  T* Y,
186  CPUContext* context) {
187  EigenVectorArrayMap<T> Y_arr(Y, N);
188  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().sum();
189  for (int i = 1; i < M; ++i) {
190  Y_arr +=
191  ConstEigenArrayMap<T>(X + i * N * K, K, N).colwise().sum().transpose();
192  }
193  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
194 }
195 
196 template <typename T>
197 void BothEndsReduceMean(
198  const int M,
199  const int N,
200  const int K,
201  const T alpha,
202  const T* X,
203  T* Y,
204  CPUContext* context) {
205  EigenVectorArrayMap<T> Y_arr(Y, N);
206  Y_arr = ConstEigenArrayMap<T>(X, K, N).colwise().sum();
207  for (int i = 1; i < M; ++i) {
208  Y_arr +=
209  ConstEigenArrayMap<T>(X + i * N * K, K, N).colwise().sum().transpose();
210  }
211  Scale<T, T, CPUContext>(N, alpha / static_cast<T>(M * K), Y, Y, context);
212 }
213 
214 template <typename T>
215 void BothEndsReduceL1(
216  const int M,
217  const int N,
218  const int K,
219  const T alpha,
220  const T* X,
221  T* Y,
222  CPUContext* context) {
223  EigenVectorMap<T> Y_vec(Y, N);
224  Y_vec = ConstEigenMatrixMap<T>(X, K, N).colwise().template lpNorm<1>();
225  for (int i = 1; i < M; ++i) {
226  Y_vec += ConstEigenMatrixMap<T>(X + i * N * K, K, N)
227  .colwise()
228  .template lpNorm<1>()
229  .transpose();
230  }
231  Scale<T, T, CPUContext>(N, alpha, Y, Y, context);
232 }
233 
234 template <typename T>
235 void BothEndsReduceL2(
236  const int M,
237  const int N,
238  const int K,
239  const T alpha,
240  const T* X,
241  T* Y,
242  CPUContext* /* context */) {
243  ConstEigenArrayMap<T> X0_arr(X, K, N);
244  EigenVectorArrayMap<T> Y_arr(Y, N);
245  for (int i = 0; i < N; ++i) {
246  Y_arr(i) = X0_arr.col(i).square().sum();
247  }
248  for (int i = 1; i < M; ++i) {
249  ConstEigenArrayMap<T> Xi_arr(X + i * N * K, K, N);
250  for (int j = 0; j < N; ++j) {
251  Y_arr(j) += Xi_arr.col(j).square().sum();
252  }
253  }
254  Y_arr = Y_arr.sqrt() * alpha;
255 }
256 
// Generic N-dimensional reduction. Y has the same ndim as X, with reduced
// dimensions set to 1 in Y_dims. Every Y element is initialized to `init`
// and then folded with each covered X element via `reducer`.
template <typename T, class Reducer>
void ReduceTensorImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const Reducer& reducer,
    const T init,
    const T* X,
    T* Y,
    CPUContext* context) {
  const int X_size =
      std::accumulate(X_dims, X_dims + ndim, 1, std::multiplies<int>());
  const int Y_size =
      std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
  Set<T, CPUContext>(Y_size, init, Y, context);
  // Walk X linearly while tracking its multi-dimensional index; broadcast
  // that index against Y_dims to find the output slot for each element.
  std::vector<int> index(ndim, 0);
  for (int X_index = 0; X_index < X_size; ++X_index) {
    const int Y_index = utils::GetIndexFromDims(ndim, Y_dims, index.data());
    Y[Y_index] = reducer(Y[Y_index], X[X_index]);
    utils::IncreaseIndexInDims(ndim, X_dims, index.data());
  }
}
279 
280 template <typename T>
281 void ReduceMinImpl(
282  const int ndim,
283  const int* X_dims,
284  const int* Y_dims,
285  const T alpha,
286  const T* X,
287  T* Y,
288  CPUContext* context) {
289  ReduceTensorImpl(
290  ndim,
291  X_dims,
292  Y_dims,
293  [](const T a, const T b) { return std::min(a, b); },
294  std::numeric_limits<T>::max(),
295  X,
296  Y,
297  context);
298  const int Y_size =
299  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
300  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
301 }
302 
303 template <typename T>
304 void ReduceMaxImpl(
305  const int ndim,
306  const int* X_dims,
307  const int* Y_dims,
308  const T alpha,
309  const T* X,
310  T* Y,
311  CPUContext* context) {
312  ReduceTensorImpl(
313  ndim,
314  X_dims,
315  Y_dims,
316  [](const T a, const T b) { return std::max(a, b); },
317  std::numeric_limits<T>::lowest(),
318  X,
319  Y,
320  context);
321  const int Y_size =
322  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
323  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
324 }
325 
326 template <typename T>
327 void ReduceSumImpl(
328  const int ndim,
329  const int* X_dims,
330  const int* Y_dims,
331  const T alpha,
332  const T* X,
333  T* Y,
334  CPUContext* context) {
335  ReduceTensorImpl(ndim, X_dims, Y_dims, std::plus<T>(), T(0), X, Y, context);
336  const int Y_size =
337  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
338  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
339 }
340 
341 template <typename T>
342 void ReduceMeanImpl(
343  const int ndim,
344  const int* X_dims,
345  const int* Y_dims,
346  const T alpha,
347  const T* X,
348  T* Y,
349  CPUContext* context) {
350  ReduceTensorImpl(ndim, X_dims, Y_dims, std::plus<T>(), T(0), X, Y, context);
351  const int X_size =
352  std::accumulate(X_dims, X_dims + ndim, 1, std::multiplies<int>());
353  const int Y_size =
354  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
355  Scale<T, T, CPUContext>(
356  Y_size,
357  alpha * static_cast<T>(Y_size) / static_cast<T>(X_size),
358  Y,
359  Y,
360  context);
361 }
362 
363 template <typename T>
364 void ReduceL1Impl(
365  const int ndim,
366  const int* X_dims,
367  const int* Y_dims,
368  const T alpha,
369  const T* X,
370  T* Y,
371  CPUContext* context) {
372  ReduceTensorImpl(
373  ndim,
374  X_dims,
375  Y_dims,
376  [](const T a, const T b) { return a + std::abs(b); },
377  T(0),
378  X,
379  Y,
380  context);
381  const int Y_size =
382  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
383  Scale<T, T, CPUContext>(Y_size, alpha, Y, Y, context);
384 }
385 
386 template <typename T>
387 void ReduceL2Impl(
388  const int ndim,
389  const int* X_dims,
390  const int* Y_dims,
391  const T alpha,
392  const T* X,
393  T* Y,
394  CPUContext* context) {
395  ReduceTensorImpl(
396  ndim,
397  X_dims,
398  Y_dims,
399  [](const T a, const T b) { return a + b * b; },
400  T(0),
401  X,
402  Y,
403  context);
404  const int Y_size =
405  std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
406  EigenVectorArrayMap<T> Y_arr(Y, Y_size);
407  Y_arr = Y_arr.sqrt() * alpha;
408 }
409 
410 template <typename T>
411 void RowwiseMoments(
412  const int rows,
413  const int cols,
414  const T* X,
415  T* mean,
416  T* var) {
417  ConstEigenArrayMap<T> X_arr(X, cols, rows);
418  for (int i = 0; i < rows; ++i) {
419  mean[i] = X_arr.col(i).mean();
420  var[i] = X_arr.col(i).square().mean() - mean[i] * mean[i];
421  }
422 }
423 
424 template <typename T>
425 void ColwiseMoments(
426  const int rows,
427  const int cols,
428  const T* X,
429  T* mean,
430  T* var) {
431  ConstEigenArrayMap<T> X_arr(X, cols, rows);
432  EigenVectorArrayMap<T> mean_arr(mean, cols);
433  EigenVectorArrayMap<T> var_arr(var, cols);
434  mean_arr = X_arr.col(0);
435  var_arr = X_arr.col(0).square();
436  for (int i = 1; i < rows; ++i) {
437  mean_arr += X_arr.col(i);
438  var_arr += X_arr.col(i).square();
439  }
440  const T scale = T(1) / static_cast<T>(rows);
441  mean_arr *= scale;
442  var_arr = var_arr * scale - mean_arr.square();
443 }
444 
// Mean and (biased) variance of an M x N x K tensor reduced over its outer
// (M) and inner (K) dimensions, keeping the middle one (N outputs).
template <typename T>
void BothEndsMoments(
    const int M,
    const int N,
    const int K,
    const T* X,
    T* mean,
    T* var) {
  // Columns of X_arr are the M*N length-K inner vectors; column i*N+j holds
  // X[i][j][:].
  ConstEigenArrayMap<T> X_arr(X, K, M * N);
  EigenVectorArrayMap<T> mean_arr(mean, N);
  EigenVectorArrayMap<T> var_arr(var, N);
  // Seed the sum / sum-of-squares accumulators from the first outer slice.
  for (int i = 0; i < N; ++i) {
    mean_arr(i) = X_arr.col(i).sum();
    var_arr(i) = X_arr.col(i).square().sum();
  }
  // Fold in the remaining outer slices.
  for (int i = 1; i < M; ++i) {
    for (int j = 0; j < N; ++j) {
      const int c = i * N + j;
      mean_arr(j) += X_arr.col(c).sum();
      var_arr(j) += X_arr.col(c).square().sum();
    }
  }
  // E[x] and E[x^2] - E[x]^2 over the M*K reduced elements.
  const T scale = T(1) / static_cast<T>(M * K);
  mean_arr *= scale;
  var_arr = var_arr * scale - mean_arr.square();
}
471 
// Per-output mean and (biased, E[x^2] - E[x]^2) variance of X reduced
// according to Y_dims (reduced dimensions are 1 in Y_dims). Dispatches to
// specialized kernels for the common reduce layouts before falling back to
// a generic strided walk.
template <typename T>
void MomentsImpl(
    const int ndim,
    const int* X_dims,
    const int* Y_dims,
    const T* X,
    T* mean,
    T* var,
    CPUContext* /* context */) {
  const int X_size =
      std::accumulate(X_dims, X_dims + ndim, 1, std::multiplies<int>());
  const int Y_size =
      std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>());
  // Empty input: define both moments as zero.
  if (X_size == 0) {
    std::memset(mean, 0, sizeof(T) * Y_size);
    std::memset(var, 0, sizeof(T) * Y_size);
    return;
  }
  // No reduction at all: each output covers exactly one element, so the
  // mean is the element itself and the variance is zero.
  if (std::equal(X_dims, X_dims + ndim, Y_dims)) {
    std::memcpy(mean, X, sizeof(T) * Y_size);
    std::memset(var, 0, sizeof(T) * Y_size);
    return;
  }
  int rows;
  int cols;
  // Fast paths for contiguous reduce layouts.
  if (utils::IsRowwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {
    RowwiseMoments<T>(rows, cols, X, mean, var);
    return;
  }
  if (utils::IsColwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {
    ColwiseMoments<T>(rows, cols, X, mean, var);
    return;
  }
  int pre;
  int mid;
  int nxt;
  if (utils::IsBothEndsReduce(ndim, X_dims, Y_dims, &pre, &mid, &nxt)) {
    BothEndsMoments<T>(pre, mid, nxt, X, mean, var);
    return;
  }
  // Generic fallback: accumulate sum and sum-of-squares per output slot
  // while walking X linearly with a tracked multi-index.
  std::memset(mean, 0, sizeof(T) * Y_size);
  std::memset(var, 0, sizeof(T) * Y_size);
  std::vector<int> index(ndim, 0);
  for (int X_index = 0; X_index < X_size; ++X_index) {
    const int Y_index = utils::GetIndexFromDims(ndim, Y_dims, index.data());
    mean[Y_index] += X[X_index];
    var[Y_index] += X[X_index] * X[X_index];
    utils::IncreaseIndexInDims(ndim, X_dims, index.data());
  }
  // Each output accumulated X_size / Y_size elements.
  const T scale = static_cast<T>(Y_size) / static_cast<T>(X_size);
  EigenVectorArrayMap<T> mean_arr(mean, Y_size);
  EigenVectorArrayMap<T> var_arr(var, Y_size);
  mean_arr *= scale;
  var_arr = var_arr * scale - mean_arr.square();
}
527 
528 } // namespace
529 
// Generates Func<T, CPUContext>(N, X, Y, scratch, context): full reduction
// of the length-N buffer X into the single value *Y via the Eigen member
// reduction. The scratch tensor is unused on CPU (it exists for parity with
// the GPU signature).
#define DELEGATE_GLOBAL_REDUCE_FUNCTION(T, Func, EigenFunc) \
  template <>                                               \
  C10_EXPORT void Func<T, CPUContext>(                      \
      const int N,                                          \
      const T* X,                                           \
      T* Y,                                                 \
      Tensor* /* scratch_ptr */,                            \
      CPUContext* /* context */) {                          \
    *Y = ConstEigenVectorArrayMap<T>(X, N).EigenFunc();     \
  }
DELEGATE_GLOBAL_REDUCE_FUNCTION(float, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int32_t, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int64_t, ReduceMin, minCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(float, ReduceMax, maxCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int32_t, ReduceMax, maxCoeff)
DELEGATE_GLOBAL_REDUCE_FUNCTION(std::int64_t, ReduceMax, maxCoeff)
#undef DELEGATE_GLOBAL_REDUCE_FUNCTION
547 
// Generates the public Func<T, CPUContext>(ndim, X_dims, Y_dims, alpha, X,
// Y, context) entry point. kInit is the reduction's identity/seed value and
// kIsNorm marks L1/L2 (for which the no-reduction case must still apply
// abs()). Dispatch order:
//   1. empty X       -> fill Y with alpha * kInit
//   2. alpha == 0    -> zero Y
//   3. X_dims == Y_dims (no reduction) -> scaled copy (abs for norms)
//   4. rowwise / colwise / both-ends contiguous layouts -> fast kernels
//   5. anything else -> generic strided Func##Impl
#define DELEGATE_REDUCE_FUNCTION(T, Func, kInit, kIsNorm)                  \
  template <>                                                              \
  C10_EXPORT void Func<T, CPUContext>(                                     \
      const int ndim,                                                      \
      const int* X_dims,                                                   \
      const int* Y_dims,                                                   \
      const T alpha,                                                       \
      const T* X,                                                          \
      T* Y,                                                                \
      CPUContext* context) {                                               \
    const int X_size =                                                     \
        std::accumulate(X_dims, X_dims + ndim, 1, std::multiplies<int>()); \
    const int Y_size =                                                     \
        std::accumulate(Y_dims, Y_dims + ndim, 1, std::multiplies<int>()); \
    if (X_size == 0) {                                                     \
      Set<T, CPUContext>(Y_size, alpha * kInit, Y, context);               \
      return;                                                              \
    }                                                                      \
    if (alpha == T(0)) {                                                   \
      std::memset(Y, 0, sizeof(T) * Y_size);                               \
      return;                                                              \
    }                                                                      \
    if (std::equal(X_dims, X_dims + ndim, Y_dims)) {                       \
      if (kIsNorm) {                                                       \
        EigenVectorArrayMap<T>(Y, Y_size) =                                \
            ConstEigenVectorArrayMap<T>(X, X_size).abs() * alpha;          \
      } else {                                                             \
        Scale<T, T, CPUContext>(Y_size, alpha, X, Y, context);             \
      }                                                                    \
      return;                                                              \
    }                                                                      \
    int rows;                                                              \
    int cols;                                                              \
    if (utils::IsRowwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {      \
      Rowwise##Func<T>(rows, cols, alpha, X, Y, context);                  \
      return;                                                              \
    }                                                                      \
    if (utils::IsColwiseReduce(ndim, X_dims, Y_dims, &rows, &cols)) {      \
      Colwise##Func<T>(rows, cols, alpha, X, Y, context);                  \
      return;                                                              \
    }                                                                      \
    int M;                                                                 \
    int N;                                                                 \
    int K;                                                                 \
    if (utils::IsBothEndsReduce(ndim, X_dims, Y_dims, &M, &N, &K)) {       \
      BothEnds##Func<T>(M, N, K, alpha, X, Y, context);                    \
      return;                                                              \
    }                                                                      \
    Func##Impl<T>(ndim, X_dims, Y_dims, alpha, X, Y, context);             \
  }
DELEGATE_REDUCE_FUNCTION(
    float,
    ReduceMin,
    std::numeric_limits<float>::max(),
    false)
DELEGATE_REDUCE_FUNCTION(
    double,
    ReduceMin,
    std::numeric_limits<double>::max(),
    false)
DELEGATE_REDUCE_FUNCTION(
    std::int32_t,
    ReduceMin,
    std::numeric_limits<std::int32_t>::max(),
    false)
DELEGATE_REDUCE_FUNCTION(
    std::int64_t,
    ReduceMin,
    std::numeric_limits<std::int64_t>::max(),
    false)
DELEGATE_REDUCE_FUNCTION(
    float,
    ReduceMax,
    std::numeric_limits<float>::lowest(),
    false)
DELEGATE_REDUCE_FUNCTION(
    double,
    ReduceMax,
    std::numeric_limits<double>::lowest(),
    false)
DELEGATE_REDUCE_FUNCTION(
    std::int32_t,
    ReduceMax,
    std::numeric_limits<std::int32_t>::lowest(),
    false)
DELEGATE_REDUCE_FUNCTION(
    std::int64_t,
    ReduceMax,
    std::numeric_limits<std::int64_t>::lowest(),
    false)
DELEGATE_REDUCE_FUNCTION(float, ReduceSum, 0.0f, false)
DELEGATE_REDUCE_FUNCTION(double, ReduceSum, 0.0, false)
DELEGATE_REDUCE_FUNCTION(std::int32_t, ReduceSum, 0, false)
DELEGATE_REDUCE_FUNCTION(std::int64_t, ReduceSum, 0LL, false)
DELEGATE_REDUCE_FUNCTION(float, ReduceMean, 0.0f, false)
DELEGATE_REDUCE_FUNCTION(double, ReduceMean, 0.0, false)
DELEGATE_REDUCE_FUNCTION(float, ReduceL1, 0.0f, true)
DELEGATE_REDUCE_FUNCTION(double, ReduceL1, 0.0, true)
DELEGATE_REDUCE_FUNCTION(std::int32_t, ReduceL1, 0, true)
DELEGATE_REDUCE_FUNCTION(std::int64_t, ReduceL1, 0LL, true)
DELEGATE_REDUCE_FUNCTION(float, ReduceL2, 0.0f, true)
DELEGATE_REDUCE_FUNCTION(double, ReduceL2, 0.0, true)
#undef DELEGATE_REDUCE_FUNCTION
651 
// Generates the public Moments<T, CPUContext> entry point, forwarding to the
// layout-dispatching MomentsImpl above.
#define CAFFE2_SPECIALIZED_MOMENTS(T)                           \
  template <>                                                   \
  C10_EXPORT void Moments<T, CPUContext>(                       \
      const int ndim,                                           \
      const int* X_dims,                                        \
      const int* Y_dims,                                        \
      const T* X,                                               \
      T* mean,                                                  \
      T* var,                                                   \
      CPUContext* context) {                                    \
    MomentsImpl<T>(ndim, X_dims, Y_dims, X, mean, var, context); \
  }
CAFFE2_SPECIALIZED_MOMENTS(float)
#undef CAFFE2_SPECIALIZED_MOMENTS
666 
667 } // namespace math
668 } // namespace caffe2
Definition: any.cpp:108
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: OpClasses.h:659