// Caffe2 - C++ API
// A deep learning, cross-platform ML framework.
// pool_op.cc — CPU implementations of the AveragePool and MaxPool operators.
1 #include "caffe2/operators/pool_op.h"
2 
3 #include <limits>
4 #include <string>
5 #include <type_traits>
6 
7 #include "caffe2/operators/pool_op_util.h"
8 #include "caffe2/utils/eigen_utils.h"
9 #include "caffe2/utils/math.h"
10 
11 namespace caffe2 {
12 
13 namespace {
14 
15 template <typename T, StorageOrder kOrder>
16 void ComputeAveragePool1D(
17  int l,
18  int r,
19  int y,
20  T scale,
21  const ConstEigenArrayMap<T>& X_arr,
22  EigenArrayMap<T>* Y_arr);
23 
24 template <>
25 void ComputeAveragePool1D<float, StorageOrder::NCHW>(
26  const int l,
27  const int r,
28  const int y,
29  const float scale,
30  const ConstEigenArrayMap<float>& X_arr,
31  EigenArrayMap<float>* Y_arr) {
32  (*Y_arr)(y) = X_arr.col(0).segment(l, r - l).sum() * scale;
33 }
34 
35 template <>
36 void ComputeAveragePool1D<float, StorageOrder::NHWC>(
37  const int l,
38  const int r,
39  const int y,
40  const float scale,
41  const ConstEigenArrayMap<float>& X_arr,
42  EigenArrayMap<float>* Y_arr) {
43  Y_arr->col(y) = X_arr.col(l);
44  for (int i = l + 1; i < r; ++i) {
45  Y_arr->col(y) += X_arr.col(i);
46  }
47  Y_arr->col(y) *= scale;
48 }
49 
50 template <typename T, StorageOrder kOrder>
51 void ComputeAveragePool2D(
52  int W,
53  int t,
54  int b,
55  int l,
56  int r,
57  int y,
58  T scale,
59  const ConstEigenArrayMap<T>& X_arr,
60  EigenArrayMap<T>* Y_arr);
61 
62 template <>
63 void ComputeAveragePool2D<float, StorageOrder::NCHW>(
64  const int /* W */,
65  const int t,
66  const int b,
67  const int l,
68  const int r,
69  const int y,
70  const float scale,
71  const ConstEigenArrayMap<float>& X_arr,
72  EigenArrayMap<float>* Y_arr) {
73  (*Y_arr)(y) = X_arr.block(l, t, r - l, b - t).sum() * scale;
74 }
75 
76 template <>
77 void ComputeAveragePool2D<float, StorageOrder::NHWC>(
78  const int W,
79  const int t,
80  const int b,
81  const int l,
82  const int r,
83  const int y,
84  const float scale,
85  const ConstEigenArrayMap<float>& X_arr,
86  EigenArrayMap<float>* Y_arr) {
87  Y_arr->col(y).setZero();
88  for (int i = t; i < b; ++i) {
89  for (int j = l; j < r; ++j) {
90  Y_arr->col(y) += X_arr.col(i * W + j);
91  }
92  }
93  Y_arr->col(y) *= scale;
94 }
95 
96 template <typename T, StorageOrder kOrder>
97 void ComputeAveragePool3D(
98  int H,
99  int W,
100  int p,
101  int a,
102  int t,
103  int b,
104  int l,
105  int r,
106  int y,
107  T scale,
108  const ConstEigenArrayMap<T>& X_arr,
109  EigenArrayMap<T>* Y_arr);
110 
111 template <>
112 void ComputeAveragePool3D<float, StorageOrder::NCHW>(
113  const int H,
114  const int /* W */,
115  const int p,
116  const int a,
117  const int t,
118  const int b,
119  const int l,
120  const int r,
121  const int y,
122  const float scale,
123  const ConstEigenArrayMap<float>& X_arr,
124  EigenArrayMap<float>* Y_arr) {
125  (*Y_arr)(y) = 0;
126  for (int i = p; i < a; ++i) {
127  (*Y_arr)(y) += X_arr.block(l, i * H + t, r - l, b - t).sum();
128  }
129  (*Y_arr)(y) *= scale;
130 }
131 
132 template <>
133 void ComputeAveragePool3D<float, StorageOrder::NHWC>(
134  const int H,
135  const int W,
136  const int p,
137  const int a,
138  const int t,
139  const int b,
140  const int l,
141  const int r,
142  const int y,
143  const float scale,
144  const ConstEigenArrayMap<float>& X_arr,
145  EigenArrayMap<float>* Y_arr) {
146  Y_arr->col(y).setZero();
147  for (int i = p; i < a; ++i) {
148  for (int j = t; j < b; ++j) {
149  for (int k = l; k < r; ++k) {
150  Y_arr->col(y) += X_arr.col(i * H * W + j * W + k);
151  }
152  }
153  }
154  Y_arr->col(y) *= scale;
155 }
156 
157 template <typename T, StorageOrder kOrder>
158 void RunAveragePool1D(
159  const int N,
160  const int C,
161  const int X_size,
162  const int Y_size,
163  const int kernel,
164  const int stride,
165  const int pad,
166  const bool count_include_pad,
167  const T* X,
168  T* Y) {
169  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
170  const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
171  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
172  const T* X_ptr = X;
173  T* Y_ptr = Y;
174  for (int i = 0; i < batch_size; ++i) {
175  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
176  ? ConstEigenArrayMap<T>(X_ptr, X_size, 1)
177  : ConstEigenArrayMap<T>(X_ptr, C, X_size);
178  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
179  ? EigenArrayMap<T>(Y_ptr, Y_size, 1)
180  : EigenArrayMap<T>(Y_ptr, C, Y_size);
181  for (int y = 0; y < Y_size; ++y) {
182  const int l = std::max(y * stride - pad, 0);
183  const int r = std::min(y * stride - pad + kernel, X_size);
184  const T scale = T(1) / static_cast<T>(count_include_pad ? kernel : r - l);
185  ComputeAveragePool1D<T, kOrder>(l, r, y, scale, X_arr, &Y_arr);
186  }
187  X_ptr += X_stride;
188  Y_ptr += Y_stride;
189  }
190 }
191 
192 template <typename T, StorageOrder kOrder>
193 void RunAveragePool2D(
194  const int N,
195  const int C,
196  const int X_H,
197  const int X_W,
198  const int Y_H,
199  const int Y_W,
200  const int kernel_h,
201  const int kernel_w,
202  const int stride_h,
203  const int stride_w,
204  const int pad_t,
205  const int pad_l,
206  const bool count_include_pad,
207  const T* X,
208  T* Y) {
209  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
210  const int X_HxW = X_H * X_W;
211  const int Y_HxW = Y_H * Y_W;
212  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
213  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
214  const T* X_ptr = X;
215  T* Y_ptr = Y;
216  for (int i = 0; i < batch_size; ++i) {
217  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
218  ? ConstEigenArrayMap<T>(X_ptr, X_W, X_H)
219  : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
220  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
221  ? EigenArrayMap<T>(Y_ptr, Y_W, Y_H)
222  : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
223  for (int h = 0; h < Y_H; ++h) {
224  const int t = std::max(h * stride_h - pad_t, 0);
225  const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
226  for (int w = 0; w < Y_W; ++w) {
227  const int l = std::max(w * stride_w - pad_l, 0);
228  const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
229  const int y = h * Y_W + w;
230  const T scale = T(1) /
231  static_cast<T>(count_include_pad ? kernel_h * kernel_w
232  : (b - t) * (r - l));
233  ComputeAveragePool2D<T, kOrder>(
234  X_W, t, b, l, r, y, scale, X_arr, &Y_arr);
235  }
236  }
237  X_ptr += X_stride;
238  Y_ptr += Y_stride;
239  }
240 }
241 
242 template <typename T, StorageOrder kOrder>
243 void RunAveragePool3D(
244  const int N,
245  const int C,
246  const int X_D,
247  const int X_H,
248  const int X_W,
249  const int Y_D,
250  const int Y_H,
251  const int Y_W,
252  const int kernel_d,
253  const int kernel_h,
254  const int kernel_w,
255  const int stride_d,
256  const int stride_h,
257  const int stride_w,
258  const int pad_p,
259  const int pad_t,
260  const int pad_l,
261  const bool count_include_pad,
262  const T* X,
263  T* Y) {
264  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
265  const int X_HxW = X_D * X_H * X_W;
266  const int Y_HxW = Y_D * Y_H * Y_W;
267  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
268  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
269  const T* X_ptr = X;
270  T* Y_ptr = Y;
271  for (int i = 0; i < batch_size; ++i) {
272  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
273  ? ConstEigenArrayMap<T>(X_ptr, X_W, X_D * X_H)
274  : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
275  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
276  ? EigenArrayMap<T>(Y_ptr, Y_W, Y_D * Y_H)
277  : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
278  for (int d = 0; d < Y_D; ++d) {
279  const int p = std::max(d * stride_d - pad_p, 0);
280  const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
281  for (int h = 0; h < Y_H; ++h) {
282  const int t = std::max(h * stride_h - pad_t, 0);
283  const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
284  for (int w = 0; w < Y_W; ++w) {
285  const int l = std::max(w * stride_w - pad_l, 0);
286  const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
287  const int y = d * Y_H * Y_W + h * Y_W + w;
288  const T scale = T(1) /
289  static_cast<T>(count_include_pad ? kernel_d * kernel_h * kernel_w
290  : (a - p) * (b - t) * (r - l));
291  ComputeAveragePool3D<T, kOrder>(
292  X_H, X_W, p, a, t, b, l, r, y, scale, X_arr, &Y_arr);
293  }
294  }
295  }
296  X_ptr += X_stride;
297  Y_ptr += Y_stride;
298  }
299 }
300 
301 template <typename T, StorageOrder kOrder>
302 void ComputeMaxPool1D(
303  int l,
304  int r,
305  int y,
306  const ConstEigenArrayMap<T>& X_arr,
307  EigenArrayMap<T>* Y_arr);
308 
309 template <>
310 void ComputeMaxPool1D<float, StorageOrder::NCHW>(
311  const int l,
312  const int r,
313  const int y,
314  const ConstEigenArrayMap<float>& X_arr,
315  EigenArrayMap<float>* Y_arr) {
316  (*Y_arr)(y) = X_arr.col(0).segment(l, r - l).maxCoeff();
317 }
318 
319 template <>
320 void ComputeMaxPool1D<float, StorageOrder::NHWC>(
321  const int l,
322  const int r,
323  const int y,
324  const ConstEigenArrayMap<float>& X_arr,
325  EigenArrayMap<float>* Y_arr) {
326  Y_arr->col(y) = X_arr.col(l);
327  for (int i = l + 1; i < r; ++i) {
328  Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i));
329  }
330 }
331 
332 template <typename T, StorageOrder kOrder>
333 void ComputeMaxPool2D(
334  int W,
335  int t,
336  int b,
337  int l,
338  int r,
339  int y,
340  const ConstEigenArrayMap<T>& X_arr,
341  EigenArrayMap<T>* Y_arr);
342 
343 template <>
344 void ComputeMaxPool2D<float, StorageOrder::NCHW>(
345  const int /* W */,
346  const int t,
347  const int b,
348  const int l,
349  const int r,
350  const int y,
351  const ConstEigenArrayMap<float>& X_arr,
352  EigenArrayMap<float>* Y_arr) {
353  (*Y_arr)(y) = X_arr.block(l, t, r - l, b - t).maxCoeff();
354 }
355 
356 template <>
357 void ComputeMaxPool2D<float, StorageOrder::NHWC>(
358  const int W,
359  const int t,
360  const int b,
361  const int l,
362  const int r,
363  const int y,
364  const ConstEigenArrayMap<float>& X_arr,
365  EigenArrayMap<float>* Y_arr) {
366  Y_arr->col(y).setConstant(std::numeric_limits<float>::lowest());
367  for (int i = t; i < b; ++i) {
368  for (int j = l; j < r; ++j) {
369  Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i * W + j));
370  }
371  }
372 }
373 
374 template <typename T, StorageOrder kOrder>
375 void ComputeMaxPool3D(
376  int H,
377  int W,
378  int p,
379  int a,
380  int t,
381  int b,
382  int l,
383  int r,
384  int y,
385  const ConstEigenArrayMap<T>& X_arr,
386  EigenArrayMap<T>* Y_arr);
387 
388 template <>
389 void ComputeMaxPool3D<float, StorageOrder::NCHW>(
390  const int H,
391  const int /* W */,
392  const int p,
393  const int a,
394  const int t,
395  const int b,
396  const int l,
397  const int r,
398  const int y,
399  const ConstEigenArrayMap<float>& X_arr,
400  EigenArrayMap<float>* Y_arr) {
401  (*Y_arr)(y) = std::numeric_limits<float>::lowest();
402  for (int i = p; i < a; ++i) {
403  (*Y_arr)(y) = std::max(
404  (*Y_arr)(y), X_arr.block(l, i * H + t, r - l, b - t).maxCoeff());
405  }
406 }
407 
408 template <>
409 void ComputeMaxPool3D<float, StorageOrder::NHWC>(
410  const int H,
411  const int W,
412  const int p,
413  const int a,
414  const int t,
415  const int b,
416  const int l,
417  const int r,
418  const int y,
419  const ConstEigenArrayMap<float>& X_arr,
420  EigenArrayMap<float>* Y_arr) {
421  Y_arr->col(y).setConstant(std::numeric_limits<float>::lowest());
422  for (int i = p; i < a; ++i) {
423  for (int j = t; j < b; ++j) {
424  for (int k = l; k < r; ++k) {
425  Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i * H * W + j * W + k));
426  }
427  }
428  }
429 }
430 
431 template <typename T, StorageOrder kOrder>
432 void RunMaxPool1D(
433  const int N,
434  const int C,
435  const int X_size,
436  const int Y_size,
437  const int kernel,
438  const int stride,
439  const int pad,
440  const T* X,
441  T* Y) {
442  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
443  const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
444  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
445  const T* X_ptr = X;
446  T* Y_ptr = Y;
447  for (int i = 0; i < batch_size; ++i) {
448  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
449  ? ConstEigenArrayMap<T>(X_ptr, X_size, 1)
450  : ConstEigenArrayMap<T>(X_ptr, C, X_size);
451  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
452  ? EigenArrayMap<T>(Y_ptr, Y_size, 1)
453  : EigenArrayMap<T>(Y_ptr, C, Y_size);
454  for (int y = 0; y < Y_size; ++y) {
455  const int l = std::max(y * stride - pad, 0);
456  const int r = std::min(y * stride - pad + kernel, X_size);
457  ComputeMaxPool1D<T, kOrder>(l, r, y, X_arr, &Y_arr);
458  }
459  X_ptr += X_stride;
460  Y_ptr += Y_stride;
461  }
462 }
463 
464 template <typename T, StorageOrder kOrder>
465 void RunMaxPool2D(
466  const int N,
467  const int C,
468  const int X_H,
469  const int X_W,
470  const int Y_H,
471  const int Y_W,
472  const int kernel_h,
473  const int kernel_w,
474  const int stride_h,
475  const int stride_w,
476  const int pad_t,
477  const int pad_l,
478  const T* X,
479  T* Y) {
480  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
481  const int X_HxW = X_H * X_W;
482  const int Y_HxW = Y_H * Y_W;
483  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
484  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
485  const T* X_ptr = X;
486  T* Y_ptr = Y;
487  for (int i = 0; i < batch_size; ++i) {
488  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
489  ? ConstEigenArrayMap<T>(X_ptr, X_W, X_H)
490  : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
491  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
492  ? EigenArrayMap<T>(Y_ptr, Y_W, Y_H)
493  : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
494  for (int h = 0; h < Y_H; ++h) {
495  const int t = std::max(h * stride_h - pad_t, 0);
496  const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
497  for (int w = 0; w < Y_W; ++w) {
498  const int l = std::max(w * stride_w - pad_l, 0);
499  const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
500  const int y = h * Y_W + w;
501  ComputeMaxPool2D<T, kOrder>(X_W, t, b, l, r, y, X_arr, &Y_arr);
502  }
503  }
504  X_ptr += X_stride;
505  Y_ptr += Y_stride;
506  }
507 }
508 template <typename T, StorageOrder kOrder>
509 void RunMaxPool3D(
510  const int N,
511  const int C,
512  const int X_D,
513  const int X_H,
514  const int X_W,
515  const int Y_D,
516  const int Y_H,
517  const int Y_W,
518  const int kernel_d,
519  const int kernel_h,
520  const int kernel_w,
521  const int stride_d,
522  const int stride_h,
523  const int stride_w,
524  const int pad_p,
525  const int pad_t,
526  const int pad_l,
527  const T* X,
528  T* Y) {
529  const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
530  const int X_HxW = X_D * X_H * X_W;
531  const int Y_HxW = Y_D * Y_H * Y_W;
532  const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
533  const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
534  const T* X_ptr = X;
535  T* Y_ptr = Y;
536  for (int i = 0; i < batch_size; ++i) {
537  ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
538  ? ConstEigenArrayMap<T>(X_ptr, X_W, X_D * X_H)
539  : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
540  EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
541  ? EigenArrayMap<T>(Y_ptr, Y_W, Y_D * Y_H)
542  : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
543  for (int d = 0; d < Y_D; ++d) {
544  const int p = std::max(d * stride_d - pad_p, 0);
545  const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
546  for (int h = 0; h < Y_H; ++h) {
547  const int t = std::max(h * stride_h - pad_t, 0);
548  const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
549  for (int w = 0; w < Y_W; ++w) {
550  const int l = std::max(w * stride_w - pad_l, 0);
551  const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
552  const int y = d * Y_H * Y_W + h * Y_W + w;
553  ComputeMaxPool3D<T, kOrder>(
554  X_H, X_W, p, a, t, b, l, r, y, X_arr, &Y_arr);
555  }
556  }
557  }
558  X_ptr += X_stride;
559  Y_ptr += Y_stride;
560  }
561 }
562 
563 } // namespace
564 
// Global average pooling, NCHW layout: Y[n][c] is the mean of all HxW
// elements of X[n][c]. Implemented as a single row-wise ReduceMean over an
// (N*C) x HxW view of X. Always returns true.
template <>
template <>
bool AveragePoolFunctor<CPUContext>::
    GlobalPoolingForward<float, StorageOrder::NCHW>(
        const int N,
        const int C,
        const int HxW,
        const float* X,
        float* Y,
        CPUContext* context) const {
  // Reduce away the second (spatial) axis; 1.0f is the post-reduction scale.
  const std::array<int, 2> X_dims = {N * C, HxW};
  const std::array<int, 2> Y_dims = {N * C, 1};
  math::ReduceMean<float, CPUContext>(
      2, X_dims.data(), Y_dims.data(), 1.0f, X, Y, context);
  return true;
}
581 
// Global average pooling, NHWC layout: for each (n, c), Y[n][c] is the mean
// of X[n][.][c] over all HxW spatial positions. Always returns true.
template <>
template <>
bool AveragePoolFunctor<CPUContext>::
    GlobalPoolingForward<float, StorageOrder::NHWC>(
        const int N,
        const int C,
        const int HxW,
        const float* X,
        float* Y,
        CPUContext* context) const {
  // Zero the C-channel accumulators, sum each spatial position's channel
  // vector into them, then divide once by the number of positions.
  math::Set<float, CPUContext>(N * C, 0.0f, Y, context);
  const float* X_ptr = X;
  float* Y_ptr = Y;
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < HxW; ++j) {
      // Elementwise: Y_ptr[0..C) += X_ptr[j*C .. j*C + C).
      math::Add<float, CPUContext>(C, Y_ptr, X_ptr + j * C, Y_ptr, context);
    }
    X_ptr += HxW * C;
    Y_ptr += C;
  }
  math::Scale<float, float, CPUContext>(
      N * C, 1.0f / static_cast<float>(HxW), Y, Y, context);
  return true;
}
606 
// Stamps out AveragePoolFunctor<CPUContext>::Forward for one (T, kOrder)
// pair, dispatching on the number of spatial dimensions (1, 2, or 3).
// For the 2-D case there is a NEON fast path: when
// pool_op_util::IsNeon4x4p0s0Eligible accepts the configuration (presumably
// the 4x4 / zero-pad case its name suggests — see pool_op_util for the exact
// criteria), RunNeonAveragePool4x4p0s0NCHW is used instead of the generic
// Eigen implementation. `count_include_pad` is a member of the functor.
// NOTE: comments must stay outside the macro body — a `//` comment ending in
// a continuation backslash would swallow the next macro line.
#define CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(T, kOrder) \
  template <> \
  template <> \
  bool AveragePoolFunctor<CPUContext>::Forward<T, kOrder>( \
      const int N, \
      const int C, \
      const std::vector<int>& X_dims, \
      const std::vector<int>& Y_dims, \
      const std::vector<int>& kernel, \
      const std::vector<int>& dilation, \
      const std::vector<int>& stride, \
      const std::vector<int>& pads, \
      const T* X, \
      T* Y, \
      CPUContext* /* context */) const { \
    const int ndim = X_dims.size(); \
    switch (ndim) { \
      case 1: { \
        RunAveragePool1D<T, kOrder>( \
            N, \
            C, \
            X_dims[0], \
            Y_dims[0], \
            kernel[0], \
            stride[0], \
            pads[0], \
            count_include_pad, \
            X, \
            Y); \
        return true; \
      } \
      case 2: { \
        if (std::is_same<T, float>::value && kOrder == StorageOrder::NCHW && \
            pool_op_util::IsNeon4x4p0s0Eligible( \
                X_dims[0], \
                X_dims[1], \
                Y_dims[0], \
                Y_dims[1], \
                kernel[0], \
                kernel[1], \
                stride[0], \
                stride[1], \
                pads[0], \
                pads[1], \
                pads[2], \
                pads[3], \
                dilation[0], \
                dilation[1], \
                X, \
                Y)) { \
          pool_op_util::RunNeonAveragePool4x4p0s0NCHW( \
              N, C, X_dims[0], X_dims[1], X, Y); \
        } else { \
          RunAveragePool2D<T, kOrder>( \
              N, \
              C, \
              X_dims[0], \
              X_dims[1], \
              Y_dims[0], \
              Y_dims[1], \
              kernel[0], \
              kernel[1], \
              stride[0], \
              stride[1], \
              pads[0], \
              pads[1], \
              count_include_pad, \
              X, \
              Y); \
        } \
        return true; \
      } \
      case 3: { \
        RunAveragePool3D<T, kOrder>( \
            N, \
            C, \
            X_dims[0], \
            X_dims[1], \
            X_dims[2], \
            Y_dims[0], \
            Y_dims[1], \
            Y_dims[2], \
            kernel[0], \
            kernel[1], \
            kernel[2], \
            stride[0], \
            stride[1], \
            stride[2], \
            pads[0], \
            pads[1], \
            pads[2], \
            count_include_pad, \
            X, \
            Y); \
        return true; \
      } \
      default: { \
        CAFFE_THROW("Unsupported pooling dim: ", ndim); \
        return false; \
      } \
    } \
  }
CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(float, StorageOrder::NCHW)
CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(float, StorageOrder::NHWC)
#undef CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD
712 
// Global max pooling, NCHW layout: Y[n][c] is the maximum over all HxW
// elements of X[n][c]. Implemented as a single row-wise ReduceMax over an
// (N*C) x HxW view of X. Always returns true.
template <>
template <>
bool MaxPoolFunctor<CPUContext>::
    GlobalPoolingForward<float, StorageOrder::NCHW>(
        const int N,
        const int C,
        const int HxW,
        const float* X,
        float* Y,
        CPUContext* context) const {
  // Reduce away the second (spatial) axis; 1.0f is the post-reduction scale.
  const std::array<int, 2> X_dims = {N * C, HxW};
  const std::array<int, 2> Y_dims = {N * C, 1};
  math::ReduceMax<float, CPUContext>(
      2, X_dims.data(), Y_dims.data(), 1.0f, X, Y, context);
  return true;
}
729 
// Global max pooling, NHWC layout: for each (n, c), Y[n][c] is the maximum
// of X[n][.][c] over all HxW spatial positions. Always returns true.
template <>
template <>
bool MaxPoolFunctor<CPUContext>::
    GlobalPoolingForward<float, StorageOrder::NHWC>(
        const int N,
        const int C,
        const int HxW,
        const float* X,
        float* Y,
        CPUContext* context) const {
  // Seed every output with the lowest float so the first comparison always
  // adopts the input value, then fold in each spatial position's channels.
  math::Set<float, CPUContext>(
      N * C, std::numeric_limits<float>::lowest(), Y, context);
  const float* X_ptr = X;
  float* Y_ptr = Y;
  for (int i = 0; i < N; ++i) {
    ConstEigenArrayMap<float> X_arr(X_ptr, C, HxW);
    EigenVectorArrayMap<float> Y_arr(Y_ptr, C);
    for (int j = 0; j < HxW; ++j) {
      Y_arr = Y_arr.max(X_arr.col(j));
    }
    X_ptr += HxW * C;
    Y_ptr += C;
  }
  return true;
}
755 
// Stamps out MaxPoolFunctor<CPUContext>::Forward for one (T, kOrder) pair,
// dispatching on the number of spatial dimensions (1, 2, or 3). For the 2-D
// case there is a NEON fast path: when pool_op_util::IsNeon2x2p0s0Eligible
// accepts the configuration (presumably the 2x2 / zero-pad case its name
// suggests — see pool_op_util for the exact criteria),
// RunNeonMaxPool2x2p0s0NCHW is used instead of the generic implementation.
// NOTE: comments must stay outside the macro body — a `//` comment ending in
// a continuation backslash would swallow the next macro line.
#define CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(T, kOrder) \
  template <> \
  template <> \
  bool MaxPoolFunctor<CPUContext>::Forward<T, kOrder>( \
      const int N, \
      const int C, \
      const std::vector<int>& X_dims, \
      const std::vector<int>& Y_dims, \
      const std::vector<int>& kernel, \
      const std::vector<int>& dilation, \
      const std::vector<int>& stride, \
      const std::vector<int>& pads, \
      const T* X, \
      T* Y, \
      CPUContext* /* context */) const { \
    const int ndim = X_dims.size(); \
    switch (ndim) { \
      case 1: { \
        RunMaxPool1D<T, kOrder>( \
            N, C, X_dims[0], Y_dims[0], kernel[0], stride[0], pads[0], X, Y); \
        return true; \
      } \
      case 2: { \
        if (std::is_same<T, float>::value && kOrder == StorageOrder::NCHW && \
            pool_op_util::IsNeon2x2p0s0Eligible( \
                X_dims[0], \
                X_dims[1], \
                Y_dims[0], \
                Y_dims[1], \
                kernel[0], \
                kernel[1], \
                stride[0], \
                stride[1], \
                pads[0], \
                pads[1], \
                pads[2], \
                pads[3], \
                dilation[0], \
                dilation[1], \
                X, \
                Y)) { \
          pool_op_util::RunNeonMaxPool2x2p0s0NCHW( \
              N, C, X_dims[0], X_dims[1], X, Y); \
        } else { \
          RunMaxPool2D<T, kOrder>( \
              N, \
              C, \
              X_dims[0], \
              X_dims[1], \
              Y_dims[0], \
              Y_dims[1], \
              kernel[0], \
              kernel[1], \
              stride[0], \
              stride[1], \
              pads[0], \
              pads[1], \
              X, \
              Y); \
        } \
        return true; \
      } \
      case 3: { \
        RunMaxPool3D<T, kOrder>( \
            N, \
            C, \
            X_dims[0], \
            X_dims[1], \
            X_dims[2], \
            Y_dims[0], \
            Y_dims[1], \
            Y_dims[2], \
            kernel[0], \
            kernel[1], \
            kernel[2], \
            stride[0], \
            stride[1], \
            stride[2], \
            pads[0], \
            pads[1], \
            pads[2], \
            X, \
            Y); \
        return true; \
      } \
      default: { \
        CAFFE_THROW("Unsupported pooling dim: ", ndim); \
        return false; \
      } \
    } \
  }
CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(float, StorageOrder::NCHW)
CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(float, StorageOrder::NHWC)
#undef CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD
850 
// Shared operator-schema documentation for the AveragePool family; the
// leading sentence is completed by AveragePoolDocGenerator ("AveragePool{dim}
// consumes ..."). Fix: removed the duplicated word in "across the the blob".
constexpr char kAveragePoolDoc[] = R"DOC(
consumes an input blob and applies average pooling across the blob according
to kernel sizes, stride sizes, pad lengths and dilation. Average pooling consists
of taking the average value of a subset of the input tensor according to the kernel
size and downsampling the data into the output blob for further processing. The
`brew` module has a wrapper for this operator for use in a `ModelHelper` object.

Pooling layers reduce the spatial dimensionality of the input blob. Each of the
output blob's dimensions will reduce according to:

$$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h


<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AveragePool",
    ["X"],
    ["Y"],
    kernel=2,
    stride=2,
)

workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```

**Result**

```
X:
 [[[[-0.2883434   0.43498734  0.05417408  1.912558    0.09390241
   -0.33173105]
   [ 1.633709    1.2047161   0.36964908  0.99961185  0.4184147
     0.9989975 ]
   [ 1.7644193   0.1789665   1.5812988  -0.6038542  -0.36090398
     0.33195344]
   [ 0.9457722  -0.95174325 -0.78124577  1.2062047   1.1903144
     0.2586746 ]
   [ 1.252104    0.32645547  1.8073524  -0.78397465  0.9978303
    -0.97614396]
   [ 0.5440196   1.5778259  -0.76750124  0.5051756   0.8838398
    -0.37085298]]]]

Y:
 [[[[0.7462672  0.83399826 0.2948959 ]
   [0.4843537  0.3506009  0.35500962]
   [0.9251013  0.19026303 0.13366827]]]]
```

</details>

)DOC";
919 
// Shared operator-schema documentation for the MaxPool family; the leading
// sentence is completed by MaxPoolDocGenerator ("MaxPool{dim} consumes ...").
// Fix: removed the duplicated word in "across the the blob".
constexpr char kMaxPoolDoc[] = R"DOC(
consumes an input blob and applies max pooling across the blob according to
kernel sizes, stride sizes, pad lengths and dilation. Max pooling consists of
taking the maximum value of a subset of the input tensor according to the kernel
size and downsampling the data into the output blob for further processing. The
`brew` module has a wrapper for this operator for use in a `ModelHelper` object.

Pooling layers reduce the spatial dimensionality of the input blob. Each of the
output blob's dimensions will reduce according to:

$$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "MaxPool",
    ["X"],
    ["Y"],
    kernel=2,
    stride=2,
)

workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))
```

**Result**

```
X:
 [[[[-2.8534958e-01 -1.7719941e+00 -8.2277227e-04  1.1088650e+00
    -2.1476576e+00 -3.5070452e-01]
   [-9.0058845e-01 -3.0070004e-01 -1.7907504e+00 -7.1746534e-01
     1.2798511e+00 -3.2214901e-01]
   [ 1.5806322e+00  1.6845188e+00 -2.6633200e-01 -3.8576153e-01
    -9.6424848e-02 -3.9696163e-01]
   [ 1.2572408e-01  6.3612902e-01 -3.9554062e-01 -6.9735396e-01
    -9.1898698e-01 -1.9609968e-01]
   [-1.1587460e+00  2.4605224e+00 -1.5497679e+00  1.3020347e-01
    -8.1293899e-01 -7.8803545e-01]
   [ 1.4323474e+00  1.3618395e+00  9.8975077e-02 -1.1307785e-01
     7.2035044e-01  2.7642491e-01]]]]

Y:
 [[[[-0.28534958  1.108865    1.2798511 ]
   [ 1.6845188  -0.266332   -0.09642485]
   [ 2.4605224   0.13020347  0.72035044]]]]

```

</details>

)DOC";
988 
// Returns an OpSchema filler for the AveragePool operators. `dim` is the
// dimension suffix substituted into the doc title ("", "1D", "2D", or "3D");
// the shared kAveragePoolDoc body is substituted for {pool_doc}.
std::function<void(OpSchema&)> AveragePoolDocGenerator(const char* dim) {
  // Capture `dim` by copy; the lambda outlives this call.
  return [=](OpSchema& schema) {
    std::string doc = "AveragePool{dim} {pool_doc}";
    c10::ReplaceAll(doc, "{dim}", dim);
    c10::ReplaceAll(doc, "{pool_doc}", kAveragePoolDoc);
    schema.SetDoc(doc);
    schema.Input(
        0,
        "X",
        "*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
    schema.Output(0, "Y", "*(type: Tensor`<float>`)* Output data tensor.");
    // Argument descriptions below were disabled (kept for reference);
    // presumably they are inherited from the base schema — TODO confirm.
    // schema.Arg(
    //     "kernel", "*(type: int)* Size of the window to take an average
    //     over.");
    // schema.Arg("stride", "*(type: int)* Stride of the window.");
    // schema.Arg(
    //     "pad",
    //     "*(type: int)* Implicit zero padding to be added on both sides.");
    // schema.Arg(
    //     "dilation",
    //     "*(type: int)* Parameter that controls the stride of elements in the
    //     " "window.");
    // schema.Arg(
    //     "order",
    //     "*(type: string; default: 'NCHW')* Order of the blob dimensions.");
    // schema.Arg(
    //     "count_include_pad",
    //     "*(type: bool; default: False)* When True, will include the "
    //     "zero-padding in the averaging.");
  };
}
1020 
// Returns an OpSchema filler for the MaxPool operators. `dim` is the
// dimension suffix substituted into the doc title ("", "1D", "2D", or "3D");
// the shared kMaxPoolDoc body is substituted for {pool_doc}.
std::function<void(OpSchema&)> MaxPoolDocGenerator(const char* dim) {
  // Capture `dim` by copy; the lambda outlives this call.
  return [=](OpSchema& schema) {
    std::string doc = "MaxPool{dim} {pool_doc}";
    c10::ReplaceAll(doc, "{dim}", dim);
    c10::ReplaceAll(doc, "{pool_doc}", kMaxPoolDoc);
    schema.SetDoc(doc);
    schema.Input(
        0,
        "X",
        "*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
    schema.Output(0, "Y", "*(type: Tensor`<float>`)* Output data tensor.");
    // Argument descriptions below were disabled (kept for reference);
    // presumably they are inherited from the base schema — TODO confirm.
    /*
    schema.Arg("kernel", "*(type: int)* Size of the window to take an average
    over."); schema.Arg("stride", "*(type: int)* Stride of the window.");
    schema.Arg("pad", "*(type: int)* Implicit zero padding to be added on both
    sides."); schema.Arg("dilation", "*(type: int)* Parameter that controls
    the stride of elements in the window."); schema.Arg("order", "*(type:
    string; default: 'NCHW')* Order of the blob dimensions.");
    */
  };
}
// CPU operator registrations and schemas. The dimension-suffixed variants
// (AveragePool1D/2D/3D, MaxPool1D/2D/3D) all reuse the same PoolOp
// implementation; the suffix only selects the doc text and the ONNX schema
// name they inherit from.

// AveragePool: dimension-agnostic entry point.
REGISTER_CPU_OPERATOR(
    AveragePool,
    PoolOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(AveragePool)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(AveragePoolDocGenerator(""))
    .InheritOnnxSchema();

// AveragePool1D: same kernel, 1-D documentation.
REGISTER_CPU_OPERATOR(
    AveragePool1D,
    PoolOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(AveragePool1D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(AveragePoolDocGenerator("1D"))
    .InheritOnnxSchema("AveragePool");

// AveragePool2D: same kernel, 2-D documentation.
REGISTER_CPU_OPERATOR(
    AveragePool2D,
    PoolOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(AveragePool2D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(AveragePoolDocGenerator("2D"))
    .InheritOnnxSchema("AveragePool");

// AveragePool3D: same kernel, 3-D documentation.
REGISTER_CPU_OPERATOR(
    AveragePool3D,
    PoolOp<float, CPUContext, AveragePoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(AveragePool3D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(AveragePoolDocGenerator("3D"))
    .InheritOnnxSchema("AveragePool");

// MaxPool: dimension-agnostic entry point.
REGISTER_CPU_OPERATOR(
    MaxPool,
    PoolOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(MaxPool)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(MaxPoolDocGenerator(""))
    .InheritOnnxSchema();

// MaxPool1D: same kernel, 1-D documentation.
REGISTER_CPU_OPERATOR(
    MaxPool1D,
    PoolOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(MaxPool1D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(MaxPoolDocGenerator("1D"))
    .InheritOnnxSchema("MaxPool");

// MaxPool2D: same kernel, 2-D documentation.
REGISTER_CPU_OPERATOR(
    MaxPool2D,
    PoolOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(MaxPool2D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(MaxPoolDocGenerator("2D"))
    .InheritOnnxSchema("MaxPool");

// MaxPool3D: same kernel, 3-D documentation.
REGISTER_CPU_OPERATOR(
    MaxPool3D,
    PoolOp<float, CPUContext, MaxPoolFunctor<CPUContext>>);

OPERATOR_SCHEMA(MaxPool3D)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
    .FillUsing(MaxPoolDocGenerator("3D"))
    .InheritOnnxSchema("MaxPool");
1129 
1130 } // namespace caffe2
// (Documentation-extraction residue, kept as comments so the file stays valid:)
// A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current process. Definition: blob.h:13
// Definition: static.cpp:64