1 #include "caffe2/operators/pool_op.h"     7 #include "caffe2/operators/pool_op_util.h"     8 #include "caffe2/utils/eigen_utils.h"     9 #include "caffe2/utils/math.h"    15 template <
typename T, StorageOrder kOrder>
    // Primary template of the 1-D average-pooling kernel; it is only declared
    // here, the bodies come from the float NCHW / NHWC specializations.
    // NOTE(review): the parameters that precede X_arr are on lines missing from
    // this excerpt; only the read-only input map and the output map are visible.
    16 void ComputeAveragePool1D(
    21     const ConstEigenArrayMap<T>& X_arr,
    22     EigenArrayMap<T>* Y_arr);
    // float / NCHW specialization of ComputeAveragePool1D.
    // Element y of Y_arr receives the sum of the r - l entries of X_arr's first
    // column starting at index l, multiplied by scale.
    // NOTE(review): l, r, y and scale are presumably declared on the parameter
    // lines absent from this excerpt — confirm against the full file.
    25 void ComputeAveragePool1D<float, StorageOrder::NCHW>(
    30     const ConstEigenArrayMap<float>& X_arr,
    31     EigenArrayMap<float>* Y_arr) {
    32   (*Y_arr)(y) = X_arr.col(0).segment(l, r - l).sum() * scale;
    // float / NHWC specialization of ComputeAveragePool1D.
    // Column y of Y_arr is seeded with column l of X_arr, columns l + 1 .. r - 1
    // are accumulated into it, and the result is multiplied by scale.
    // Column l is read unconditionally, i.e. even when r <= l.
    36 void ComputeAveragePool1D<float, StorageOrder::NHWC>(
    41     const ConstEigenArrayMap<float>& X_arr,
    42     EigenArrayMap<float>* Y_arr) {
    43   Y_arr->col(y) = X_arr.col(l);
    // The seed above already covers index l, so accumulation starts at l + 1.
    44   for (
int i = l + 1; i < r; ++i) {
    45     Y_arr->col(y) += X_arr.col(i);
    47   Y_arr->col(y) *= scale;
    // Primary template of the 2-D average-pooling kernel (declaration only);
    // the float NCHW / NHWC specializations provide the implementations.
    // NOTE(review): the parameters that precede X_arr are on lines missing from
    // this excerpt.
    50 template <
typename T, StorageOrder kOrder>
    51 void ComputeAveragePool2D(
    59     const ConstEigenArrayMap<T>& X_arr,
    60     EigenArrayMap<T>* Y_arr);
    // float / NCHW specialization of ComputeAveragePool2D.
    // Sums the (r - l) x (b - t) block of X_arr that starts at row l, column t,
    // multiplies the sum by scale and stores it in element y of Y_arr.
    63 void ComputeAveragePool2D<float, StorageOrder::NCHW>(
    71     const ConstEigenArrayMap<float>& X_arr,
    72     EigenArrayMap<float>* Y_arr) {
    73   (*Y_arr)(y) = X_arr.block(l, t, r - l, b - t).sum() * scale;
    // float / NHWC specialization of ComputeAveragePool2D.
    // Zeroes column y of Y_arr, adds X_arr column i * W + j for every
    // i in [t, b) and j in [l, r), then multiplies the column by scale.
    77 void ComputeAveragePool2D<float, StorageOrder::NHWC>(
    85     const ConstEigenArrayMap<float>& X_arr,
    86     EigenArrayMap<float>* Y_arr) {
    87   Y_arr->col(y).setZero();
    88   for (
int i = t; i < b; ++i) {
    89     for (
int j = l; j < r; ++j) {
    // i * W + j flattens the (i, j) position into a single column index.
    90       Y_arr->col(y) += X_arr.col(i * W + j);
    93   Y_arr->col(y) *= scale;
    // Primary template of the 3-D average-pooling kernel (declaration only);
    // the float NCHW / NHWC specializations provide the implementations.
    // NOTE(review): the parameters that precede X_arr are on lines missing from
    // this excerpt.
    96 template <
typename T, StorageOrder kOrder>
    97 void ComputeAveragePool3D(
   108     const ConstEigenArrayMap<T>& X_arr,
   109     EigenArrayMap<T>* Y_arr);
   // float / NCHW specialization of ComputeAveragePool3D.
   // For every i in [p, a) the sum of the (r - l) x (b - t) block starting at
   // row l, column i * H + t is added to element y; the total is then
   // multiplied by scale.
   // NOTE(review): no visible line initializes (*Y_arr)(y) before the loop —
   // presumably a line missing from this excerpt resets it; confirm.
   112 void ComputeAveragePool3D<float, StorageOrder::NCHW>(
   123     const ConstEigenArrayMap<float>& X_arr,
   124     EigenArrayMap<float>* Y_arr) {
   126   for (
int i = p; i < a; ++i) {
   127     (*Y_arr)(y) += X_arr.block(l, i * H + t, r - l, b - t).sum();
   129   (*Y_arr)(y) *= scale;
   // float / NHWC specialization of ComputeAveragePool3D.
   // Zeroes column y of Y_arr, adds X_arr column i * H * W + j * W + k for every
   // i in [p, a), j in [t, b) and k in [l, r), then multiplies by scale.
   133 void ComputeAveragePool3D<float, StorageOrder::NHWC>(
   144     const ConstEigenArrayMap<float>& X_arr,
   145     EigenArrayMap<float>* Y_arr) {
   146   Y_arr->col(y).setZero();
   147   for (
int i = p; i < a; ++i) {
   148     for (
int j = t; j < b; ++j) {
   149       for (
int k = l; k < r; ++k) {
   // i * H * W + j * W + k flattens the (i, j, k) position into a column index.
   150         Y_arr->col(y) += X_arr.col(i * H * W + j * W + k);
   154   Y_arr->col(y) *= scale;
   // Driver for 1-D average pooling.
   // batch_size is N * C for NCHW and N otherwise; X_stride / Y_stride are
   // X_size / Y_size, multiplied by C when the order is not NCHW.
   // For each output index y the input window is
   //   [max(y * stride - pad, 0), min(y * stride - pad + kernel, X_size))
   // and scale is 1 / kernel when count_include_pad is true, else 1 / (r - l).
   // NOTE(review): most of the parameter list and the code that consumes
   // X_stride / Y_stride (presumably advancing X_ptr / Y_ptr per iteration) are
   // on lines missing from this excerpt — confirm against the full file.
   // NOTE(review): with count_include_pad false the divisor is r - l, which is
   // zero if the clipped window is empty; whether that can occur depends on the
   // caller's shape computation, which is not visible here.
   157 template <
typename T, StorageOrder kOrder>
   158 void RunAveragePool1D(
   166     const bool count_include_pad,
   169   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   170   const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
   171   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
   174   for (
int i = 0; i < batch_size; ++i) {
   // NCHW views one channel as an (X_size, 1) map; NHWC views one image as (C, X_size).
   175     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   176         ? ConstEigenArrayMap<T>(X_ptr, X_size, 1)
   177         : ConstEigenArrayMap<T>(X_ptr, C, X_size);
   178     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   179         ? EigenArrayMap<T>(Y_ptr, Y_size, 1)
   180         : EigenArrayMap<T>(Y_ptr, C, Y_size);
   181     for (
int y = 0; y < Y_size; ++y) {
   // Clip the pooling window to the valid input range [0, X_size).
   182       const int l = std::max(y * stride - pad, 0);
   183       const int r = std::min(y * stride - pad + kernel, X_size);
   184       const T scale = 
T(1) / 
static_cast<T>(count_include_pad ? kernel : r - l);
   185       ComputeAveragePool1D<T, kOrder>(l, r, y, scale, X_arr, &Y_arr);
   // Driver for 2-D average pooling.
   // batch_size is N * C for NCHW and N otherwise; X_stride / Y_stride are the
   // H * W element counts, multiplied by C when the order is not NCHW.
   // For each output position (h, w) the window is clipped to
   //   rows    [max(h * stride_h - pad_t, 0), min(h * stride_h - pad_t + kernel_h, X_H))
   //   columns [max(w * stride_w - pad_l, 0), min(w * stride_w - pad_l + kernel_w, X_W))
   // and scale is 1 / (kernel_h * kernel_w) when count_include_pad is true,
   // else 1 / ((b - t) * (r - l)).
   // NOTE(review): most of the parameter list and the code that consumes
   // X_stride / Y_stride (presumably advancing X_ptr / Y_ptr) are on lines
   // missing from this excerpt — confirm against the full file.
   192 template <
typename T, StorageOrder kOrder>
   193 void RunAveragePool2D(
   206     const bool count_include_pad,
   209   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   210   const int X_HxW = X_H * X_W;
   211   const int Y_HxW = Y_H * Y_W;
   212   const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
   213   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
   216   for (
int i = 0; i < batch_size; ++i) {
   // NCHW views one channel as an (X_W, X_H) map; NHWC views one image as (C, X_HxW).
   217     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   218         ? ConstEigenArrayMap<T>(X_ptr, X_W, X_H)
   219         : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
   220     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   221         ? EigenArrayMap<T>(Y_ptr, Y_W, Y_H)
   222         : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
   223     for (
int h = 0; h < Y_H; ++h) {
   224       const int t = std::max(h * stride_h - pad_t, 0);
   225       const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
   226       for (
int w = 0; w < Y_W; ++w) {
   227         const int l = std::max(w * stride_w - pad_l, 0);
   228         const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
   // Flattened output index of position (h, w).
   229         const int y = h * Y_W + w;
   230         const T scale = 
T(1) /
   231             static_cast<T>(count_include_pad ? kernel_h * kernel_w
   232                                              : (b - t) * (r - l));
   233         ComputeAveragePool2D<T, kOrder>(
   234             X_W, t, b, l, r, y, scale, X_arr, &Y_arr);
   // Driver for 3-D average pooling.
   // batch_size is N * C for NCHW and N otherwise. Despite their names, X_HxW
   // and Y_HxW here hold the full D * H * W element counts; X_stride / Y_stride
   // multiply them by C when the order is not NCHW.
   // For each output position (d, h, w) the window is clipped per axis to
   // [max(o * stride - pad, 0), min(o * stride - pad + kernel, extent)), and
   // scale is 1 / (kernel_d * kernel_h * kernel_w) when count_include_pad is
   // true, else 1 / ((a - p) * (b - t) * (r - l)).
   // NOTE(review): most of the parameter list and the code that consumes
   // X_stride / Y_stride (presumably advancing X_ptr / Y_ptr) are on lines
   // missing from this excerpt — confirm against the full file.
   242 template <
typename T, StorageOrder kOrder>
   243 void RunAveragePool3D(
   261     const bool count_include_pad,
   264   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   265   const int X_HxW = X_D * X_H * X_W;
   266   const int Y_HxW = Y_D * Y_H * Y_W;
   267   const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
   268   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
   271   for (
int i = 0; i < batch_size; ++i) {
   // NCHW views one channel as an (X_W, X_D * X_H) map; NHWC views one volume as (C, X_HxW).
   272     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   273         ? ConstEigenArrayMap<T>(X_ptr, X_W, X_D * X_H)
   274         : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
   275     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   276         ? EigenArrayMap<T>(Y_ptr, Y_W, Y_D * Y_H)
   277         : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
   278     for (
int d = 0; d < Y_D; ++d) {
   279       const int p = std::max(d * stride_d - pad_p, 0);
   280       const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
   281       for (
int h = 0; h < Y_H; ++h) {
   282         const int t = std::max(h * stride_h - pad_t, 0);
   283         const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
   284         for (
int w = 0; w < Y_W; ++w) {
   285           const int l = std::max(w * stride_w - pad_l, 0);
   286           const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
   // Flattened output index of position (d, h, w).
   287           const int y = d * Y_H * Y_W + h * Y_W + w;
   288           const T scale = 
T(1) /
   289               static_cast<T>(count_include_pad ? kernel_d * kernel_h * kernel_w
   290                                                : (a - p) * (b - t) * (r - l));
   291           ComputeAveragePool3D<T, kOrder>(
   292               X_H, X_W, p, a, t, b, l, r, y, scale, X_arr, &Y_arr);
   // Primary template of the 1-D max-pooling kernel (declaration only); the
   // float NCHW / NHWC specializations provide the implementations.
   // NOTE(review): the parameters that precede X_arr are on lines missing from
   // this excerpt.
   301 template <
typename T, StorageOrder kOrder>
   302 void ComputeMaxPool1D(
   306     const ConstEigenArrayMap<T>& X_arr,
   307     EigenArrayMap<T>* Y_arr);
   // float / NCHW specialization of ComputeMaxPool1D.
   // Element y of Y_arr receives the largest of the r - l entries of X_arr's
   // first column starting at index l.
   310 void ComputeMaxPool1D<float, StorageOrder::NCHW>(
   314     const ConstEigenArrayMap<float>& X_arr,
   315     EigenArrayMap<float>* Y_arr) {
   316   (*Y_arr)(y) = X_arr.col(0).segment(l, r - l).maxCoeff();
   // float / NHWC specialization of ComputeMaxPool1D.
   // Column y of Y_arr is seeded with column l of X_arr and then replaced by the
   // element-wise maximum with each of the columns l + 1 .. r - 1.
   // Column l is read unconditionally, i.e. even when r <= l.
   320 void ComputeMaxPool1D<float, StorageOrder::NHWC>(
   324     const ConstEigenArrayMap<float>& X_arr,
   325     EigenArrayMap<float>* Y_arr) {
   326   Y_arr->col(y) = X_arr.col(l);
   327   for (
int i = l + 1; i < r; ++i) {
   328     Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i));
   // Primary template of the 2-D max-pooling kernel (declaration only); the
   // float NCHW / NHWC specializations provide the implementations.
   // NOTE(review): the parameters that precede X_arr are on lines missing from
   // this excerpt.
   332 template <
typename T, StorageOrder kOrder>
   333 void ComputeMaxPool2D(
   340     const ConstEigenArrayMap<T>& X_arr,
   341     EigenArrayMap<T>* Y_arr);
   // float / NCHW specialization of ComputeMaxPool2D.
   // Stores in element y the largest coefficient of the (r - l) x (b - t) block
   // of X_arr that starts at row l, column t.
   344 void ComputeMaxPool2D<float, StorageOrder::NCHW>(
   351     const ConstEigenArrayMap<float>& X_arr,
   352     EigenArrayMap<float>* Y_arr) {
   353   (*Y_arr)(y) = X_arr.block(l, t, r - l, b - t).maxCoeff();
   // float / NHWC specialization of ComputeMaxPool2D.
   // Column y of Y_arr starts at the lowest finite float and is replaced by the
   // element-wise maximum with X_arr column i * W + j for every i in [t, b) and
   // j in [l, r). An empty window therefore leaves the column at lowest().
   357 void ComputeMaxPool2D<float, StorageOrder::NHWC>(
   364     const ConstEigenArrayMap<float>& X_arr,
   365     EigenArrayMap<float>* Y_arr) {
   366   Y_arr->col(y).setConstant(std::numeric_limits<float>::lowest());
   367   for (
int i = t; i < b; ++i) {
   368     for (
int j = l; j < r; ++j) {
   369       Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i * W + j));
   // Primary template of the 3-D max-pooling kernel (declaration only); the
   // float NCHW / NHWC specializations provide the implementations.
   // NOTE(review): the parameters that precede X_arr are on lines missing from
   // this excerpt.
   374 template <
typename T, StorageOrder kOrder>
   375 void ComputeMaxPool3D(
   385     const ConstEigenArrayMap<T>& X_arr,
   386     EigenArrayMap<T>* Y_arr);
   // float / NCHW specialization of ComputeMaxPool3D.
   // Element y starts at the lowest finite float; for every i in [p, a) it is
   // raised to the largest coefficient of the (r - l) x (b - t) block starting
   // at row l, column i * H + t.
   389 void ComputeMaxPool3D<float, StorageOrder::NCHW>(
   399     const ConstEigenArrayMap<float>& X_arr,
   400     EigenArrayMap<float>* Y_arr) {
   401   (*Y_arr)(y) = std::numeric_limits<float>::lowest();
   402   for (
int i = p; i < a; ++i) {
   403     (*Y_arr)(y) = std::max(
   404         (*Y_arr)(y), X_arr.block(l, i * H + t, r - l, b - t).maxCoeff());
   // float / NHWC specialization of ComputeMaxPool3D.
   // Column y of Y_arr starts at the lowest finite float and is replaced by the
   // element-wise maximum with X_arr column i * H * W + j * W + k for every
   // i in [p, a), j in [t, b) and k in [l, r).
   409 void ComputeMaxPool3D<float, StorageOrder::NHWC>(
   419     const ConstEigenArrayMap<float>& X_arr,
   420     EigenArrayMap<float>* Y_arr) {
   421   Y_arr->col(y).setConstant(std::numeric_limits<float>::lowest());
   422   for (
int i = p; i < a; ++i) {
   423     for (
int j = t; j < b; ++j) {
   424       for (
int k = l; k < r; ++k) {
   425         Y_arr->col(y) = Y_arr->col(y).max(X_arr.col(i * H * W + j * W + k));
   // Driver for 1-D max pooling.
   // NOTE(review): the line carrying the function name and its parameter list is
   // missing from this excerpt; presumably this is RunMaxPool1D, which the
   // MaxPoolFunctor Forward macro invokes — confirm against the full file.
   // batch_size is N * C for NCHW and N otherwise. For each output index y the
   // window [max(y * stride - pad, 0), min(y * stride - pad + kernel, X_size))
   // is handed to ComputeMaxPool1D.
   // NOTE(review): the code that consumes X_stride / Y_stride (presumably
   // advancing X_ptr / Y_ptr per iteration) is not visible here.
   431 template <
typename T, StorageOrder kOrder>
   442   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   443   const int X_stride = kOrder == StorageOrder::NCHW ? X_size : X_size * C;
   444   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_size : Y_size * C;
   447   for (
int i = 0; i < batch_size; ++i) {
   448     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   449         ? ConstEigenArrayMap<T>(X_ptr, X_size, 1)
   450         : ConstEigenArrayMap<T>(X_ptr, C, X_size);
   451     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   452         ? EigenArrayMap<T>(Y_ptr, Y_size, 1)
   453         : EigenArrayMap<T>(Y_ptr, C, Y_size);
   454     for (
int y = 0; y < Y_size; ++y) {
   // Clip the pooling window to the valid input range [0, X_size).
   455       const int l = std::max(y * stride - pad, 0);
   456       const int r = std::min(y * stride - pad + kernel, X_size);
   457       ComputeMaxPool1D<T, kOrder>(l, r, y, X_arr, &Y_arr);
   // Driver for 2-D max pooling.
   // NOTE(review): the line carrying the function name and its parameter list is
   // missing from this excerpt; presumably this is RunMaxPool2D, which the
   // MaxPoolFunctor Forward macro invokes — confirm against the full file.
   // batch_size is N * C for NCHW and N otherwise. For each output position
   // (h, w) the row window [t, b) and column window [l, r) are clipped to the
   // input extents and handed to ComputeMaxPool2D together with the flattened
   // output index h * Y_W + w.
   // NOTE(review): the code that consumes X_stride / Y_stride (presumably
   // advancing X_ptr / Y_ptr per iteration) is not visible here.
   464 template <
typename T, StorageOrder kOrder>
   480   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   481   const int X_HxW = X_H * X_W;
   482   const int Y_HxW = Y_H * Y_W;
   483   const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
   484   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
   487   for (
int i = 0; i < batch_size; ++i) {
   488     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   489         ? ConstEigenArrayMap<T>(X_ptr, X_W, X_H)
   490         : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
   491     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   492         ? EigenArrayMap<T>(Y_ptr, Y_W, Y_H)
   493         : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
   494     for (
int h = 0; h < Y_H; ++h) {
   495       const int t = std::max(h * stride_h - pad_t, 0);
   496       const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
   497       for (
int w = 0; w < Y_W; ++w) {
   498         const int l = std::max(w * stride_w - pad_l, 0);
   499         const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
   500         const int y = h * Y_W + w;
   501         ComputeMaxPool2D<T, kOrder>(X_W, t, b, l, r, y, X_arr, &Y_arr);
   // Driver for 3-D max pooling.
   // NOTE(review): the line carrying the function name and its parameter list is
   // missing from this excerpt; presumably this is RunMaxPool3D, which the
   // MaxPoolFunctor Forward macro invokes — confirm against the full file.
   // batch_size is N * C for NCHW and N otherwise. Despite their names, X_HxW
   // and Y_HxW hold the full D * H * W element counts. For each output position
   // (d, h, w) the per-axis windows [p, a), [t, b) and [l, r) are clipped to the
   // input extents and handed to ComputeMaxPool3D together with the flattened
   // output index d * Y_H * Y_W + h * Y_W + w.
   // NOTE(review): the code that consumes X_stride / Y_stride (presumably
   // advancing X_ptr / Y_ptr per iteration) is not visible here.
   508 template <
typename T, StorageOrder kOrder>
   529   const int batch_size = kOrder == StorageOrder::NCHW ? N * C : N;
   530   const int X_HxW = X_D * X_H * X_W;
   531   const int Y_HxW = Y_D * Y_H * Y_W;
   532   const int X_stride = kOrder == StorageOrder::NCHW ? X_HxW : X_HxW * C;
   533   const int Y_stride = kOrder == StorageOrder::NCHW ? Y_HxW : Y_HxW * C;
   536   for (
int i = 0; i < batch_size; ++i) {
   537     ConstEigenArrayMap<T> X_arr = kOrder == StorageOrder::NCHW
   538         ? ConstEigenArrayMap<T>(X_ptr, X_W, X_D * X_H)
   539         : ConstEigenArrayMap<T>(X_ptr, C, X_HxW);
   540     EigenArrayMap<T> Y_arr = kOrder == StorageOrder::NCHW
   541         ? EigenArrayMap<T>(Y_ptr, Y_W, Y_D * Y_H)
   542         : EigenArrayMap<T>(Y_ptr, C, Y_HxW);
   543     for (
int d = 0; d < Y_D; ++d) {
   544       const int p = std::max(d * stride_d - pad_p, 0);
   545       const int a = std::min(d * stride_d - pad_p + kernel_d, X_D);
   546       for (
int h = 0; h < Y_H; ++h) {
   547         const int t = std::max(h * stride_h - pad_t, 0);
   548         const int b = std::min(h * stride_h - pad_t + kernel_h, X_H);
   549         for (
int w = 0; w < Y_W; ++w) {
   550           const int l = std::max(w * stride_w - pad_l, 0);
   551           const int r = std::min(w * stride_w - pad_l + kernel_w, X_W);
   552           const int y = d * Y_H * Y_W + h * Y_W + w;
   553           ComputeMaxPool3D<T, kOrder>(
   554               X_H, X_W, p, a, t, b, l, r, y, X_arr, &Y_arr);
   // AveragePoolFunctor<CPUContext>::GlobalPoolingForward for float / NCHW.
   // Describes X as an (N * C, HxW) array and Y as (N * C, 1), then calls
   // math::ReduceMean over those two shapes with 1.0f as the scalar argument.
   // NOTE(review): the leading parameters and the return statement are on lines
   // missing from this excerpt.
   567 bool AveragePoolFunctor<CPUContext>::
   568     GlobalPoolingForward<float, StorageOrder::NCHW>(
   574         CPUContext* context) 
const {
   575   const std::array<int, 2> X_dims = {N * C, HxW};
   576   const std::array<int, 2> Y_dims = {N * C, 1};
   577   math::ReduceMean<float, CPUContext>(
   578       2, X_dims.data(), Y_dims.data(), 1.0f, X, Y, context);
   // AveragePoolFunctor<CPUContext>::GlobalPoolingForward for float / NHWC.
   // Fills the N * C outputs with 0, adds each of the HxW length-C slices at
   // X_ptr + j * C into Y_ptr, and finally scales all N * C outputs by 1 / HxW.
   // NOTE(review): the declaration of Y_ptr and the per-batch advance of
   // X_ptr / Y_ptr are presumably on lines missing from this excerpt — confirm.
   584 bool AveragePoolFunctor<CPUContext>::
   585     GlobalPoolingForward<float, StorageOrder::NHWC>(
   591         CPUContext* context) 
const {
   592   math::Set<float, CPUContext>(N * C, 0.0f, Y, context);
   593   const float* X_ptr = X;
   595   for (
int i = 0; i < N; ++i) {
   596     for (
int j = 0; j < HxW; ++j) {
   // In-place accumulation: Y_ptr is both an input and the output of the add.
   597       math::Add<float, CPUContext>(C, Y_ptr, X_ptr + j * C, Y_ptr, context);
   602   math::Scale<float, float, CPUContext>(
   603       N * C, 1.0f / 
static_cast<float>(HxW), Y, Y, context);
   // Macro that stamps out AveragePoolFunctor<CPUContext>::Forward<T, kOrder>.
   // The generated body takes ndim from X_dims.size() and dispatches to
   // RunAveragePool1D / RunAveragePool2D / RunAveragePool3D; for float + NCHW
   // inputs accepted by pool_op_util::IsNeon4x4p0s0Eligible it calls
   // pool_op_util::RunNeonAveragePool4x4p0s0NCHW instead of RunAveragePool2D.
   // Any other ndim raises CAFFE_THROW("Unsupported pooling dim: ", ndim).
   // It is instantiated for float with NCHW and with NHWC.
   // NOTE(review): the switch/case labels and most call arguments are on lines
   // missing from this excerpt.
   607 #define CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(T, kOrder)           \   610   bool AveragePoolFunctor<CPUContext>::Forward<T, kOrder>(                   \   613       const std::vector<int>& X_dims,                                        \   614       const std::vector<int>& Y_dims,                                        \   615       const std::vector<int>& kernel,                                        \   616       const std::vector<int>& dilation,                                      \   617       const std::vector<int>& stride,                                        \   618       const std::vector<int>& pads,                                          \   621       CPUContext* ) const {                                     \   622     const int ndim = X_dims.size();                                          \   625         RunAveragePool1D<T, kOrder>(                                         \   639         if (std::is_same<T, float>::value && kOrder == StorageOrder::NCHW && \   640             pool_op_util::IsNeon4x4p0s0Eligible(                             \   657           pool_op_util::RunNeonAveragePool4x4p0s0NCHW(                       \   658               N, C, X_dims[0], X_dims[1], X, Y);                             \   660           RunAveragePool2D<T, kOrder>(                                       \   680         RunAveragePool3D<T, kOrder>(                                         \   704         CAFFE_THROW("Unsupported pooling dim: ", ndim);                      \   709 CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(
float, StorageOrder::NCHW)
   710 CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD(
float, StorageOrder::NHWC)
   // The #undef retires the average-pool Forward macro; the definition that
   // starts on the same line is MaxPoolFunctor<CPUContext>::GlobalPoolingForward
   // for float / NCHW. It describes X as an (N * C, HxW) array and Y as
   // (N * C, 1), then calls math::ReduceMax over those two shapes with 1.0f as
   // the scalar argument.
   // NOTE(review): the leading parameters and the return statement are on lines
   // missing from this excerpt.
   711 #undef CAFFE2_SPECIALIZED_AVERAGE_POOL_FUNCTOR_FORWARD   715 bool MaxPoolFunctor<CPUContext>::
   716     GlobalPoolingForward<float, StorageOrder::NCHW>(
   722         CPUContext* context) 
const {
   723   const std::array<int, 2> X_dims = {N * C, HxW};
   724   const std::array<int, 2> Y_dims = {N * C, 1};
   725   math::ReduceMax<float, CPUContext>(
   726       2, X_dims.data(), Y_dims.data(), 1.0f, X, Y, context);
   // MaxPoolFunctor<CPUContext>::GlobalPoolingForward for float / NHWC.
   // Fills the N * C outputs with the lowest finite float, then for each of the
   // N iterations views X_ptr as a (C, HxW) map and Y_ptr as a length-C vector
   // and folds every column of the input into the output with an element-wise max.
   // NOTE(review): the declaration of Y_ptr and the per-batch advance of
   // X_ptr / Y_ptr are presumably on lines missing from this excerpt — confirm.
   732 bool MaxPoolFunctor<CPUContext>::
   733     GlobalPoolingForward<float, StorageOrder::NHWC>(
   739         CPUContext* context) 
const {
   740   math::Set<float, CPUContext>(
   741       N * C, std::numeric_limits<float>::lowest(), Y, context);
   742   const float* X_ptr = X;
   744   for (
int i = 0; i < N; ++i) {
   745     ConstEigenArrayMap<float> X_arr(X_ptr, C, HxW);
   746     EigenVectorArrayMap<float> Y_arr(Y_ptr, C);
   747     for (
int j = 0; j < HxW; ++j) {
   748       Y_arr = Y_arr.max(X_arr.col(j));
   // Macro that stamps out MaxPoolFunctor<CPUContext>::Forward<T, kOrder>.
   // The generated body takes ndim from X_dims.size() and dispatches to
   // RunMaxPool1D / RunMaxPool2D / RunMaxPool3D; for float + NCHW inputs accepted
   // by pool_op_util::IsNeon2x2p0s0Eligible it calls
   // pool_op_util::RunNeonMaxPool2x2p0s0NCHW instead of RunMaxPool2D.
   // Any other ndim raises CAFFE_THROW("Unsupported pooling dim: ", ndim).
   // It is instantiated for float with NCHW and with NHWC.
   // NOTE(review): the switch/case labels and most call arguments are on lines
   // missing from this excerpt.
   756 #define CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(T, kOrder)                \   759   bool MaxPoolFunctor<CPUContext>::Forward<T, kOrder>(                        \   762       const std::vector<int>& X_dims,                                         \   763       const std::vector<int>& Y_dims,                                         \   764       const std::vector<int>& kernel,                                         \   765       const std::vector<int>& dilation,                                       \   766       const std::vector<int>& stride,                                         \   767       const std::vector<int>& pads,                                           \   770       CPUContext* ) const {                                      \   771     const int ndim = X_dims.size();                                           \   774         RunMaxPool1D<T, kOrder>(                                              \   775             N, C, X_dims[0], Y_dims[0], kernel[0], stride[0], pads[0], X, Y); \   779         if (std::is_same<T, float>::value && kOrder == StorageOrder::NCHW &&  \   780             pool_op_util::IsNeon2x2p0s0Eligible(                              \   797           pool_op_util::RunNeonMaxPool2x2p0s0NCHW(                            \   798               N, C, X_dims[0], X_dims[1], X, Y);                              \   800           RunMaxPool2D<T, kOrder>(                                            \   819         RunMaxPool3D<T, kOrder>(                                              \   842         CAFFE_THROW("Unsupported pooling dim: ", ndim);                       \   847 CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(
float, StorageOrder::NCHW)
   848 CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD(
float, StorageOrder::NHWC)
   849 #undef CAFFE2_SPECIALIZED_MAX_POOL_FUNCTOR_FORWARD   851 constexpr 
char kAveragePoolDoc[] = R
"DOC(   852 consumes an input blob and applies average pooling across the the blob according   853 to kernel sizes, stride sizes, pad lengths and dilation. Average pooling consists   854 of taking the average value of a subset of the input tensor according to the kernel   855 size and downsampling the data into the output blob for further processing. The   856 `brew` module has a wrapper for this operator for use in a `ModelHelper` object.   858 Pooling layers reduce the spatial dimensionality of the input blob. Each of the   859 output blob's dimensions will reduce according to:   861 $$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$   865 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h   866 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc   867 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h   872 <summary> <b>Example</b> </summary>   877 workspace.ResetWorkspace()   879 op = core.CreateOperator(   887 workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW   888 print("X:\n", workspace.FetchBlob("X"), "\n")   889 workspace.RunOperatorOnce(op)   890 print("Y:\n", workspace.FetchBlob("Y"))   897  [[[[-0.2883434   0.43498734  0.05417408  1.912558    0.09390241   899    [ 1.633709    1.2047161   0.36964908  0.99961185  0.4184147   901    [ 1.7644193   0.1789665   1.5812988  -0.6038542  -0.36090398   903    [ 0.9457722  -0.95174325 -0.78124577  1.2062047   1.1903144   905    [ 1.252104    0.32645547  1.8073524  -0.78397465  0.9978303   907    [ 0.5440196   1.5778259  -0.76750124  0.5051756   0.8838398   911  [[[[0.7462672  0.83399826 0.2948959 ]   912    [0.4843537  0.3506009  0.35500962]   913    [0.9251013  0.19026303 0.13366827]]]]   920 constexpr char kMaxPoolDoc[] = R
"DOC(   921 consumes an input blob and applies max pooling across the the blob according to   922 kernel sizes, stride sizes, pad lengths and dilation. Max pooling consists of   923 taking the maximum value of a subset of the input tensor according to the kernel   924 size and downsampling the data into the output blob for further processing. The   925 `brew` module has a wrapper for this operator for use in a `ModelHelper` object.   927 Pooling layers reduce the spatial dimensionality of the input blob. Each of the   928 output blob's dimensions will reduce according to:   930 $$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$   934 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.h   935 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/pool_op.cc   936 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/conv_pool_op_base.h   940 <summary> <b>Example</b> </summary>   945 workspace.ResetWorkspace()   947 op = core.CreateOperator(   955 workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW   956 print("X:\n", workspace.FetchBlob("X"), "\n")   957 workspace.RunOperatorOnce(op)   958 print("Y:\n", workspace.FetchBlob("Y"))   965  [[[[-2.8534958e-01 -1.7719941e+00 -8.2277227e-04  1.1088650e+00   966     -2.1476576e+00 -3.5070452e-01]   967    [-9.0058845e-01 -3.0070004e-01 -1.7907504e+00 -7.1746534e-01   968      1.2798511e+00 -3.2214901e-01]   969    [ 1.5806322e+00  1.6845188e+00 -2.6633200e-01 -3.8576153e-01   970     -9.6424848e-02 -3.9696163e-01]   971    [ 1.2572408e-01  6.3612902e-01 -3.9554062e-01 -6.9735396e-01   972     -9.1898698e-01 -1.9609968e-01]   973    [-1.1587460e+00  2.4605224e+00 -1.5497679e+00  1.3020347e-01   974     -8.1293899e-01 -7.8803545e-01]   975    [ 1.4323474e+00  1.3618395e+00  9.8975077e-02 -1.1307785e-01   976      7.2035044e-01  2.7642491e-01]]]]   979  [[[[-0.28534958  1.108865    1.2798511 ]   980    [ 1.6845188  -0.266332   -0.09642485]   981    [ 
2.4605224   0.13020347  0.72035044]]]]   989 std::function<void(OpSchema&)> AveragePoolDocGenerator(const char* dim) {
   // AveragePoolDocGenerator returns a callable that fills in an OpSchema.
   // The lambda captures dim by copy, builds the doc text from the template
   // "AveragePool{dim} {pool_doc}" by substituting dim and kAveragePoolDoc, and
   // documents output 0 as "Y".
   // NOTE(review): the calls that attach the doc string and declare the inputs
   // and arguments are on lines missing from this excerpt; only a fragment of
   // the input description is visible.
   990   return [=](OpSchema& schema) {
   991     std::string doc = 
"AveragePool{dim} {pool_doc}";
   992     c10::ReplaceAll(doc, 
"{dim}", dim);
   993     c10::ReplaceAll(doc, 
"{pool_doc}", kAveragePoolDoc);
   998         "*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
   999     schema.Output(0, 
"Y", 
"*(type: Tensor`<float>`)* Output data tensor.");
  // MaxPoolDocGenerator returns a callable that fills in an OpSchema.
  // The lambda captures dim by copy, builds the doc text from the template
  // "MaxPool{dim} {pool_doc}" by substituting dim and kMaxPoolDoc, and documents
  // output 0 as "Y".
  // NOTE(review): the calls that attach the doc string and declare the inputs
  // and arguments are on lines missing from this excerpt; only a fragment of
  // the input description is visible.
  1021 std::function<void(OpSchema&)> MaxPoolDocGenerator(
const char* dim) {
  1022   return [=](OpSchema& schema) {
  1023     std::string doc = 
"MaxPool{dim} {pool_doc}";
  1024     c10::ReplaceAll(doc, 
"{dim}", dim);
  1025     c10::ReplaceAll(doc, 
"{pool_doc}", kMaxPoolDoc);
  1030         "*(type: Tensor`<float>`)* Input data tensor of shape NCHW or NHWC.");
  1031     schema.Output(0, 
"Y", 
"*(type: Tensor`<float>`)* Output data tensor.");
  // CPU registrations and schemas for the average-pooling operators.
  // Each REGISTER_CPU_OPERATOR binds PoolOp<float, CPUContext,
  // AveragePoolFunctor<CPUContext>>. Each schema uses
  // ConvPoolOpBase<CPUContext>::TensorInferenceForPool for shape inference and
  // AveragePoolDocGenerator ("", "1D", "2D", "3D") for documentation; the
  // dimensioned variants inherit the ONNX schema named "AveragePool".
  // NOTE(review): the operator-name arguments of REGISTER_CPU_OPERATOR, the
  // first OPERATOR_SCHEMA line and the NumInputs/NumOutputs-style calls are on
  // lines missing from this excerpt.
  1042 REGISTER_CPU_OPERATOR(
  1044     PoolOp<
float, CPUContext, AveragePoolFunctor<CPUContext>>);
  1049     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1050     .FillUsing(AveragePoolDocGenerator(
""))
  1051     .InheritOnnxSchema();
  1053 REGISTER_CPU_OPERATOR(
  1055     PoolOp<
float, CPUContext, AveragePoolFunctor<CPUContext>>);
  1057 OPERATOR_SCHEMA(AveragePool1D)
  1060     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1061     .FillUsing(AveragePoolDocGenerator(
"1D"))
  1062     .InheritOnnxSchema(
"AveragePool");
  1064 REGISTER_CPU_OPERATOR(
  1066     PoolOp<
float, CPUContext, AveragePoolFunctor<CPUContext>>);
  1068 OPERATOR_SCHEMA(AveragePool2D)
  1071     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1072     .FillUsing(AveragePoolDocGenerator(
"2D"))
  1073     .InheritOnnxSchema(
"AveragePool");
  1075 REGISTER_CPU_OPERATOR(
  1077     PoolOp<
float, CPUContext, AveragePoolFunctor<CPUContext>>);
  1079 OPERATOR_SCHEMA(AveragePool3D)
  1082     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1083     .FillUsing(AveragePoolDocGenerator(
"3D"))
  1084     .InheritOnnxSchema(
"AveragePool");
  // CPU registrations and schemas for the max-pooling operators.
  // Each REGISTER_CPU_OPERATOR binds PoolOp<float, CPUContext,
  // MaxPoolFunctor<CPUContext>>. Each schema uses
  // ConvPoolOpBase<CPUContext>::TensorInferenceForPool for shape inference and
  // MaxPoolDocGenerator ("", "1D", "2D", "3D") for documentation; the
  // dimensioned variants inherit the ONNX schema named "MaxPool".
  // NOTE(review): the operator-name arguments of REGISTER_CPU_OPERATOR, the
  // first OPERATOR_SCHEMA line and the NumInputs/NumOutputs-style calls are on
  // lines missing from this excerpt.
  1086 REGISTER_CPU_OPERATOR(
  1088     PoolOp<
float, CPUContext, MaxPoolFunctor<CPUContext>>);
  1093     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1094     .FillUsing(MaxPoolDocGenerator(
""))
  1095     .InheritOnnxSchema();
  1097 REGISTER_CPU_OPERATOR(
  1099     PoolOp<
float, CPUContext, MaxPoolFunctor<CPUContext>>);
  1101 OPERATOR_SCHEMA(MaxPool1D)
  1104     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1105     .FillUsing(MaxPoolDocGenerator(
"1D"))
  1106     .InheritOnnxSchema(
"MaxPool");
  1108 REGISTER_CPU_OPERATOR(
  1110     PoolOp<
float, CPUContext, MaxPoolFunctor<CPUContext>>);
  1112 OPERATOR_SCHEMA(MaxPool2D)
  1115     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1116     .FillUsing(MaxPoolDocGenerator(
"2D"))
  1117     .InheritOnnxSchema(
"MaxPool");
  1119 REGISTER_CPU_OPERATOR(
  1121     PoolOp<
float, CPUContext, MaxPoolFunctor<CPUContext>>);
  1123 OPERATOR_SCHEMA(MaxPool3D)
  1126     .TensorInferenceFunction(ConvPoolOpBase<CPUContext>::TensorInferenceForPool)
  1127     .FillUsing(MaxPoolDocGenerator(
"3D"))
  1128     .InheritOnnxSchema(
"MaxPool");
 
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...