doxygen-c/html/lp__pool__op_8cc_source.html

 // TODO: reduce the apparent redundancy of all the code below.
 #include "caffe2/operators/pool_op.h"

 namespace caffe2 {

 using std::max;
 using std::min;

 struct LpPoolFunctor {
   explicit LpPoolFunctor(const OperatorBase& /* op */) {}
 };

 template <>
 bool PoolOp<float, CPUContext, LpPoolFunctor>::RunOnDeviceWithOrderNCHW() {
   auto& X = Input(0);
   auto* Y = Output(0);
   ConvPoolOpBase::SetOutputSize(X, Y, X.dim32(1));
   const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
   const auto inv_p = 1.0 / p;

   const float* Xdata = X.data<float>();
   float* Ydata = Y->template mutable_data<float>();
   math::Set<float, CPUContext>(Y->numel(), 0, Ydata, &context_);
   // The main loop
   int channels = X.dim32(1);
   int height = X.dim32(2);
   int width = X.dim32(3);
   int pooled_height = Y->dim32(2);
   int pooled_width = Y->dim32(3);

   for (int n = 0; n < X.dim32(0); ++n) {
     for (int c = 0; c < channels; ++c) {
       for (int ph = 0; ph < pooled_height; ++ph) {
         for (int pw = 0; pw < pooled_width; ++pw) {
           int hstart = ph * stride_[0] - pads_[0];
           int wstart = pw * stride_[1] - pads_[1];
           int hend = min(hstart + kernel_[0], height);
           int wend = min(wstart + kernel_[1], width);
           hstart = max(hstart, 0);
           wstart = max(wstart, 0);
           const int pool_index = ph * pooled_width + pw;
           for (int h = hstart; h < hend; ++h) {
             for (int w = wstart; w < wend; ++w) {
               const int input_index = h * width + w;
               Ydata[pool_index] += std::pow(std::abs(Xdata[input_index]), p);
             }
           }
           Ydata[pool_index] = std::pow(Ydata[pool_index], inv_p);
         }
       }
       // Do offset.
       Xdata += height * width;
       Ydata += pooled_height * pooled_width;
     }
   }
   return true;
 }

 template <>
 bool PoolOp<float, CPUContext, LpPoolFunctor>::RunOnDeviceWithOrderNHWC() {
   auto& X = Input(0);
   auto* Y = Output(0);
   int height = X.dim32(1);
   int width = X.dim32(2);
   int channels = X.dim32(3);
   ConvPoolOpBase::SetOutputSize(X, Y, channels);

   const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
   const auto inv_p = 1.0 / p;

   const float* Xdata = X.data<float>();
   float* Ydata = Y->template mutable_data<float>();
   math::Set<float, CPUContext>(Y->numel(), 0, Ydata, &context_);
   // The main loop
   int pooled_height = Y->dim32(1);
   int pooled_width = Y->dim32(2);
   for (int n = 0; n < X.dim32(0); ++n) {
     for (int ph = 0; ph < pooled_height; ++ph) {
       for (int pw = 0; pw < pooled_width; ++pw) {
         int hstart = ph * stride_[0] - pads_[0];
         int wstart = pw * stride_[1] - pads_[1];
         int hend = min(hstart + kernel_[0], height);
         int wend = min(wstart + kernel_[1], width);
         hstart = max(hstart, 0);
         wstart = max(wstart, 0);
         const int pool_index = (ph * pooled_width + pw) * channels;
         for (int h = hstart; h < hend; ++h) {
           for (int w = wstart; w < wend; ++w) {
             const int input_index = (h * width + w) * channels;
             for (int c = 0; c < channels; ++c) {
               Ydata[pool_index + c] +=
                   std::pow(std::abs(Xdata[input_index + c]), p);
             }
           }
         }
         for (int c = 0; c < channels; ++c) {
           Ydata[pool_index + c] = std::pow(Ydata[pool_index + c], inv_p);
         }
       }
     }
     // Do offset.
     Xdata += X.numel() / X.dim32(0);
     Ydata += Y->numel() / Y->dim32(0);
   }
   return true;
 }

 template <>
 bool PoolGradientOp<float, CPUContext, LpPoolFunctor>::
     RunOnDeviceWithOrderNCHW() {
   const auto& X = Input(0);
   const auto& Y = Input(1);
   auto& dY = Input(2);

   const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
   const auto inv_p = 1.0 / p;

   // TODO(Yangqing): Add shape checks.
   auto* dX = Output(0, X.sizes(), at::dtype<float>());
   math::Set<float, CPUContext>(
       X.numel(), 0, dX->template mutable_data<float>(), &context_);
   const float* dYdata = dY.data<float>();
   const float* Xdata = X.data<float>();
   const float* Ydata = Y.data<float>();
   float* dXdata = dX->template mutable_data<float>();

   int channels = X.dim32(1);
   CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
   int height = X.dim32(2);
   int width = X.dim32(3);
   ConvPoolOpBase<CPUContext>::ComputePads({height, width});
   int pooled_height = dY.dim32(2);
   int pooled_width = dY.dim32(3);
   // The main loop
   for (int n = 0; n < X.dim32(0); ++n) {
     for (int c = 0; c < channels; ++c) {
       for (int ph = 0; ph < pooled_height; ++ph) {
         for (int pw = 0; pw < pooled_width; ++pw) {
           int hstart = ph * stride_[0] - pads_[0];
           int wstart = pw * stride_[1] - pads_[1];
           int hend = min(hstart + kernel_[0], height);
           int wend = min(wstart + kernel_[1], width);
           hstart = max(hstart, 0);
           wstart = max(wstart, 0);
           float scale = 1. / (hend - hstart) / (wend - wstart);
           for (int h = hstart; h < hend; ++h) {
             for (int w = wstart; w < wend; ++w) {
               // gradient of p-norm is x_j * |x_j|^{p-2} / |x|_p^{p-1}
               dXdata[h * width + w] += dYdata[ph * pooled_width + pw] *
                   Xdata[h * width + w] *
                   std::pow(std::abs(Xdata[h * width + w]), p - 2) /
                   std::pow(Ydata[ph * pooled_width + pw], p - 1);
             }
           }
         }
       }
       // offset
       dXdata += height * width;
       dYdata += pooled_height * pooled_width;
       Ydata += pooled_height * pooled_width;
       Xdata += height * width;
     }
   }
   return true;
 }

 template <>
 bool PoolGradientOp<float, CPUContext, LpPoolFunctor>::
     RunOnDeviceWithOrderNHWC() {
   const auto& X = Input(0);
   const auto& Y = Input(1);
   auto& dY = Input(2);
   CAFFE_ENFORCE_EQ(dY.dim(), 4);

   // TODO(Yangqing): Add shape checks.
   auto* dX = Output(0, X.sizes(), at::dtype<float>());
   math::Set<float, CPUContext>(
       X.numel(), 0, dX->template mutable_data<float>(), &context_);
   const float* dYdata = dY.data<float>();
   float* dXdata = dX->template mutable_data<float>();
   const float* Xdata = X.data<float>();
   const float* Ydata = Y.data<float>();
   // The main loop
   int height = X.dim32(1);
   int width = X.dim32(2);
   ConvPoolOpBase<CPUContext>::ComputePads({height, width});
   const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
   const auto inv_p = 1.0 / p;

   int pooled_height = dY.dim32(1);
   int pooled_width = dY.dim32(2);
   int channels = X.dim32(3);
   CAFFE_ENFORCE_EQ(channels, dY.dim32(3));
   for (int n = 0; n < X.dim32(0); ++n) {
     for (int ph = 0; ph < pooled_height; ++ph) {
       for (int pw = 0; pw < pooled_width; ++pw) {
         int hstart = ph * stride_[0] - pads_[0];
         int wstart = pw * stride_[1] - pads_[1];
         int hend = min(hstart + kernel_[0], height);
         int wend = min(wstart + kernel_[1], width);
         hstart = max(hstart, 0);
         wstart = max(wstart, 0);
         float scale = 1. / (hend - hstart) / (wend - wstart);
         for (int h = hstart; h < hend; ++h) {
           for (int w = wstart; w < wend; ++w) {
             for (int c = 0; c < channels; ++c) {
               dXdata[(h * width + w) * channels + c] +=
                   dYdata[(ph * pooled_width + pw) * channels + c] *
                   Xdata[(h * width + w) * channels + c] *
                   std::pow(
                       std::abs(Xdata[(h * width + w) * channels + c]), p - 2) /
                   std::pow(
                       Ydata[(ph * pooled_width + pw) * channels + c], p - 1);
             }
           }
         }
       }
     }
     // offset
     dXdata += X.numel() / X.dim32(0);
     dYdata += dY.numel() / dY.dim32(0);
     Xdata += X.numel() / X.dim32(0);
     Ydata += Y.numel() / Y.dim32(0);
   }
   return true;
 }

 REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPoolFunctor>);
 REGISTER_CPU_OPERATOR(
     LpPoolGradient,
     PoolGradientOp<float, CPUContext, LpPoolFunctor>);

 OPERATOR_SCHEMA(LpPool)
     .NumInputs(1)
     .NumOutputs(1)
     .SetDoc(R"DOC(
 `LpPool` consumes an input blob and applies max pooling across the the blob according to kernel sizes, stride sizes, pad lengths and dilation. $L_p$ pooling consists of taking the $L_p$ norm of a subset of the input tensor according to the kernel size and downsampling the data into the output blob for further processing.

 Pooling layers reduce the spatial dimensionality of the input blob. Each of the output blob's dimensions will reduce according to:

 $$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$

 Github Links:
 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/lp_pool_op.cc

 <details>

 <summary> <b>Example</b> </summary>

 **Code**

 ```

 workspace.ResetWorkspace()

 op = core.CreateOperator(
     "LpPool",
     ["X"],
     ["Y"],
     kernel=2,
     stride=2,
     p=2.0
 )

 workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32)) // NCHW
 print("X:\n", workspace.FetchBlob("X"), "\n")
 workspace.RunOperatorOnce(op)
 print("Y:\n", workspace.FetchBlob("Y"))

 ```

 **Result**

 ```

 X:
  [[[[-1.1113514  -1.1173418  -0.1504435   0.1327146  -1.2221841  -0.5654315 ]
    [-1.9209646  -0.04675794  0.8604731   1.2042469   0.28154245   0.38656202]
    [-0.8772837  -0.03264008  0.26222762  0.28526652  0.321102    -2.5891325 ]
    [-0.9248281   1.440776   -0.56832    -0.6017927   1.2262512   -2.1443934 ]
    [ 0.5194415  -1.6858683   0.45221648  0.65029615 -0.8574544    0.8121054 ]
    [ 0.25902653  0.4934758   0.49870652 -0.48134378 -0.9178449   -0.07626943]]]]

 Y:
  [[[[2.4851248 1.49361   1.4290358]
    [1.9240153 0.9139378 3.5928857]
    [1.8500228 1.0525136 1.4976646]]]]

 ```

 </details>

 )DOC")
     .Arg("p", "(*float*): type of $L_p$ norm to use (default=2.0)")
     .Arg("kernel", "(*int*): the size of the window to take a max over")
     .Arg("stride", "(*int*): the stride of the window")
     .Arg("pad", "(*int*): implicit zero padding to be added on both sides")
     .Arg(
         "dilation",
         "(*int*): parameter that controls the stride of elements in the window")
     .Arg("order", "(*string*): order of blob dimensions (default=\"NCHW\")")
     .Input(0, "X", "(*Tensor`<float>`*): input tensor")
     .Output(0, "Y", "(*Tensor`<float>`*): output tensor");

 OPERATOR_SCHEMA(LpPoolGradient).NumInputs(3).NumOutputs(1);

 class GetPoolGradient : public GradientMakerBase {
   using GradientMakerBase::GradientMakerBase;
   vector<OperatorDef> GetGradientDefs() override {
     return SingleGradientDef(
         def_.type() + "Gradient",
         "",
         vector<string>{I(0), O(0), GO(0)},
         vector<string>{GI(0)});
   }
 };
 REGISTER_GRADIENT(LpPool, GetPoolGradient);
 } // namespace caffe2
caffe2::GradientMakerBase
Definition: operator_gradient.h:47

caffe2::LpPoolFunctor
Definition: lp_pool_op.cc:9

caffe2::PoolOp
Definition: pool_op.h:15

caffe2::PoolGradientOp
Definition: pool_op.h:108

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

caffe2::GetPoolGradient
Definition: lp_pool_op.cc:307

caffe2::OperatorBase
Definition: operator.h:38

caffe2::ConvPoolOpBase
Definition: conv_pool_op_base.h:29