Caffe2 - C++ API
A deep learning, cross-platform ML framework
lp_pool_op.cc
// TODO: reduce the apparent redundancy of all the code below.
#include "caffe2/operators/pool_op.h"

namespace caffe2 {

using std::max;
using std::min;

// Stateless functor: the Lp exponent "p" is read from the operator's
// arguments inside each run function below.
struct LpPoolFunctor {
  explicit LpPoolFunctor(const OperatorBase& /* op */) {}
};

template <>
bool PoolOp<float, CPUContext, LpPoolFunctor>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto* Y = Output(0);
  ConvPoolOpBase::SetOutputSize(X, Y, X.dim32(1));
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->template mutable_data<float>();
  math::Set<float, CPUContext>(Y->numel(), 0, Ydata, &context_);
  // The main loop
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  int pooled_height = Y->dim32(2);
  int pooled_width = Y->dim32(3);

  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          const int pool_index = ph * pooled_width + pw;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int input_index = h * width + w;
              Ydata[pool_index] += std::pow(std::abs(Xdata[input_index]), p);
            }
          }
          Ydata[pool_index] = std::pow(Ydata[pool_index], inv_p);
        }
      }
      // Do offset.
      Xdata += height * width;
      Ydata += pooled_height * pooled_width;
    }
  }
  return true;
}
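
The loop above computes, for every pooling window, $Y = (\sum_{i \in window} |X_i|^p)^{1/p}$. A minimal NumPy sketch of the same computation for a single image and channel (`lp_pool_2d` is a hypothetical reference helper, not part of Caffe2; it assumes no padding):

```
import numpy as np

def lp_pool_2d(x, kernel, stride, p=2.0):
    """Reference Lp pooling over one 2-D channel."""
    h, w = x.shape
    ph = (h - kernel) // stride + 1
    pw = (w - kernel) // stride + 1
    y = np.zeros((ph, pw), dtype=x.dtype)
    for i in range(ph):
        for j in range(pw):
            window = x[i * stride:i * stride + kernel,
                       j * stride:j * stride + kernel]
            # Y = (sum |x|^p)^(1/p), matching the C++ loop above.
            y[i, j] = np.sum(np.abs(window) ** p) ** (1.0 / p)
    return y

x = np.random.randn(6, 6).astype(np.float32)
print(lp_pool_2d(x, kernel=2, stride=2, p=2.0))  # 3x3 output
```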

template <>
bool PoolOp<float, CPUContext, LpPoolFunctor>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int height = X.dim32(1);
  int width = X.dim32(2);
  int channels = X.dim32(3);
  ConvPoolOpBase::SetOutputSize(X, Y, channels);

  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->template mutable_data<float>();
  math::Set<float, CPUContext>(Y->numel(), 0, Ydata, &context_);
  // The main loop
  int pooled_height = Y->dim32(1);
  int pooled_width = Y->dim32(2);
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        const int pool_index = (ph * pooled_width + pw) * channels;
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pool_index + c] +=
                  std::pow(std::abs(Xdata[input_index + c]), p);
            }
          }
        }
        for (int c = 0; c < channels; ++c) {
          Ydata[pool_index + c] = std::pow(Ydata[pool_index + c], inv_p);
        }
      }
    }
    // Do offset.
    Xdata += X.numel() / X.dim32(0);
    Ydata += Y->numel() / Y->dim32(0);
  }
  return true;
}
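
The NHWC variant computes the same per-window norm; only the memory layout differs, so the channel loop moves innermost and every spatial index is scaled by `channels`.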

template <>
bool PoolGradientOp<float, CPUContext, LpPoolFunctor>::
    RunOnDeviceWithOrderNCHW() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);

  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  // TODO(Yangqing): Add shape checks.
  auto* dX = Output(0, X.sizes(), at::dtype<float>());
  math::Set<float, CPUContext>(
      X.numel(), 0, dX->template mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  float* dXdata = dX->template mutable_data<float>();

  int channels = X.dim32(1);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
  int height = X.dim32(2);
  int width = X.dim32(3);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  int pooled_height = dY.dim32(2);
  int pooled_width = dY.dim32(3);
  // The main loop
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          float scale = 1. / (hend - hstart) / (wend - wstart);
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              // gradient of p-norm is x_j * |x_j|^{p-2} / |x|_p^{p-1}
              dXdata[h * width + w] += dYdata[ph * pooled_width + pw] *
                  Xdata[h * width + w] *
                  std::pow(std::abs(Xdata[h * width + w]), p - 2) /
                  std::pow(Ydata[ph * pooled_width + pw], p - 1);
            }
          }
        }
      }
      // offset
      dXdata += height * width;
      dYdata += pooled_height * pooled_width;
      Ydata += pooled_height * pooled_width;
      Xdata += height * width;
    }
  }
  return true;
}
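
The inline comment in the loop states the result; a short derivation for $y = \|x\|_p = \left(\sum_k |x_k|^p\right)^{1/p}$, using $\frac{d}{dx_j}|x_j|^p = p\,|x_j|^{p-1}\,\mathrm{sgn}(x_j)$ and $x_j = |x_j|\,\mathrm{sgn}(x_j)$:

$$\frac{\partial y}{\partial x_j} = \frac{1}{p}\left(\sum_k |x_k|^p\right)^{\frac{1}{p}-1} p\,|x_j|^{p-1}\,\mathrm{sgn}(x_j) = \frac{x_j\,|x_j|^{p-2}}{y^{\,p-1}}$$

which is exactly the `Xdata * |Xdata|^{p-2} / Ydata^{p-1}` factor accumulated into `dXdata`, scaled by the incoming gradient `dYdata`.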

template <>
bool PoolGradientOp<float, CPUContext, LpPoolFunctor>::
    RunOnDeviceWithOrderNHWC() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);
  CAFFE_ENFORCE_EQ(dY.dim(), 4);

  // TODO(Yangqing): Add shape checks.
  auto* dX = Output(0, X.sizes(), at::dtype<float>());
  math::Set<float, CPUContext>(
      X.numel(), 0, dX->template mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->template mutable_data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  // The main loop
  int height = X.dim32(1);
  int width = X.dim32(2);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  int pooled_height = dY.dim32(1);
  int pooled_width = dY.dim32(2);
  int channels = X.dim32(3);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(3));
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        float scale = 1. / (hend - hstart) / (wend - wstart);
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            for (int c = 0; c < channels; ++c) {
              dXdata[(h * width + w) * channels + c] +=
                  dYdata[(ph * pooled_width + pw) * channels + c] *
                  Xdata[(h * width + w) * channels + c] *
                  std::pow(
                      std::abs(Xdata[(h * width + w) * channels + c]), p - 2) /
                  std::pow(
                      Ydata[(ph * pooled_width + pw) * channels + c], p - 1);
            }
          }
        }
      }
    }
    // offset
    dXdata += X.numel() / X.dim32(0);
    dYdata += dY.numel() / dY.dim32(0);
    Xdata += X.numel() / X.dim32(0);
    Ydata += Y.numel() / Y.dim32(0);
  }
  return true;
}

REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPoolFunctor>);
REGISTER_CPU_OPERATOR(
    LpPoolGradient,
    PoolGradientOp<float, CPUContext, LpPoolFunctor>);

OPERATOR_SCHEMA(LpPool)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
`LpPool` consumes an input blob and applies $L_p$ pooling across the blob according to kernel sizes, stride sizes, pad lengths, and dilation. $L_p$ pooling consists of taking the $L_p$ norm of a subset of the input tensor according to the kernel size, and downsampling the data into the output blob for further processing.

Pooling layers reduce the spatial dimensionality of the input blob. Each of the output blob's dimensions will reduce according to:

$$dim_{out}=\frac{dim_{in}-kernel+2*pad}{stride}+1$$
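
For example, the $6 \times 6$ input pooled below with kernel=2, stride=2, and pad=0 gives $dim_{out}=\frac{6-2+2*0}{2}+1=3$, i.e. a $3 \times 3$ output.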

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/lp_pool_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "LpPool",
    ["X"],
    ["Y"],
    kernel=2,
    stride=2,
    p=2.0
)

workspace.FeedBlob("X", np.random.randn(1, 1, 6, 6).astype(np.float32))  # NCHW
print("X:\n", workspace.FetchBlob("X"), "\n")
workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[[[-1.1113514  -1.1173418  -0.1504435   0.1327146  -1.2221841  -0.5654315 ]
   [-1.9209646  -0.04675794  0.8604731   1.2042469   0.28154245  0.38656202]
   [-0.8772837  -0.03264008  0.26222762  0.28526652  0.321102   -2.5891325 ]
   [-0.9248281   1.440776   -0.56832    -0.6017927   1.2262512  -2.1443934 ]
   [ 0.5194415  -1.6858683   0.45221648  0.65029615 -0.8574544   0.8121054 ]
   [ 0.25902653  0.4934758   0.49870652 -0.48134378 -0.9178449  -0.07626943]]]]

Y:
 [[[[2.4851248 1.49361   1.4290358]
   [1.9240153 0.9139378 3.5928857]
   [1.8500228 1.0525136 1.4976646]]]]

```

</details>

)DOC")
    .Arg("p", "(*float*): type of $L_p$ norm to use (default=2.0)")
    .Arg("kernel", "(*int*): the size of the window to take an $L_p$ norm over")
    .Arg("stride", "(*int*): the stride of the window")
    .Arg("pad", "(*int*): implicit zero padding to be added on both sides")
    .Arg(
        "dilation",
        "(*int*): parameter that controls the stride of elements in the window")
    .Arg("order", "(*string*): order of blob dimensions (default=\"NCHW\")")
    .Input(0, "X", "(*Tensor`<float>`*): input tensor")
    .Output(0, "Y", "(*Tensor`<float>`*): output tensor");

OPERATOR_SCHEMA(LpPoolGradient).NumInputs(3).NumOutputs(1);

class GetPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(LpPool, GetPoolGradient);
} // namespace caffe2
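
The gradient maker passes the forward input `I(0)`, the forward output `O(0)`, and the output gradient `GO(0)` to `LpPoolGradient`, which produces the input gradient `GI(0)`; this matches the three-input, one-output `LpPoolGradient` schema above. Both gradient kernels need X and Y because the derivative depends on the input values and on the computed window norms.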