Caffe2 - C++ API
A deep learning, cross-platform ML framework
lp_pool_op.cc
// TODO: reduce the apparent redundancy of all the code below.
#include "caffe2/operators/pool_op.h"

namespace caffe2 {

using std::min;
using std::max;

class LpPool {};

template <>
bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto* Y = Output(0);
  ConvPoolOpBase::SetOutputSize(X, Y, X.dim32(1));
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_);
  // The main loop
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  int pooled_height = Y->dim32(2);
  int pooled_width = Y->dim32(3);

  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          const int pool_index = ph * pooled_width + pw;
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              const int input_index = h * width + w;
              Ydata[pool_index] += std::pow(std::abs(Xdata[input_index]), p);
            }
          }
          Ydata[pool_index] = std::pow(Ydata[pool_index], inv_p);
        }
      }
      // Do offset.
      Xdata += height * width;
      Ydata += pooled_height * pooled_width;
    }
  }
  return true;
}
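For reference, each output element written by the forward pass above is the L-p norm of its pooling window (accumulate |x|^p, then take the 1/p root), with the window bounds given by hstart/hend and wstart/wend:

$$Y_{n,c,p_h,p_w} = \Big(\sum_{h=h_{\mathrm{start}}}^{h_{\mathrm{end}}-1}\ \sum_{w=w_{\mathrm{start}}}^{w_{\mathrm{end}}-1} \lvert X_{n,c,h,w}\rvert^{p}\Big)^{1/p}$$

The NHWC variant below computes the same quantity; only the index arithmetic changes.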

template <>
bool PoolOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int height = X.dim32(1);
  int width = X.dim32(2);
  int channels = X.dim32(3);
  ConvPoolOpBase::SetOutputSize(X, Y, channels);

  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);
  const auto inv_p = 1.0 / p;

  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  math::Set<float, CPUContext>(Y->size(), 0, Ydata, &context_);
  // The main loop
  int pooled_height = Y->dim32(1);
  int pooled_width = Y->dim32(2);
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        const int pool_index = (ph * pooled_width + pw) * channels;
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pool_index + c] +=
                  std::pow(std::abs(Xdata[input_index + c]), p);
            }
          }
        }
        for (int c = 0; c < channels; ++c) {
          Ydata[pool_index + c] = std::pow(Ydata[pool_index + c], inv_p);
        }
      }
    }
    // Do offset.
    Xdata += X.size() / X.dim32(0);
    Ydata += Y->size() / Y->dim32(0);
  }
  return true;
}

template <>
bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNCHW() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);
  auto* dX = Output(0);
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);

  // TODO(Yangqing): Add shape checks.
  dX->ResizeLike(X);
  math::Set<float, CPUContext>(
      X.size(), 0, dX->mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  float* dXdata = dX->mutable_data<float>();

  int channels = X.dim32(1);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(1));
  int height = X.dim32(2);
  int width = X.dim32(3);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  int pooled_height = dY.dim32(2);
  int pooled_width = dY.dim32(3);
  // The main loop
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int c = 0; c < channels; ++c) {
      for (int ph = 0; ph < pooled_height; ++ph) {
        for (int pw = 0; pw < pooled_width; ++pw) {
          int hstart = ph * stride_[0] - pads_[0];
          int wstart = pw * stride_[1] - pads_[1];
          int hend = min(hstart + kernel_[0], height);
          int wend = min(wstart + kernel_[1], width);
          hstart = max(hstart, 0);
          wstart = max(wstart, 0);
          for (int h = hstart; h < hend; ++h) {
            for (int w = wstart; w < wend; ++w) {
              // gradient of p-norm is x_j * |x_j|^{p-2} / |x|_p^{p-1}
              dXdata[h * width + w] += dYdata[ph * pooled_width + pw] *
                  Xdata[h * width + w] *
                  std::pow(std::abs(Xdata[h * width + w]), p - 2) /
                  std::pow(Ydata[ph * pooled_width + pw], p - 1);
            }
          }
        }
      }
      // offset
      dXdata += height * width;
      dYdata += pooled_height * pooled_width;
      Ydata += pooled_height * pooled_width;
      Xdata += height * width;
    }
  }
  return true;
}
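Written out, the per-element derivative in the inline comment above is, for a window output $y = \lVert x\rVert_p = (\sum_i \lvert x_i\rvert^p)^{1/p}$ and an input element $x_j$ in that window,

$$\frac{\partial y}{\partial x_j} = \frac{x_j\,\lvert x_j\rvert^{p-2}}{y^{\,p-1}},\qquad \frac{\partial L}{\partial x_j} \mathrel{{+}{=}} \frac{\partial L}{\partial y}\cdot\frac{x_j\,\lvert x_j\rvert^{p-2}}{y^{\,p-1}},$$

which is exactly the accumulation into dXdata: dYdata supplies dL/dy, Xdata supplies x_j, and Ydata supplies y. The NHWC gradient below applies the same formula per channel.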

template <>
bool PoolGradientOp<float, CPUContext, LpPool>::RunOnDeviceWithOrderNHWC() {
  const auto& X = Input(0);
  const auto& Y = Input(1);
  auto& dY = Input(2);
  CAFFE_ENFORCE_EQ(dY.ndim(), 4);
  auto* dX = Output(0);
  // TODO(Yangqing): Add shape checks.
  dX->ResizeLike(X);
  math::Set<float, CPUContext>(
      X.size(), 0, dX->mutable_data<float>(), &context_);
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  const float* Xdata = X.data<float>();
  const float* Ydata = Y.data<float>();
  // The main loop
  int height = X.dim32(1);
  int width = X.dim32(2);
  ConvPoolOpBase<CPUContext>::ComputePads({height, width});
  const auto p = OperatorBase::GetSingleArgument<float>("p", 2.0);

  int pooled_height = dY.dim32(1);
  int pooled_width = dY.dim32(2);
  int channels = X.dim32(3);
  CAFFE_ENFORCE_EQ(channels, dY.dim32(3));
  for (int n = 0; n < X.dim32(0); ++n) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = ph * stride_[0] - pads_[0];
        int wstart = pw * stride_[1] - pads_[1];
        int hend = min(hstart + kernel_[0], height);
        int wend = min(wstart + kernel_[1], width);
        hstart = max(hstart, 0);
        wstart = max(wstart, 0);
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            for (int c = 0; c < channels; ++c) {
              dXdata[(h * width + w) * channels + c] +=
                  dYdata[(ph * pooled_width + pw) * channels + c] *
                  Xdata[(h * width + w) * channels + c] *
                  std::pow(
                      std::abs(Xdata[(h * width + w) * channels + c]), p - 2) /
                  std::pow(
                      Ydata[(ph * pooled_width + pw) * channels + c], p - 1);
            }
          }
        }
      }
    }
    // offset
    dXdata += X.size() / X.dim32(0);
    dYdata += dY.size() / dY.dim32(0);
    Xdata += X.size() / X.dim32(0);
    Ydata += Y.size() / Y.dim32(0);
  }
  return true;
}

REGISTER_CPU_OPERATOR(LpPool, PoolOp<float, CPUContext, LpPool>);
REGISTER_CPU_OPERATOR(
    LpPoolGradient,
    PoolGradientOp<float, CPUContext, LpPool>);

OPERATOR_SCHEMA(LpPool)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc(R"DOC(
LpPool consumes an input blob X and applies L-p pooling across the blob
according to kernel sizes, stride sizes, and pad lengths defined by the
ConvPoolOpBase operator. L-p pooling consists of taking the L-p norm of a
subset of the input tensor according to the kernel size and downsampling the
data into the output blob Y for further processing.
)DOC")
    .Input(
        0,
        "X",
        "Input data tensor from the previous operator; dimensions "
        "depend on whether the NCHW or NHWC operators are being used. For "
        "example, in the former, the input has size (N x C x H x W), where N "
        "is the batch size, C is the number of channels, and H and W are the "
        "height and the width of the data. The corresponding permutation of "
        "dimensions is used in the latter case.")
    .Output(
        0,
        "Y",
        "Output data tensor from L-p pooling across the input "
        "tensor. Dimensions will vary based on various kernel, stride, and "
        "pad sizes.");
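As a usage illustration of the operator registered above, the following is a minimal, hypothetical sketch assuming the classic Caffe2 C++ Workspace/OperatorDef API; the blob names "X" and "Y", the input shape, and the kernel/stride/p values are illustrative choices, not taken from this file.

#include "caffe2/core/init.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  caffe2::Workspace ws;

  // Feed a 1 x 1 x 4 x 4 NCHW input filled with ones (illustrative shape).
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(1, 1, 4, 4);
  float* xdata = X->mutable_data<float>();
  for (int i = 0; i < X->size(); ++i) {
    xdata[i] = 1.0f;
  }

  // Describe an LpPool op: 2x2 window, stride 2, p = 3 (illustrative values).
  caffe2::OperatorDef def;
  def.set_type("LpPool");
  def.add_input("X");
  def.add_output("Y");
  auto* kernel = def.add_arg();
  kernel->set_name("kernel");
  kernel->set_i(2);
  auto* stride = def.add_arg();
  stride->set_name("stride");
  stride->set_i(2);
  auto* p = def.add_arg();
  p->set_name("p");
  p->set_f(3.0f);

  // Run it once; Y should be 1 x 1 x 2 x 2 with every entry (4 * 1^3)^(1/3).
  CAFFE_ENFORCE(ws.RunOperatorOnce(def));
  const auto& Y = ws.GetBlob("Y")->Get<caffe2::TensorCPU>();
  LOG(INFO) << "Y[0] = " << Y.data<float>()[0];
  return 0;
}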

OPERATOR_SCHEMA(LpPoolGradient).NumInputs(3).NumOutputs(1);

class GetPoolGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(LpPool, GetPoolGradient);
} // namespace caffe2