// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// pad_op.cc
17 #include "caffe2/operators/pad_op.h"
18 
19 #include <algorithm>
20 
21 namespace caffe2 {
22 
23 PadMode StringToPadMode(const string& mode) {
24  if (mode == "constant") {
25  return PadMode::CONSTANT;
26  } else if (mode == "reflect") {
27  return PadMode::REFLECT;
28  } else if (mode == "edge") {
29  return PadMode::EDGE;
30  } else {
31  CAFFE_THROW("Unknown padding mode: " + mode);
32  }
33 }
34 
35 using std::min;
36 using std::max;
37 
// Pads (or crops, when a pad value is negative) the H and W dimensions of an
// NCHW float tensor.
//   CONSTANT: out-of-range output elements are filled with value_.
//   REFLECT:  elements mirror across the image border without repeating the
//             border pixel (index -1 maps to 1, index `height` to height-2).
//   EDGE:     out-of-range elements replicate the nearest border pixel.
// Pad amounts pad_t/pad_l/pad_b/pad_r and the output size come from
// ConvPoolOpBase.
template <>
bool PadImageOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int channels = X.dim32(1);
  int height = X.dim32(2);
  int width = X.dim32(3);
  // Resizes Y to the padded output shape derived from the pad attributes.
  ConvPoolOpBase::SetOutputSize(X, Y, channels);

  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  // The main loop
  int padded_height = Y->dim32(2);
  int padded_width = Y->dim32(3);

  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              // (h, w) is the input coordinate this output element reads.
              int h = ph - pad_t();
              int w = pw - pad_l();
              Ydata[ph * padded_width + pw] =
                  (h < 0 || w < 0 || h >= height || w >= width)
                  ? value_
                  : Xdata[h * width + w];
            }
          }
          // Do offset: advance both pointers one (n, c) plane at a time.
          Xdata += height * width;
          Ydata += padded_height * padded_width;
        }
      }
      break;
    case PadMode::REFLECT:
      if (pad_r() >= 0 && pad_t() >= 0 && pad_l() >= 0 && pad_b() >= 0) {
        // Fast path (all pads non-negative): bulk-copy the interior, then
        // only recompute the reflected border strips.
        for (int n = 0; n < X.dim32(0); ++n) {
          for (int c = 0; c < channels; ++c) {
            // Handle the valid region:
            // i.e. Y[n][c][pad_t:pad_t+h][pad_l:pad_l+w]
            auto* Ystart = Ydata + pad_t() * padded_width + pad_l();
            math::CopyMatrix<CPUContext>(
                sizeof(float),
                height,
                width,
                Xdata,
                width,
                Ystart,
                padded_width,
                &context_);

// Fixup areas where we need to reflect
// NOTE: this macro name shadows the input tensor `X` until the #undef below.
// It declares locals h and w, so it may appear at most once per scope.
#define X(ph, pw)                 \
  int h = ph - pad_t();           \
  int w = pw - pad_l();           \
  h = max(h, -h);                 \
  h = min(h, 2 * height - h - 2); \
  w = max(w, -w);                 \
  w = min(w, 2 * width - w - 2);  \
  Ydata[ph * padded_width + pw] = Xdata[h * width + w]

            // Top part
            for (int ph = 0; ph < pad_t(); ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }

            // Bottom part
            for (int ph = padded_height - pad_b(); ph < padded_height; ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }

            // Interior rows: only the left and right strips remain.
            for (int ph = pad_t(); ph < padded_height - pad_b(); ++ph) {
              // Left
              for (int pw = 0; pw < pad_l(); ++pw) {
                X(ph, pw);
              }
              // Right
              for (int pw = padded_width - pad_r(); pw < padded_width; ++pw) {
                X(ph, pw);
              }
            }
#undef X

            // Do offset.
            Xdata += height * width;
            Ydata += padded_height * padded_width;
          }
        }
      } else {
        // General path: also correct when some pads are negative (cropping),
        // at the cost of computing the reflection per output element.
        for (int n = 0; n < X.dim32(0); ++n) {
          for (int c = 0; c < channels; ++c) {
            for (int ph = 0; ph < padded_height; ++ph) {
              for (int pw = 0; pw < padded_width; ++pw) {
                int h = ph - pad_t();
                int w = pw - pad_l();
                // max(h, -h) does reflection over 0
                h = max(h, -h);
                // min(h, 2 * height - h - 2) does reflection over height.
                h = min(h, 2 * height - h - 2);
                w = max(w, -w);
                w = min(w, 2 * width - w - 2);
                Ydata[ph * padded_width + pw] = Xdata[h * width + w];
              }
            }
            // Do offset.
            Xdata += height * width;
            Ydata += padded_height * padded_width;
          }
        }
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              // Bounds to the right range: clamp to the nearest valid pixel.
              int h = min(height - 1, max(ph - pad_t(), 0));
              int w = min(width - 1, max(pw - pad_l(), 0));
              Ydata[ph * padded_width + pw] = Xdata[h * width + w];
            }
          }
          // Do offset.
          Xdata += height * width;
          Ydata += padded_height * padded_width;
        }
      }
      break;
  }
  return true;
}
175 
// NHWC variant of the forward pad. Same CONSTANT / REFLECT / EDGE semantics
// as the NCHW path, but the channel dimension is innermost, so each (ph, pw)
// output position copies (or fills) a contiguous run of `channels` floats.
template <>
bool PadImageOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(0);
  auto* Y = Output(0);
  int height = X.dim32(1);
  int width = X.dim32(2);
  int channels = X.dim32(3);
  // Resizes Y to the padded output shape derived from the pad attributes.
  ConvPoolOpBase::SetOutputSize(X, Y, channels);
  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();

  // The main loop
  int padded_height = Y->dim32(1);
  int padded_width = Y->dim32(2);

  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            // (h, w) is the input coordinate this output position reads.
            int h = ph - pad_t();
            int w = pw - pad_l();
            const int pad_index = (ph * padded_width + pw) * channels;
            if (h < 0 || w < 0 || h >= height || w >= width) {
              // Outside the source image: fill all channels with value_.
              for (int c = 0; c < channels; ++c) {
                Ydata[pad_index + c] = value_;
              }
            } else {
              const int input_index = (h * width + w) * channels;
              for (int c = 0; c < channels; ++c) {
                Ydata[pad_index + c] = Xdata[input_index + c];
              }
            }
          }
        }
        // Do offset: advance one image (H*W*C elements) per batch item.
        Xdata += X.size() / X.dim32(0);
        Ydata += Y->size() / Y->dim32(0);
      }
      break;
    case PadMode::REFLECT:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            int h = ph - pad_t();
            int w = pw - pad_l();
            // max(h, -h) does reflection over 0
            h = max(h, -h);
            // min(h, 2 * height - h - 2) does reflection over height.
            h = min(h, 2 * height - h - 2);
            w = max(w, -w);
            w = min(w, 2 * width - w - 2);
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pad_index + c] = Xdata[input_index + c];
            }
          }
        }
        // Do offset.
        Xdata += X.size() / X.dim32(0);
        Ydata += Y->size() / Y->dim32(0);
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < X.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            // Clamp to the nearest valid pixel (border replication).
            int h = min(height - 1, max(ph - pad_t(), 0));
            int w = min(width - 1, max(pw - pad_l(), 0));
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              Ydata[pad_index + c] = Xdata[input_index + c];
            }
          }
        }
        // Do offset.
        Xdata += X.size() / X.dim32(0);
        Ydata += Y->size() / Y->dim32(0);
      }
      break;
  }
  return true;
}
261 
// Gradient of the NCHW forward pad: dX has the unpadded shape, and every
// element of dY is scattered back (accumulated with +=) onto the input pixel
// it was read from in the forward pass. Where padding replicated or
// reflected a pixel, that pixel therefore receives the sum of all its copies'
// gradients; CONSTANT-filled positions contribute nothing.
template <>
bool PadImageGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
  auto& dY = Input(0);
  auto* dX = Output(0);
  // Invert the padding: subtract the pads to recover the input H and W.
  dX->Resize(
      dY.dim32(0),
      dY.dim32(1),
      dY.dim32(2) - pad_t() - pad_b(),
      dY.dim32(3) - pad_l() - pad_r());
  int padded_height = dY.dim32(2);
  int padded_width = dY.dim32(3);
  int channels = dX->dim32(1);
  int height = dX->dim32(2);
  int width = dX->dim32(3);

  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Zero dX first; all branches below accumulate into it.
  math::Set<float, CPUContext>(dX->size(), 0, dXdata, &context_);
  // The main loop
  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              int h = ph - pad_t();
              int w = pw - pad_l();
              // Only in-bounds positions came from X; padding used value_.
              if (!(h < 0 || w < 0 || h >= height || w >= width)) {
                dXdata[h * width + w] += dYdata[ph * padded_width + pw];
              }
            }
          }
          // Do offset: advance one (n, c) plane at a time.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
    case PadMode::REFLECT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              // Recompute the same reflected source index as the forward pass.
              int h = ph - pad_t();
              int w = pw - pad_l();
              // max(h, -h) does reflection over 0
              h = max(h, -h);
              // min(h, 2 * height - h - 2) does reflection over height.
              h = min(h, 2 * height - h - 2);
              w = max(w, -w);
              w = min(w, 2 * width - w - 2);
              dXdata[h * width + w] += dYdata[ph * padded_width + pw];
            }
          }
          // Do offset.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int c = 0; c < channels; ++c) {
          for (int ph = 0; ph < padded_height; ++ph) {
            for (int pw = 0; pw < padded_width; ++pw) {
              // Clamp to the border pixel the forward pass replicated.
              int h = min(height - 1, max(ph - pad_t(), 0));
              int w = min(width - 1, max(pw - pad_l(), 0));
              dXdata[h * width + w] += dYdata[ph * padded_width + pw];
            }
          }
          // Do offset.
          dXdata += height * width;
          dYdata += padded_height * padded_width;
        }
      }
      break;
  }
  return true;
}
341 
// NHWC variant of the pad gradient: identical scatter-accumulate scheme as
// the NCHW gradient, with the channel dimension innermost so each (ph, pw)
// position accumulates a contiguous run of `channels` floats.
template <>
bool PadImageGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
  auto& dY = Input(0);
  auto* dX = Output(0);
  // Invert the padding: subtract the pads to recover the input H and W.
  dX->Resize(
      dY.dim32(0),
      dY.dim32(1) - pad_t() - pad_b(),
      dY.dim32(2) - pad_l() - pad_r(),
      dY.dim32(3));
  int padded_height = dY.dim32(1);
  int padded_width = dY.dim32(2);
  int channels = dY.dim32(3);
  int height = dX->dim32(1);
  int width = dX->dim32(2);

  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Zero dX first; all branches below accumulate into it.
  math::Set<float, CPUContext>(dX->size(), 0, dXdata, &context_);

  switch (mode_) {
    case PadMode::CONSTANT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            int h = ph - pad_t();
            int w = pw - pad_l();
            const int pad_index = (ph * padded_width + pw) * channels;
            // Only in-bounds positions came from X; padding used value_.
            if (!(h < 0 || w < 0 || h >= height || w >= width)) {
              const int input_index = (h * width + w) * channels;
              for (int c = 0; c < channels; ++c) {
                dXdata[input_index + c] += dYdata[pad_index + c];
              }
            }
          }
        }
        // Do offset: advance one image per batch item.
        dXdata += dX->size() / dX->dim32(0);
        dYdata += dY.size() / dY.dim32(0);
      }
      break;
    case PadMode::REFLECT:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            // Recompute the same reflected source index as the forward pass.
            int h = ph - pad_t();
            int w = pw - pad_l();
            // max(h, -h) does reflection over 0
            h = max(h, -h);
            // min(h, 2 * height - h - 2) does reflection over height.
            h = min(h, 2 * height - h - 2);
            w = max(w, -w);
            w = min(w, 2 * width - w - 2);
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              dXdata[input_index + c] += dYdata[pad_index + c];
            }
          }
        }
        // Do offset.
        dXdata += dX->size() / dX->dim32(0);
        dYdata += dY.size() / dY.dim32(0);
      }
      break;
    case PadMode::EDGE:
      for (int n = 0; n < dY.dim32(0); ++n) {
        for (int ph = 0; ph < padded_height; ++ph) {
          for (int pw = 0; pw < padded_width; ++pw) {
            const int pad_index = (ph * padded_width + pw) * channels;
            // Bounds to the right range: clamp to the border pixel that the
            // forward pass replicated.
            int h = min(height - 1, max(ph - pad_t(), 0));
            int w = min(width - 1, max(pw - pad_l(), 0));
            const int input_index = (h * width + w) * channels;
            for (int c = 0; c < channels; ++c) {
              dXdata[input_index + c] += dYdata[pad_index + c];
            }
          }
        }
        // Do offset.
        dXdata += dX->size() / dX->dim32(0);
        dYdata += dY.size() / dY.dim32(0);
      }
      break;
  }
  return true;
}
428 
429 template <>
430 std::vector<TensorShape> PadImageOp<float, CPUContext>::PadTensorInference(
431  const OperatorDef& def,
432  const vector<TensorShape>& in) {
433  return ConvPoolOpBase::TensorInferenceForPool(def, in);
434 }
435 
// CPU registrations for the forward op and its gradient.
REGISTER_CPU_OPERATOR(PadImage, PadImageOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(PadImageGradient, PadImageGradientOp<float, CPUContext>);

// Schema: one input, one output; output shape is inferred with the same
// helper used by pooling ops (see PadTensorInference above).
OPERATOR_SCHEMA(PadImage)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction(PadImageOp<float, CPUContext>::PadTensorInference)
    .SetDoc(R"DOC(
PadImage pads values around the boundary of an image according to the pad
values and stride sizes defined by the ConvPoolOpBase operator.
)DOC")
    .Input(
        0,
        "X",
        "Input data tensor from the previous operator; dimensions "
        "depend on whether the NCHW or NHWC operators are being used. For example, "
        "in the former, the input has size (N x C x H x W), where N is the batch "
        "size, C is the number of channels, and H and W are the height and the width "
        "of the data. The corresponding permutation of dimensions is used in the "
        "latter case. ")
    .Output(
        0,
        "Y",
        "Output data tensor from padding the H and W dimensions on "
        "the tensor. Dimensions will vary based on various pad and stride "
        "sizes.");

// Gradient op: takes dY, produces dX; no doc string or shape inference needed.
OPERATOR_SCHEMA(PadImageGradient).NumInputs(1).NumOutputs(1);
464 
// Gradient maker: the gradient of PadImage is a single PadImageGradient op
// that maps the output gradient GO(0) to the input gradient GI(0).
class GetPadImageGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "PadImageGradient", "", vector<string>{GO(0)}, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(PadImage, GetPadImageGradient);
473 
474 } // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.
// Note: SingleGradientDef(const Args&... args) is a helper that creates a
// single operator def, which is usually the case for many gradient makers.