Caffe2 - C++ API
A deep learning, cross platform ML framework
resize_op.cc
1 
17 #include "caffe2/operators/resize_op.h"
18 
19 #include "caffe2/utils/cpu_neon.h"
20 #include "caffe2/utils/math.h"
21 
22 namespace caffe2 {
23 
// Nearest-neighbor upsampling by exactly 2x along both spatial dimensions.
//
// `input` is read as batch_size * num_channels consecutive planes of
// input_height x input_width floats. `output` is written as the same number
// of planes, each (2 * input_height) x (2 * input_width) floats, so every
// input sample ends up replicated into a 2x2 block of the output.
void resizeNearest2x(
    int batch_size,
    int num_channels,
    int input_height,
    int input_width,
    const float* input,
    float* output) {
  const int output_height = input_height * 2;
  const int output_width = input_width * 2;

  const float* src_plane = input;
  float* dst_plane = output;

  for (int n = 0; n < batch_size; ++n) {
    for (int c = 0; c < num_channels; ++c) {
      for (int out_row = 0; out_row < output_height; ++out_row) {
        // Two consecutive output rows are fed by the same input row.
        const int src_offset = (out_row / 2) * input_width;
        const int dst_offset = output_width * out_row;

        int col = 0;
#ifdef __ARM_NEON__
        // Process four input samples per iteration; the scalar loop below
        // picks up whatever is left when the width is not a multiple of 4.
        const int vec_end = input_width - (input_width % 4);
        while (col < vec_end) {
          // Load samples 0 1 2 3 ...
          const float32x4_t lanes = vld1q_f32(src_plane + src_offset + col);
          float32x4x2_t doubled = {{lanes, lanes}};
          // ... and store them interleaved with themselves: 00 11 22 33.
          vst2q_f32(dst_plane + dst_offset + col * 2 + 0, doubled);
          col += 4;
        }
#endif
        // Scalar path (and NEON remainder): write each sample twice.
        while (col < input_width) {
          const float sample = src_plane[src_offset + col];
          const int dst_index = dst_offset + col * 2;
          dst_plane[dst_index + 0] = sample;
          dst_plane[dst_index + 1] = sample;
          ++col;
        }
      }
      // Step to the next (batch, channel) plane on both sides.
      src_plane += input_height * input_width;
      dst_plane += output_height * output_width;
    }
  }
}
71 
72 template <>
73 bool ResizeNearestOp<float, CPUContext>::RunOnDevice() {
74  const auto& X = Input(0);
75  auto* Y = Output(0);
76 
77  const int batch_size = X.dim32(0),
78  num_channels = X.dim32(1),
79  input_height = X.dim32(2),
80  input_width = X.dim32(3);
81  int output_width = input_width * width_scale_;
82  int output_height = input_height * height_scale_;
83  Y->Resize(batch_size, num_channels, output_height, output_width);
84 
85  const float* Xdata = X.data<float>();
86  float* Ydata = Y->mutable_data<float>();
87 
88  // Specialized implementation for fast 2x upsampling
89  if (width_scale_ == 2.0 && height_scale_ == 2.0) {
90  resizeNearest2x(
91  batch_size, num_channels, input_height, input_width, Xdata, Ydata);
92  return true;
93  }
94 
95  for (int n = 0; n < batch_size; ++n) {
96  for (int c = 0; c < num_channels; ++c) {
97  for (int y = 0; y < output_height; ++y) {
98  const int in_y = std::min((int)(y / height_scale_), (input_height - 1));
99  for (int x = 0; x < output_width; ++x) {
100  const int in_x = std::min((int)(x / width_scale_), (input_width - 1));
101  Ydata[output_width * y + x] = Xdata[input_width * in_y + in_x];
102  }
103  }
104  Xdata += input_height * input_width;
105  Ydata += output_width * output_height;
106  }
107  }
108 
109  return true;
110 }
111 
112 template <>
113 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
114  const auto& dY = Input(0);
115  const auto& X = Input(1);
116  auto* dX = Output(0);
117 
118  const auto& inputDims = dY.dims();
119  CAFFE_ENFORCE_EQ(4, inputDims.size());
120  const int batch_size = dY.dim32(0),
121  num_channels = dY.dim32(1),
122  input_height = dY.dim32(2),
123  input_width = dY.dim32(3);
124  const int output_height = X.dim32(2);
125  const int output_width = X.dim32(3);
126  dX->Resize(batch_size, num_channels, output_height, output_width);
127  math::Set<float, CPUContext>(dX->size(),
128  0.0f,
129  dX->mutable_data<float>(),
130  &context_);
131 
132  const float* dYdata = dY.data<float>();
133  float* dXdata = dX->mutable_data<float>();
134 
135  for (int n = 0; n < batch_size; ++n) {
136  for (int c = 0; c < num_channels; ++c) {
137  for (int y = 0; y < input_height; ++y) {
138  const int out_y = std::min((int)(y / height_scale_),
139  (output_height - 1));
140  for (int x = 0; x < input_width; ++x) {
141  const int out_x = std::min((int)(x / width_scale_),
142  (output_width - 1));
143  dXdata[output_width * out_y + out_x] += dYdata[input_width * y + x];
144  }
145  }
146  dYdata += input_height * input_width;
147  dXdata += output_height * output_width;
148  }
149  }
150 
151  return true;
152 }
153 
154 REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
155 REGISTER_CPU_OPERATOR(ResizeNearestGradient,
156  ResizeNearestGradientOp<float, CPUContext>);
157 
158 // Input: X, output: Y
159 OPERATOR_SCHEMA(ResizeNearest)
160  .NumInputs(1)
161  .NumOutputs(1)
162  .Arg("width_scale", "Scale along width dimension")
163  .Arg("height_scale", "Scale along height dimension")
164  .SetDoc(R"DOC(
165 Resizes the spatial dimensions of the input using nearest neighbor
166 interpolation. The `width_scale` and `height_scale` arguments
167 control the size of the output, which is given by:
168 output_width = floor(input_width * width_scale)
169 output_height = floor(output_height * height_scale)
170 )DOC")
171  .Input(0, "X", "Input tensor")
172  .Output(0, "Y", "Output tensor");
173 
174 // Input: dY, output: dX
175 OPERATOR_SCHEMA(ResizeNearestGradient)
176  .NumInputs(2)
177  .NumOutputs(1)
178  .Arg("width_scale", "Scale along width dimension")
179  .Arg("height_scale", "Scale along height dimension");
180 
182  using GradientMakerBase::GradientMakerBase;
183  vector<OperatorDef> GetGradientDefs() override {
184  return SingleGradientDef("ResizeNearestGradient",
185  "",
186  vector<string>{GO(0), I(0)},
187  vector<string>{GI(0)});
188  }
189 };
190 REGISTER_GRADIENT(ResizeNearest, GetResizeNearestGradient);
191 
192 } // namespace caffe2
Copyright (c) 2016-present, Facebook, Inc.
static vector< OperatorDef > SingleGradientDef(const Args &...args)
a helper function to allow one to create one single operator def, which is usually the case for many ...