// resizeNearestNCHW2x: nearest-neighbour upsampling by a fixed factor of two
// in both spatial dimensions; the output plane is (input_height * 2) x
// (input_width * 2).
//
// The body uses batch_size, num_channels, input_height and input_width as loop
// bounds, reads floats through `input` and writes through `output`.
// NOTE(review): the parameter declarations themselves are not visible here;
// confirm their types and order against the header / call site.
//
// Each output row y is sourced from input row y / 2, so every input row feeds
// two consecutive output rows. Within a row, input column x maps to output
// offset output_width * y + x * 2.
//
// When __ARM_NEON__ or __ARM_NEON is defined, columns are first handled four
// at a time: vecW is input_width rounded down to a multiple of 4, vld1q_f32
// loads four floats and vst2q_f32 stores the pair {v, v} interleaved, which
// places each loaded value in two adjacent output slots. A scalar loop then
// covers the remaining columns [vecW, input_width). Builds without NEON use
// the scalar loop for the whole row.
// NOTE(review): the scalar stores that follow each `oidx` computation are not
// visible here; presumably output[oidx] and output[oidx + 1] both receive v.
// Verify.
//
// `input` and `output` are advanced by one plane's worth of elements
// (input_height * input_width and output_height * output_width respectively).
1 #include "caffe2/operators/resize_op.h" 3 #include "caffe2/utils/cpu_neon.h" 4 #include "caffe2/utils/math.h" 6 #ifdef CAFFE2_USE_MKLDNN 7 #include "caffe2/ideep/operators/operator_fallback_ideep.h" 8 #include "caffe2/ideep/utils/ideep_operator.h" 13 void resizeNearestNCHW2x(
20 const int output_height = input_height * 2;
21 const int output_width = input_width * 2;
22 for (
int n = 0; n < batch_size; ++n) {
23 for (
int c = 0; c < num_channels; ++c) {
24 for (
int y = 0; y < output_height; ++y) {
// Integer division: output rows 2k and 2k + 1 both read input row k.
25 const int in_y = y / 2;
27 #if defined(__ARM_NEON__) || defined(__ARM_NEON) 28 int vecW = (input_width / 4) * 4;
30 for (; x < vecW; x += 4) {
32 float32x4_t v = vld1q_f32(input + in_y * input_width + x);
33 const int oidx = output_width * y + x * 2;
34 float32x4x2_t v2 = {{v, v}};
36 vst2q_f32(output + oidx + 0, v2);
// Scalar tail for the columns left over after the 4-wide loop.
40 for (; x < input_width; ++x) {
41 const float v = input[in_y * input_width + x];
42 const int oidx = output_width * y + x * 2;
47 for (
int x = 0; x < input_width; ++x) {
48 const float v = input[in_y * input_width + x];
49 const int oidx = output_width * y + x * 2;
// Step both pointers to the next plane.
55 input += input_height * input_width;
56 output += output_height * output_width;
// ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNCHW
//
// Forward nearest-neighbour resize for a float tensor whose dimensions are
// read from Input(0) as (batch, channels, height, width).
//
// Scale factors: when the operator has two inputs, Input(1) must be a 1-D
// tensor with exactly two elements; element 0 overwrites height_scale_ and
// element 1 overwrites width_scale_.
// NOTE(review): no range check on these values is visible here, and the
// general loop below divides by them. Confirm that non-positive or NaN scales
// are rejected elsewhere.
//
// Output size: input_width * width_scale_ and input_height * height_scale_,
// each truncated to int on assignment.
// NOTE(review): the statement that creates Y is not visible here; the shape
// list {batch_size, num_channels, output_height, output_width} is presumably
// its argument.
62 bool ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
63 const auto& X = Input(0);
65 const int batch_size = X.dim32(0),
66 num_channels = X.dim32(1),
67 input_height = X.dim32(2),
68 input_width = X.dim32(3);
69 if (InputSize() == 2) {
70 const auto& scales = Input(1);
71 CAFFE_ENFORCE_EQ(scales.dim(), 1);
72 CAFFE_ENFORCE_EQ(scales.numel(), 2);
73 const float* scales_data = scales.data<
float>();
74 height_scale_ = scales_data[0];
75 width_scale_ = scales_data[1];
78 int output_width = input_width * width_scale_;
79 int output_height = input_height * height_scale_;
82 {batch_size, num_channels, output_height, output_width},
85 const float* Xdata = X.data<
float>();
86 float* Ydata = Y->template mutable_data<float>();
// Both scales exactly 2.0: the whole tensor is handed to a specialised
// routine. NOTE(review): the callee name is not visible here, presumably
// resizeNearestNCHW2x; confirm.
89 if (width_scale_ == 2.0 && height_scale_ == 2.0) {
91 batch_size, num_channels, input_height, input_width, Xdata, Ydata);
// General case: every output pixel (y, x) copies the input pixel at
// ((int)(y / height_scale_), (int)(x / width_scale_)), with each index clamped
// to the last input row / column.
95 for (
int n = 0; n < batch_size; ++n) {
96 for (
int c = 0; c < num_channels; ++c) {
97 for (
int y = 0; y < output_height; ++y) {
98 const int in_y = std::min((
int)(y / height_scale_), (input_height - 1));
99 for (
int x = 0; x < output_width; ++x) {
100 const int in_x = std::min((
int)(x / width_scale_), (input_width - 1));
101 Ydata[output_width * y + x] = Xdata[input_width * in_y + in_x];
// Step both pointers to the next plane.
104 Xdata += input_height * input_width;
105 Ydata += output_width * output_height;
// ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNHWC
//
// Forward nearest-neighbour resize for a float tensor whose dimensions are
// read from Input(0) as (batch, height, width, channels).
//
// Scale factors: when the operator has two inputs, Input(1) must be a 1-D
// tensor with exactly two elements; element 0 overwrites height_scale_ and
// element 1 overwrites width_scale_.
// NOTE(review): no range check on these values is visible here, and the loop
// below divides by them. Confirm that non-positive or NaN scales are rejected
// elsewhere.
//
// Output size: input_width * width_scale_ and input_height * height_scale_,
// each truncated to int on assignment. The *_width_stride values are the
// number of floats in one row (width * num_channels).
// NOTE(review): the statement that creates Y is not visible here; the shape
// list {batch_size, output_height, output_width, num_channels} is presumably
// its argument.
//
// For every output pixel (y, x) the source pixel is
// ((int)(y / height_scale_), (int)(x / width_scale_)), clamped to the last
// input row / column, and num_channels * sizeof(float) bytes are passed to a
// copy call together with the destination and source pixel addresses.
// NOTE(review): the callee of that copy is not visible here; the
// (dst, src, byte-count) argument order looks like memcpy; confirm.
113 bool ResizeNearestOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
114 const auto& X = Input(0);
116 const int batch_size = X.dim32(0), input_height = X.dim32(1),
117 input_width = X.dim32(2), num_channels = X.dim32(3);
118 if (InputSize() == 2) {
119 const auto& scales = Input(1);
120 CAFFE_ENFORCE_EQ(scales.dim(), 1);
121 CAFFE_ENFORCE_EQ(scales.numel(), 2);
122 const float* scales_data = scales.data<
float>();
123 height_scale_ = scales_data[0];
124 width_scale_ = scales_data[1];
127 int output_width = input_width * width_scale_;
128 int output_height = input_height * height_scale_;
130 const int output_width_stride = output_width * num_channels;
131 const int input_width_stride = input_width * num_channels;
135 {batch_size, output_height, output_width, num_channels},
138 const float* Xdata = X.data<
float>();
139 float* Ydata = Y->template mutable_data<float>();
141 for (
int n = 0; n < batch_size; ++n) {
142 for (
int y = 0; y < output_height; ++y) {
143 const int in_y = std::min((
int)(y / height_scale_), (input_height - 1));
144 for (
int x = 0; x < output_width; ++x) {
145 const int in_x = std::min((
int)(x / width_scale_), (input_width - 1));
147 &Ydata[output_width_stride * y + num_channels * x],
148 &Xdata[input_width_stride * in_y + num_channels * in_x],
149 num_channels *
sizeof(
float));
// Step both pointers to the next image in the batch.
152 Xdata += input_height * input_width_stride;
153 Ydata += output_height * output_width_stride;
// ResizeNearestOp<float, CPUContext>::RunOnDevice
//
// Chooses the implementation that matches the storage order: NHWC and NCHW
// each forward to the corresponding RunOnDeviceWithOrder* method and return
// its result. CAFFE_THROW reports "Unknown Storage order: " followed by order_
// for anything else.
// NOTE(review): the switch header is not visible here; order_ is presumably
// the value being switched on.
160 bool ResizeNearestOp<float, CPUContext>::RunOnDevice() {
162 case StorageOrder::NHWC:
163 return RunOnDeviceWithOrderNHWC();
164 case StorageOrder::NCHW:
165 return RunOnDeviceWithOrderNCHW();
167 CAFFE_THROW(
"Unknown Storage order: ", order_);
// ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW
//
// Backward pass of the nearest-neighbour resize for (batch, channels, height,
// width) data. Input(0) is dY and Input(1) is X; dY is required to be 4-D.
//
// Naming: input_height / input_width are dY's spatial sizes, while
// output_height / output_width are X's spatial sizes, i.e. "input" and
// "output" are from the gradient operator's point of view.
//
// Scale factors: with three inputs, Input(2) must be a 1-D tensor with exactly
// two elements; element 0 overwrites height_scale_ and element 1 overwrites
// width_scale_.
// NOTE(review): no range check on these values is visible here, and the loop
// below divides by them. Confirm that non-positive or NaN scales are rejected
// elsewhere.
//
// NOTE(review): the statement that creates dX is not visible here; the shape
// list {batch_size, num_channels, output_height, output_width} is presumably
// its argument.
172 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNCHW() {
173 const auto& dY = Input(0);
174 const auto& X = Input(1);
176 const auto inputDims = dY.sizes();
177 CAFFE_ENFORCE_EQ(4, inputDims.size());
178 const int batch_size = dY.dim32(0),
179 num_channels = dY.dim32(1),
180 input_height = dY.dim32(2),
181 input_width = dY.dim32(3);
182 const int output_height = X.dim32(2);
183 const int output_width = X.dim32(3);
184 if (InputSize() == 3) {
185 const auto& scales = Input(2);
186 CAFFE_ENFORCE_EQ(scales.dim(), 1);
187 CAFFE_ENFORCE_EQ(scales.numel(), 2);
188 const float* scales_data = scales.data<
float>();
189 height_scale_ = scales_data[0];
190 width_scale_ = scales_data[1];
194 {batch_size, num_channels, output_height, output_width},
// Zero dX first: the loop below accumulates into it with +=.
196 math::Set<float, CPUContext>(
197 dX->numel(), 0.0f, dX->template mutable_data<float>(), &context_);
199 const float* dYdata = dY.data<
float>();
200 float* dXdata = dX->template mutable_data<float>();
// Every dY element (y, x) is added to the dX element at
// ((int)(y / height_scale_), (int)(x / width_scale_)); the row index is
// clamped to output_height - 1.
// NOTE(review): the second argument of the std::min for out_x is not visible
// here, presumably (output_width - 1), mirroring out_y; confirm.
202 for (
int n = 0; n < batch_size; ++n) {
203 for (
int c = 0; c < num_channels; ++c) {
204 for (
int y = 0; y < input_height; ++y) {
205 const int out_y = std::min((
int)(y / height_scale_),
206 (output_height - 1));
207 for (
int x = 0; x < input_width; ++x) {
208 const int out_x = std::min((
int)(x / width_scale_),
210 dXdata[output_width * out_y + out_x] += dYdata[input_width * y + x];
// Step both pointers to the next plane.
213 dYdata += input_height * input_width;
214 dXdata += output_height * output_width;
// ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC
//
// Backward pass of the nearest-neighbour resize for (batch, height, width,
// channels) data. Input(0) is dY and Input(1) is X; dY is required to be 4-D.
//
// Naming: input_height / input_width are dY's spatial sizes, while
// output_height / output_width are X's spatial sizes, i.e. "input" and
// "output" are from the gradient operator's point of view.
//
// Scale factors: with three inputs, Input(2) must be a 1-D tensor with exactly
// two elements; element 0 overwrites height_scale_ and element 1 overwrites
// width_scale_.
// NOTE(review): no range check on these values is visible here, and the loop
// below divides by them. Confirm that non-positive or NaN scales are rejected
// elsewhere.
//
// NOTE(review): the statement that creates dX is not visible here; the shape
// list {batch_size, output_height, output_width, num_channels} is presumably
// its argument.
222 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDeviceWithOrderNHWC() {
223 const auto& dY = Input(0);
224 const auto& X = Input(1);
226 const auto inputDims = dY.sizes();
227 CAFFE_ENFORCE_EQ(4, inputDims.size());
228 const int batch_size = dY.dim32(0), input_height = dY.dim32(1),
229 input_width = dY.dim32(2), num_channels = dY.dim32(3);
230 const int output_height = X.dim32(1);
231 const int output_width = X.dim32(2);
232 if (InputSize() == 3) {
233 const auto& scales = Input(2);
234 CAFFE_ENFORCE_EQ(scales.dim(), 1);
235 CAFFE_ENFORCE_EQ(scales.numel(), 2);
236 const float* scales_data = scales.data<
float>();
237 height_scale_ = scales_data[0];
238 width_scale_ = scales_data[1];
242 {batch_size, output_height, output_width, num_channels},
// Zero dX first: the loop below accumulates into it with +=.
244 math::Set<float, CPUContext>(
245 dX->numel(), 0.0f, dX->template mutable_data<float>(), &context_);
// Number of floats in one row of dX / dY.
247 const int output_width_stride = output_width * num_channels;
248 const int input_width_stride = input_width * num_channels;
250 const float* dYdata = dY.data<
float>();
251 float* dXdata = dX->template mutable_data<float>();
// Every dY pixel (y, x) is added, channel by channel, to the dX pixel at
// ((int)(y / height_scale_), (int)(x / width_scale_)), with each index clamped
// to the last dX row / column.
253 for (
int n = 0; n < batch_size; ++n) {
254 for (
int y = 0; y < input_height; ++y) {
255 const int out_y = std::min((
int)(y / height_scale_), (output_height - 1));
256 for (
int x = 0; x < input_width; ++x) {
257 const int out_x = std::min((
int)(x / width_scale_), (output_width - 1));
260 dXdata + output_width_stride * out_y + num_channels * out_x;
261 const float* dYdata_c0 =
262 dYdata + input_width_stride * y + num_channels * x;
264 for (
int c = 0; c < num_channels; ++c) {
265 dXdata_c0[c] += dYdata_c0[c];
// Step both pointers to the next image in the batch.
269 dYdata += input_height * input_width_stride;
270 dXdata += output_height * output_width_stride;
// ResizeNearestGradientOp<float, CPUContext>::RunOnDevice
//
// Chooses the gradient implementation that matches the storage order: NHWC and
// NCHW each forward to the corresponding RunOnDeviceWithOrder* method and
// return its result. CAFFE_THROW reports "Unknown Storage order: " followed by
// order_ for anything else.
// NOTE(review): the switch header is not visible here; order_ is presumably
// the value being switched on.
277 bool ResizeNearestGradientOp<float, CPUContext>::RunOnDevice() {
279 case StorageOrder::NHWC:
280 return RunOnDeviceWithOrderNHWC();
281 case StorageOrder::NCHW:
282 return RunOnDeviceWithOrderNCHW();
284 CAFFE_THROW(
"Unknown Storage order: ", order_);
// Operator registration.
// - ResizeNearest is registered as a CPU operator backed by
//   ResizeNearestOp<float, CPUContext>.
// - ResizeNearestGradient is registered as a CPU gradient operator backed by
//   ResizeNearestGradientOp<float, CPUContext>.
// - When CAFFE2_USE_MKLDNN is defined, an IDEEP registration wraps the CPU
//   forward operator in IDEEPFallbackOp.
//   NOTE(review): the first argument of REGISTER_IDEEP_OPERATOR is not visible
//   here, presumably ResizeNearest; confirm.
287 REGISTER_CPU_OPERATOR(ResizeNearest, ResizeNearestOp<float, CPUContext>);
288 REGISTER_CPU_GRADIENT_OPERATOR(
289 ResizeNearestGradient,
290 ResizeNearestGradientOp<float, CPUContext>);
292 #ifdef CAFFE2_USE_MKLDNN 293 REGISTER_IDEEP_OPERATOR(
295 IDEEPFallbackOp<ResizeNearestOp<float, CPUContext>>);
// Schema for the ResizeNearest operator: declares the width_scale and
// height_scale arguments, the operator description, input X, the description
// of the scales tensor ([height_scale, width_scale]), output Y, and inherits
// the ONNX "Upsample" schema.
//
// The output_height formula in the description is expressed in terms of
// input_height, which is how the CPU kernels in this file compute it
// (input_height * height_scale_).
299 OPERATOR_SCHEMA(ResizeNearest)
302 .Arg(
"width_scale",
"Scale along width dimension")
303 .Arg(
"height_scale",
"Scale along height dimension")
305 Resizes the spatial dimensions of the input using nearest neighbor 306 interpolation. The `width_scale` and `height_scale` arguments 307 control the size of the output, which is given by: 308 output_width = floor(input_width * width_scale) 309 output_height = floor(input_height * height_scale) 311 .Input(0, "X",
"Input tensor")
315 "1D, 2-element, Scales tensor, [height_scale, width_scale]")
316 .Output(0,
"Y",
"Output tensor")
317 .InheritOnnxSchema(
"Upsample");
// Schema for the ResizeNearestGradient operator: documents the same
// width_scale and height_scale arguments as the forward ResizeNearest schema.
320 GRADIENT_OPERATOR_SCHEMA(ResizeNearestGradient)
323 .Arg(
"width_scale",
"Scale along width dimension")
324 .Arg(
"height_scale",
"Scale along height dimension");
// Gradient maker body for ResizeNearest: reuses GradientMakerBase's
// constructors and overrides GetGradientDefs() to describe the
// "ResizeNearestGradient" operator.
//
// - Forward op with two inputs (data plus the scales tensor): the gradient op
//   receives {GO(0), I(0), I(1)}.
// - Otherwise: the gradient op receives {GO(0), I(0)}.
// In both cases its single output is GI(0).
// NOTE(review): the enclosing class header and the call that wraps these
// argument lists are not visible here, presumably SingleGradientDef(...);
// confirm.
327 using GradientMakerBase::GradientMakerBase;
328 vector<OperatorDef> GetGradientDefs()
override {
// Two forward inputs means the scales tensor must be forwarded as well.
329 if (def_.input().size() == 2) {
333 "ResizeNearestGradient",
335 vector<string>{GO(0), I(0), I(1)},
336 vector<string>{GI(0)});
340 vector<string>{GO(0), I(0)},
341 vector<string>{GI(0)});
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
static vector< OperatorDef > SingleGradientDef(const Args &...args)
A helper function that creates a single operator def, which is usually the case for many ...