Caffe2 - C++ API
A deep learning, cross-platform ML framework
upsample_op.cc
17 #include "caffe2/operators/upsample_op.h"
18 
19 #include "caffe2/utils/cpu_neon.h"
20 #include "caffe2/utils/math.h"
21 
22 namespace caffe2 {
23 
24 template <>
25 bool UpsampleBilinearOp<float, CPUContext>::RunOnDevice() {
26  const auto& X = Input(0);
27 
28  if (InputSize() == 2) {
29  const auto& scales = Input(1);
30  CAFFE_ENFORCE_EQ(scales.dim(), 1);
31  CAFFE_ENFORCE_EQ(scales.numel(), 2);
32  const float* scales_data = scales.data<float>();
33  height_scale_ = scales_data[0];
34  width_scale_ = scales_data[1];
35  }
36 
37  const int batch_size = X.dim32(0);
38  const int num_channels = X.dim32(1);
39  const int input_height = X.dim32(2);
40  const int input_width = X.dim32(3);
41  int output_width = input_width * width_scale_;
42  int output_height = input_height * height_scale_;
43  auto* Y = Output(
44  0,
45  {batch_size, num_channels, output_height, output_width},
46  at::dtype<float>());
47 
48  const float* input = X.data<float>();
49  float* output = Y->mutable_data<float>();
50  int channels = num_channels * batch_size;
51 
52  const float rheight = (output_height > 1)
53  ? (float)(input_height - 1) / (output_height - 1)
54  : 0.f;
55  const float rwidth =
56  (output_width > 1) ? (float)(input_width - 1) / (output_width - 1) : 0.f;
57  for (int h2 = 0; h2 < output_height; ++h2) {
58  const float h1r = rheight * h2;
59  const int h1 = h1r;
60  const int h1p = (h1 < input_height - 1) ? 1 : 0;
61  const float h1lambda = h1r - h1;
62  const float h0lambda = (float)1. - h1lambda;
63  for (int w2 = 0; w2 < output_width; ++w2) {
64  const float w1r = rwidth * w2;
65  const int w1 = w1r;
66  const int w1p = (w1 < input_width - 1) ? 1 : 0;
67  const float w1lambda = w1r - w1;
68  const float w0lambda = (float)1. - w1lambda;
69  const float* Xdata = &input[h1 * input_width + w1];
70  float* Ydata = &output[h2 * output_width + w2];
71  for (int c = 0; c < channels; ++c) {
72  Ydata[0] = h0lambda * (w0lambda * Xdata[0] + w1lambda * Xdata[w1p]) +
73  h1lambda *
74  (w0lambda * Xdata[h1p * input_width] +
75  w1lambda * Xdata[h1p * input_width + w1p]);
76  Xdata += input_width * input_height;
77  Ydata += output_width * output_height;
78  }
79  }
80  }
81 
82  return true;
83 }
84 
template <>
bool UpsampleBilinearGradientOp<float, CPUContext>::RunOnDevice() {
  // Backward pass of bilinear upsampling: scatter-adds each upstream
  // gradient element dY[h2][w2] into the four dX cells that produced the
  // corresponding forward output, weighted by the same bilinear lambdas.
  // Note the naming flip vs the forward op: here "input_*" are dY's
  // (upsampled) dims and "output_*" are dX's (original X) dims.
  const auto& dY = Input(0);
  const auto& X = Input(1);

  // Optional third input dynamically overrides the scale arguments,
  // [height_scale, width_scale] — mirrors the forward op.
  if (InputSize() == 3) {
    const auto& scales = Input(2);
    CAFFE_ENFORCE_EQ(scales.dim(), 1);
    CAFFE_ENFORCE_EQ(scales.numel(), 2);
    const float* scales_data = scales.data<float>();
    height_scale_ = scales_data[0];
    width_scale_ = scales_data[1];
  }

  const auto inputDims = dY.sizes();
  CAFFE_ENFORCE_EQ(4, inputDims.size());
  const int batch_size = dY.dim32(0);
  const int num_channels = dY.dim32(1);
  const int input_height = dY.dim32(2);
  const int input_width = dY.dim32(3);
  // dX takes its spatial size from X, so no scale rounding is re-derived.
  const int output_height = X.dim32(2);
  const int output_width = X.dim32(3);
  auto* dX = Output(
      0,
      {batch_size, num_channels, output_height, output_width},
      at::dtype<float>());
  // Zero-fill: the loop below accumulates with +=.
  math::Set<float, CPUContext>(
      dX->numel(), 0.0f, dX->mutable_data<float>(), &context_);

  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Batch and channel planes are processed identically; fold them together.
  int channels = num_channels * batch_size;

  // Grid ratios mapping dY coordinates back onto the dX grid
  // (inverse of the forward mapping); 0 when a dY dim is degenerate.
  const float rheight = (input_height > 1)
      ? (float)(output_height - 1) / (input_height - 1)
      : 0.f;
  const float rwidth =
      (input_width > 1) ? (float)(output_width - 1) / (input_width - 1) : 0.f;

  for (int h2 = 0; h2 < input_height; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = h1r; // top destination row in dX
    const int h1p = (h1 < output_height - 1) ? 1 : 0; // 0 clamps at bottom edge
    const float h1lambda = h1r - h1; // weight of the lower row
    const float h0lambda = (float)1. - h1lambda;
    for (int w2 = 0; w2 < input_width; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = w1r; // left destination column in dX
      const int w1p = (w1 < output_width - 1) ? 1 : 0; // 0 clamps at right edge
      const float w1lambda = w1r - w1; // weight of the right column
      const float w0lambda = (float)1. - w1lambda;
      float* pos1 = &dXdata[h1 * output_width + w1];
      const float* pos2 = &dYdata[h2 * input_width + w2];
      // Accumulate into the 2x2 neighborhood for every (batch, channel)
      // plane, stepping one whole H*W plane per iteration.
      for (int c = 0; c < channels; ++c) {
        pos1[0] += h0lambda * w0lambda * pos2[0];
        pos1[w1p] += h0lambda * w1lambda * pos2[0];
        pos1[h1p * output_width] += h1lambda * w0lambda * pos2[0];
        pos1[h1p * output_width + w1p] += h1lambda * w1lambda * pos2[0];
        pos1 += output_width * output_height;
        pos2 += input_width * input_height;
      }
    }
  }

  return true;
}
151 
// Register the CPU implementations of the forward and backward ops.
REGISTER_CPU_OPERATOR(UpsampleBilinear, UpsampleBilinearOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(
    UpsampleBilinearGradient,
    UpsampleBilinearGradientOp<float, CPUContext>);
156 
157 // Input: X, output: Y
158 OPERATOR_SCHEMA(UpsampleBilinear)
159  .NumInputs(1, 2)
160  .NumOutputs(1)
161  .Arg("width_scale", "Scale along width dimension")
162  .Arg("height_scale", "Scale along height dimension")
163  .SetDoc(R"DOC(
164 Resizes the spatial dimensions of the input using bilinear
165 interpolation. The `width_scale` and `height_scale` arguments
166 control the size of the output, which is given by:
167 output_width = floor(input_width * width_scale)
168 output_height = floor(output_height * height_scale)
169 )DOC")
170  .Input(0, "X", "Input tensor")
171  .Input(
172  1,
173  "scales",
174  "1D, 2-element, Scales tensor, [height_scale, width_scale]")
175  .Output(0, "Y", "Output tensor");
176 
177 // Input: dY, output: dX
178 OPERATOR_SCHEMA(UpsampleBilinearGradient)
179  .NumInputs(2, 3)
180  .NumOutputs(1)
181  .Arg("width_scale", "Scale along width dimension")
182  .Arg("height_scale", "Scale along height dimension");
183 
185  using GradientMakerBase::GradientMakerBase;
186  vector<OperatorDef> GetGradientDefs() override {
187  if (def_.input().size() == 2) {
188  // this is a hack to support the second input as dynamic
189  // width_scale and height_scale to align with onnx change
190  return SingleGradientDef(
191  "UpsampleBilinearGradient",
192  "",
193  vector<string>{GO(0), I(0), I(1)},
194  vector<string>{GI(0)});
195  }
196  return SingleGradientDef(
197  "UpsampleBilinearGradient",
198  "",
199  vector<string>{GO(0), I(0)},
200  vector<string>{GI(0)});
201  }
202 };
// Bind the gradient maker to the forward operator.
REGISTER_GRADIENT(UpsampleBilinear, GetUpsampleBilinearGradient);
204 
205 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.
Definition: blob.h:13
static vector< OperatorDef > SingleGradientDef(const Args &...args)
A helper function to allow one to create one single operator def, which is usually the case for many simple operators.