Caffe2 - C++ API
A deep learning, cross platform ML framework
deform_conv_op_impl.h
1 // deform_conv_op_impl.h is the templated implementation of the deform_conv_op.h file.
2 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
3 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
4 
5 #include "caffe2/core/context.h"
6 #include "caffe2/core/flags.h"
7 #include "caffe2/core/logging.h"
8 #include "caffe2/core/operator.h"
9 #include "caffe2/operators/conv_pool_op_base.h"
10 #include "caffe2/operators/deform_conv_op.h"
11 #include "caffe2/utils/math.h"
12 
13 namespace caffe2 {
14 
15 template <typename T, class Context>
16 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
17  const Tensor& X = Input(INPUT);
18  const Tensor& offset = Input(OFFSET);
19  auto& filter = Input(FILTER);
20  Tensor* Y = Output(0);
21  const int N = X.dim32(0), C = X.dim32(1);
22  CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
23  const int M = filter.dim32(0);
24  CAFFE_ENFORCE(
25  C == filter.dim32(1) * group_,
26  "Convolution op: input channels does not match: # of input channels ",
27  C,
28  " is not equal to kernel channels * group:",
29  filter.dim32(1),
30  "*",
31  group_);
32  CAFFE_ENFORCE(
33  M % group_ == 0,
34  "The number of output channels is not divisible by group.");
35  CAFFE_ENFORCE(
36  kernel_.size() == 2,
37  "Deformable convolution only supports 2d kernel, has ",
38  kernel_.size(),
39  "d kernel.");
40  CAFFE_ENFORCE(
41  offset.dim() == 4,
42  "Deformable convolution only supports 4d offset, has ",
43  offset.dim(),
44  "d offset.");
45  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
46  CAFFE_ENFORCE(
47  C % deformable_group_ == 0,
48  "The number of input channels ",
49  C,
50  " is not divisible by deformable group ",
51  deformable_group_);
52  CAFFE_ENFORCE(
53  M % deformable_group_ == 0,
54  "The number of output channels ",
55  M,
56  " is not divisible by deformable group ",
57  deformable_group_);
58  CAFFE_ENFORCE(
59  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
60  "Deformable convolution: offset 1st dimension must equal "
61  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
62  kernel_h(),
63  " * ",
64  kernel_w(),
65  " * ",
66  deformable_group_);
67 
68  CAFFE_ENFORCE_EQ(
69  offset.dim32(2),
70  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
71  stride_h() +
72  1);
73  CAFFE_ENFORCE_EQ(
74  offset.dim32(3),
75  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
76  stride_w() +
77  1);
78 
79  int kernel_dims_size = 1;
80  for (int i = 0; i < kernel_.size(); ++i) {
81  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
82  kernel_dims_size *= kernel_[i];
83  }
84 
85  ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
86 
87  const vector<int> input_dims = GetDims(X);
88  const vector<int> output_dims = GetDims(*Y);
89  const int input_image_size = this->GetDimsSize(X);
90  const int output_image_size = this->GetDimsSize(*Y);
91 
92  vector<int> img_shape;
93  img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
94 
95  vector<int> buffer_shape;
96  buffer_shape.push_back(C / group_ * kernel_dims_size);
97  buffer_shape.insert(
98  buffer_shape.end(), output_dims.begin(), output_dims.end());
99 
100  // The dimension of each kernel
101  const int kernel_dim = C / group_ * kernel_dims_size;
102  // The offset corresponding to a single input image, and a single output
103  // image.
104  const int input_offset = C / group_ * input_image_size;
105  const int output_offset = M / group_ * output_image_size;
106  const int offset_offset = offset.numel() / offset.dim32(0);
107  const int filter_offset = filter.numel() / group_;
108 
109  // The col buffer is stored in CHW order as well - kernel_dim, and the height
110  // and width.
111  const T* Xdata = X.template data<T>();
112  const T* offset_data = offset.template data<T>();
113 
114  if (InputSize() == 4) {
115  auto& bias = Input(BIAS);
116  CAFFE_ENFORCE(bias.dim() == 1);
117  CAFFE_ENFORCE(bias.dim32(0) == M);
118  if (bias_multiplier_.numel() != output_image_size) {
119  // If the helper bias multiplier is not image size, reshape and fill it
120  // with
121  // one.
123  &bias_multiplier_,
124  vector<int64_t>(1, output_image_size),
125  at::dtype<T>().device(Context::GetDeviceType()));
126  math::Set<T, Context>(
127  output_image_size,
128  static_cast<T>(1),
129  bias_multiplier_.template mutable_data<T>(),
130  &context_);
131  }
132  }
133  T* Ydata = Y->template mutable_data<T>();
134  const T* bias_data = nullptr;
135  if (InputSize() == 4) {
136  bias_data = Input(BIAS).template data<T>();
137  }
138 
139  auto f = [&](Tensor* col_buffer) {
140  col_buffer->Resize(buffer_shape);
141  T* col_buffer_data = col_buffer->template mutable_data<T>();
142  // Im2col, followed by gemm.
143  for (int image_id = 0; image_id < N; ++image_id) {
144  for (int group_id = 0; group_id < group_; ++group_id) {
145  DeformableIm2col(
146  Xdata + group_id * input_offset,
147  offset_data,
148  X.sizes(),
149  col_buffer->sizes(),
150  col_buffer_data);
151  // Weight term
152  math::Gemm<T, Context>(
153  CblasNoTrans,
154  CblasNoTrans,
155  M / group_,
156  output_image_size,
157  kernel_dim,
158  1,
159  filter.template data<T>() + group_id * filter_offset,
160  col_buffer_data,
161  0,
162  Ydata + group_id * output_offset,
163  &context_);
164  }
165  if (bias_data) {
166  math::Gemm<T, Context>(
167  CblasNoTrans,
168  CblasNoTrans,
169  M,
170  output_image_size,
171  1,
172  1,
173  bias_data,
174  bias_multiplier_.template data<T>(),
175  1,
176  Ydata,
177  &context_);
178  }
179  Xdata += input_offset * group_;
180  Ydata += output_offset * group_;
181  offset_data += offset_offset;
182  }
183  };
184 
185  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
186  runWithSharedBuffer<Context>(ws_, f);
187  } else {
188  f(&col_buffer_);
189  }
190  return true;
191 }
192 
193 template <typename T, class Context>
194 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
195  auto& X = Input(INPUT);
196  auto& offset = Input(OFFSET);
197  auto& filter = Input(FILTER);
198  auto& dY = Input(OUTPUT_GRAD);
199 
200 
201  const int N = X.dim32(0), C = X.dim32(1);
202 
203  const vector<int> input_dims = this->GetDims(X);
204  const int input_image_size = this->GetDimsSize(X);
205 
206  const vector<int> output_dims = this->GetDims(dY);
207  // The output image size is the spatial size of the output.
208  const int output_image_size = this->GetDimsSize(dY);
209 
210  ConvPoolOpBase<Context>::ComputePads(input_dims);
211  CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
212  const int M = filter.dim32(0);
213  CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
214 
215  CAFFE_ENFORCE(
216  kernel_.size() == 2,
217  "Deformable convolution only supports 2d kernel, has ",
218  kernel_.size(),
219  "d kernel.");
220  CAFFE_ENFORCE(
221  offset.dim() == 4,
222  "Deformable convolution only supports 4d offset, has ",
223  offset.dim(),
224  "d offset.");
225  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
226  CAFFE_ENFORCE(
227  C % deformable_group_ == 0,
228  "The number of input channels ",
229  C,
230  " is not divisible by deformable group ",
231  deformable_group_);
232  CAFFE_ENFORCE(
233  M % deformable_group_ == 0,
234  "The number of output channels ",
235  M,
236  " is not divisible by deformable group ",
237  deformable_group_);
238  CAFFE_ENFORCE(
239  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
240  "Deformable convolution: offset 1st dimension must equal "
241  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
242  kernel_h(),
243  " * ",
244  kernel_w(),
245  " * ",
246  deformable_group_);
247 
248  CAFFE_ENFORCE_EQ(
249  offset.dim32(2),
250  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
251  stride_h() +
252  1);
253  CAFFE_ENFORCE_EQ(
254  offset.dim32(3),
255  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
256  stride_w() +
257  1);
258 
259  int kernel_dims_size = 1;
260  for (int i = 0; i < kernel_.size(); ++i) {
261  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
262  kernel_dims_size *= kernel_[i];
263  }
264 
265  CAFFE_ENFORCE(M % group_ == 0);
266  auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
267  auto* doffset = Output(OFFSET_GRAD, offset.sizes(), at::dtype<T>());
268 
269  // The dimension of each kernel
270  const int kernel_dim = C / group_ * kernel_dims_size;
271  // The offset corresponding to a single input image, and a single output
272  // image.
273  const int input_offset = C / group_ * input_image_size;
274  const int output_offset = M / group_ * output_image_size;
275  const int offset_offset = offset.numel() / offset.dim32(0);
276  const int filter_offset = filter.numel() / group_;
277 
278  // The col buffer is stored in CHW order as well - kernel_dim, and the
279  // height and width.
280  vector<int64_t> img_shape;
281  img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
282  vector<int64_t> col_buffer_shape;
283  col_buffer_shape.push_back(C * kernel_dims_size);
284  col_buffer_shape.insert(
285  col_buffer_shape.end(), output_dims.begin(), output_dims.end());
287  &col_buffer_,
288  col_buffer_shape,
289  at::dtype<T>().device(Context::GetDeviceType()));
290 
291  const int col_buffer_offset = col_buffer_.numel() / group_;
292 
293  const T* Xdata = X.template data<T>();
294  const T* filter_data = filter.template data<T>();
295  const T* offset_data = offset.template data<T>();
296  const T* dYdata = dY.template data<T>();
297  T* col_buffer_data = col_buffer_.template mutable_data<T>();
298  T* dfilter_data = dfilter->template mutable_data<T>();
299  T* doffset_data = doffset->template mutable_data<T>();
300 
301  // Pre-setting the gradients to zero.
302  math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);
303 
304  T* dbias_data = nullptr;
305  if (!no_bias_) {
306 
307  auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
308  if (bias_multiplier_.numel() != output_image_size) {
309  // If the helper bias multiplier is not M, reshape and fill it with one.
311  &bias_multiplier_,
312  vector<int64_t>(1, output_image_size),
313  at::dtype<T>().device(Context::GetDeviceType()));
314  math::Set<T, Context>(
315  output_image_size,
316  static_cast<T>(1),
317  bias_multiplier_.template mutable_data<T>(),
318  &context_);
319  }
320  dbias_data = dbias->template mutable_data<T>();
321  math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
322  }
323 
324  T* dXdata = nullptr;
325  if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
326 
327  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
328  dXdata = dX->template mutable_data<T>();
329  math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
330  }
331 
332  for (int image_id = 0; image_id < N; ++image_id) {
333  for (int group_id = 0; group_id < group_; ++group_id) {
334  math::Gemm<T, Context>(
335  CblasTrans,
336  CblasNoTrans,
337  kernel_dim,
338  output_image_size,
339  M / group_,
340  1,
341  filter_data + group_id * filter_offset,
342  dYdata + group_id * output_offset,
343  0,
344  col_buffer_data + group_id * col_buffer_offset,
345  &context_);
346  }
347 
348  // Gradient with respect to offsets
349  DeformableCol2imCoord(
350  col_buffer_data,
351  Xdata,
352  offset_data,
353  X.sizes(),
354  col_buffer_shape,
355  doffset_data);
356 
357  // Gradient with respect to input data
358  if (dXdata) {
359  DeformableCol2im(
360  col_buffer_data, offset_data, X.sizes(), col_buffer_shape, dXdata);
361  dXdata += input_offset * group_;
362  }
363 
364  // Gradient with respect to filter
365  DeformableIm2col(
366  Xdata, offset_data, X.sizes(), col_buffer_shape, col_buffer_data);
367 
368  for (int group_id = 0; group_id < group_; ++group_id) {
369  math::Gemm<T, Context>(
370  CblasNoTrans,
371  CblasTrans,
372  M / group_,
373  kernel_dim,
374  output_image_size,
375  1,
376  dYdata + group_id * output_offset,
377  col_buffer_data + group_id * col_buffer_offset,
378  1,
379  dfilter_data + group_id * filter_offset,
380  &context_);
381  }
382 
383  // Gradient with respect to bias
384  if (dbias_data) {
385  math::Gemv<T, Context>(
386  CblasNoTrans,
387  M,
388  output_image_size,
389  1,
390  dYdata,
391  bias_multiplier_.template data<T>(),
392  1,
393  dbias_data,
394  &context_);
395  }
396 
397  Xdata += input_offset * group_;
398  dYdata += output_offset * group_;
399  offset_data += offset_offset;
400  doffset_data += offset_offset;
401  }
402 
403  return true;
404 }
405 } // namespace caffe2
406 
407 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
Definition: any.cpp:108
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if ...
Definition: tensor.cc:127
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:64