Caffe2 - C++ API
A deep learning, cross platform ML framework
deform_conv_op_impl.h
1 
17 // deform_conv_op_impl.h is the templated implementation of the deform_conv_op.h file.
18 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
19 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
20 
21 #include "caffe2/core/context.h"
22 #include "caffe2/core/flags.h"
23 #include "caffe2/core/logging.h"
24 #include "caffe2/core/operator.h"
25 #include "caffe2/operators/conv_pool_op_base.h"
26 #include "caffe2/operators/deform_conv_op.h"
27 #include "caffe2/utils/math.h"
28 
29 namespace caffe2 {
30 
31 template <typename T, class Context>
32 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
33  const Tensor<Context>& X = Input(INPUT);
34  const Tensor<Context>& offset = Input(OFFSET);
35  auto& filter = Input(FILTER);
36  Tensor<Context>* Y = Output(0);
37  const int N = X.dim32(0), C = X.dim32(1);
38  CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
39  const int M = filter.dim32(0);
40  CAFFE_ENFORCE(
41  C == filter.dim32(1) * group_,
42  "Convolution op: input channels does not match: # of input channels ",
43  C,
44  " is not equal to kernel channels * group:",
45  filter.dim32(1),
46  "*",
47  group_);
48  CAFFE_ENFORCE(
49  M % group_ == 0,
50  "The number of output channels is not divisible by group.");
51  CAFFE_ENFORCE(
52  kernel_.size() == 2,
53  "Deformable convolution only supports 2d kernel, has ",
54  kernel_.size(),
55  "d kernel.");
56  CAFFE_ENFORCE(
57  offset.ndim() == 4,
58  "Deformable convolution only supports 4d offset, has ",
59  offset.ndim(),
60  "d offset.");
61  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
62  CAFFE_ENFORCE(
63  C % deformable_group_ == 0,
64  "The number of input channels ",
65  C,
66  " is not divisible by deformable group ",
67  deformable_group_);
68  CAFFE_ENFORCE(
69  M % deformable_group_ == 0,
70  "The number of output channels ",
71  M,
72  " is not divisible by deformable group ",
73  deformable_group_);
74  CAFFE_ENFORCE(
75  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
76  "Deformable convolution: offset 1st dimension must equal "
77  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
78  kernel_h(),
79  " * ",
80  kernel_w(),
81  " * ",
82  deformable_group_);
83 
84  CAFFE_ENFORCE_EQ(
85  offset.dim32(2),
86  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
87  stride_h() +
88  1);
89  CAFFE_ENFORCE_EQ(
90  offset.dim32(3),
91  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
92  stride_w() +
93  1);
94 
95  int kernel_dims_size = 1;
96  for (int i = 0; i < kernel_.size(); ++i) {
97  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
98  kernel_dims_size *= kernel_[i];
99  }
100 
101  ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
102 
103  const vector<int> input_dims = GetDims(X);
104  const vector<int> output_dims = GetDims(*Y);
105  const int input_image_size = this->GetDimsSize(X);
106  const int output_image_size = this->GetDimsSize(*Y);
107 
108  vector<int> img_shape;
109  img_shape.assign(X.dims().begin() + 1, X.dims().end());
110 
111  vector<int> buffer_shape;
112  buffer_shape.push_back(C / group_ * kernel_dims_size);
113  buffer_shape.insert(
114  buffer_shape.end(), output_dims.begin(), output_dims.end());
115 
116  // The dimension of each kernel
117  const int kernel_dim = C / group_ * kernel_dims_size;
118  // The offset corresponding to a single input image, and a single output
119  // image.
120  const int input_offset = C / group_ * input_image_size;
121  const int output_offset = M / group_ * output_image_size;
122  const int offset_offset = offset.size() / offset.dim32(0);
123  const int filter_offset = filter.size() / group_;
124 
125  // The col buffer is stored in CHW order as well - kernel_dim, and the height
126  // and width.
127  const T* Xdata = X.template data<T>();
128  const T* offset_data = offset.template data<T>();
129 
130  if (InputSize() == 4) {
131  auto& bias = Input(BIAS);
132  CAFFE_ENFORCE(bias.ndim() == 1);
133  CAFFE_ENFORCE(bias.dim32(0) == M);
134  if (bias_multiplier_.size() != output_image_size) {
135  // If the helper bias multiplier is not image size, reshape and fill it
136  // with
137  // one.
138  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
139  math::Set<T, Context>(
140  output_image_size,
141  static_cast<T>(1),
142  bias_multiplier_.template mutable_data<T>(),
143  &context_);
144  }
145  }
146  T* Ydata = Y->template mutable_data<T>();
147  const T* bias_data = nullptr;
148  if (InputSize() == 4) {
149  bias_data = Input(BIAS).template data<T>();
150  }
151 
152  auto f = [&](Tensor<Context>* col_buffer) {
153  col_buffer->Resize(buffer_shape);
154  T* col_buffer_data = col_buffer->template mutable_data<T>();
155  // Im2col, followed by gemm.
156  for (int image_id = 0; image_id < N; ++image_id) {
157  for (int group_id = 0; group_id < group_; ++group_id) {
158  DeformableIm2col(
159  Xdata + group_id * input_offset,
160  offset_data,
161  X.dims(),
162  col_buffer->dims(),
163  col_buffer_data);
164  // Weight term
165  math::Gemm<T, Context>(
166  CblasNoTrans,
167  CblasNoTrans,
168  M / group_,
169  output_image_size,
170  kernel_dim,
171  1,
172  filter.template data<T>() + group_id * filter_offset,
173  col_buffer_data,
174  0,
175  Ydata + group_id * output_offset,
176  &context_);
177  }
178  if (bias_data) {
179  math::Gemm<T, Context>(
180  CblasNoTrans,
181  CblasNoTrans,
182  M,
183  output_image_size,
184  1,
185  1,
186  bias_data,
187  bias_multiplier_.template data<T>(),
188  1,
189  Ydata,
190  &context_);
191  }
192  Xdata += input_offset * group_;
193  Ydata += output_offset * group_;
194  offset_data += offset_offset;
195  }
196  };
197 
198  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
199  runWithSharedBuffer<Context>(ws_, f);
200  } else {
201  f(&col_buffer_);
202  }
203  return true;
204 }
205 
206 template <typename T, class Context>
207 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
208  auto& X = Input(INPUT);
209  auto& offset = Input(OFFSET);
210  auto& filter = Input(FILTER);
211  auto& dY = Input(OUTPUT_GRAD);
212  auto* dfilter = Output(FILTER_GRAD);
213  auto* doffset = Output(OFFSET_GRAD);
214  const int N = X.dim32(0), C = X.dim32(1);
215 
216  const vector<int> input_dims = this->GetDims(X);
217  const int input_image_size = this->GetDimsSize(X);
218 
219  const vector<int> output_dims = this->GetDims(dY);
220  // The output image size is the spatial size of the output.
221  const int output_image_size = this->GetDimsSize(dY);
222 
223  ConvPoolOpBase<Context>::ComputePads(input_dims);
224  CAFFE_ENFORCE_EQ(X.ndim(), filter.ndim());
225  const int M = filter.dim32(0);
226  CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
227 
228  CAFFE_ENFORCE(
229  kernel_.size() == 2,
230  "Deformable convolution only supports 2d kernel, has ",
231  kernel_.size(),
232  "d kernel.");
233  CAFFE_ENFORCE(
234  offset.ndim() == 4,
235  "Deformable convolution only supports 4d offset, has ",
236  offset.ndim(),
237  "d offset.");
238  CAFFE_ENFORCE_EQ(offset.dim32(0), N);
239  CAFFE_ENFORCE(
240  C % deformable_group_ == 0,
241  "The number of input channels ",
242  C,
243  " is not divisible by deformable group ",
244  deformable_group_);
245  CAFFE_ENFORCE(
246  M % deformable_group_ == 0,
247  "The number of output channels ",
248  M,
249  " is not divisible by deformable group ",
250  deformable_group_);
251  CAFFE_ENFORCE(
252  offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
253  "Deformable convolution: offset 1st dimension must equal "
254  "2 * kernel_h * kernel_w * deformable_group: 2 * ",
255  kernel_h(),
256  " * ",
257  kernel_w(),
258  " * ",
259  deformable_group_);
260 
261  CAFFE_ENFORCE_EQ(
262  offset.dim32(2),
263  (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
264  stride_h() +
265  1);
266  CAFFE_ENFORCE_EQ(
267  offset.dim32(3),
268  (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
269  stride_w() +
270  1);
271 
272  int kernel_dims_size = 1;
273  for (int i = 0; i < kernel_.size(); ++i) {
274  CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
275  kernel_dims_size *= kernel_[i];
276  }
277 
278  CAFFE_ENFORCE(M % group_ == 0);
279  dfilter->ResizeLike(filter);
280  doffset->ResizeLike(offset);
281 
282  // The dimension of each kernel
283  const int kernel_dim = C / group_ * kernel_dims_size;
284  // The offset corresponding to a single input image, and a single output
285  // image.
286  const int input_offset = C / group_ * input_image_size;
287  const int output_offset = M / group_ * output_image_size;
288  const int offset_offset = offset.size() / offset.dim32(0);
289  const int filter_offset = filter.size() / group_;
290 
291  // The col buffer is stored in CHW order as well - kernel_dim, and the
292  // height and width.
293  vector<TIndex> img_shape;
294  img_shape.assign(X.dims().begin() + 1, X.dims().end());
295  vector<TIndex> col_buffer_shape;
296  col_buffer_shape.push_back(C * kernel_dims_size);
297  col_buffer_shape.insert(
298  col_buffer_shape.end(), output_dims.begin(), output_dims.end());
299  col_buffer_.Resize(col_buffer_shape);
300 
301  const int col_buffer_offset = col_buffer_.size() / group_;
302 
303  const T* Xdata = X.template data<T>();
304  const T* filter_data = filter.template data<T>();
305  const T* offset_data = offset.template data<T>();
306  const T* dYdata = dY.template data<T>();
307  T* col_buffer_data = col_buffer_.template mutable_data<T>();
308  T* dfilter_data = dfilter->template mutable_data<T>();
309  T* doffset_data = doffset->template mutable_data<T>();
310 
311  // Pre-setting the gradients to zero.
312  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
313 
314  T* dbias_data = nullptr;
315  if (!no_bias_) {
316  auto* dbias = Output(BIAS_OR_INPUT_GRAD);
317  dbias->Resize(M);
318  if (bias_multiplier_.size() != output_image_size) {
319  // If the helper bias multiplier is not M, reshape and fill it with one.
320  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
321  math::Set<T, Context>(
322  output_image_size,
323  static_cast<T>(1),
324  bias_multiplier_.template mutable_data<T>(),
325  &context_);
326  }
327  dbias_data = dbias->template mutable_data<T>();
328  math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
329  }
330 
331  T* dXdata = nullptr;
332  if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
333  auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
334  dX->ResizeLike(X);
335  dXdata = dX->template mutable_data<T>();
336  math::Set<T, Context>(dX->size(), 0, dXdata, &context_);
337  }
338 
339  for (int image_id = 0; image_id < N; ++image_id) {
340  for (int group_id = 0; group_id < group_; ++group_id) {
341  math::Gemm<T, Context>(
342  CblasTrans,
343  CblasNoTrans,
344  kernel_dim,
345  output_image_size,
346  M / group_,
347  1,
348  filter_data + group_id * filter_offset,
349  dYdata + group_id * output_offset,
350  0,
351  col_buffer_data + group_id * col_buffer_offset,
352  &context_);
353  }
354 
355  // Gradient with respect to offsets
356  DeformableCol2imCoord(
357  col_buffer_data,
358  Xdata,
359  offset_data,
360  X.dims(),
361  col_buffer_shape,
362  doffset_data);
363 
364  // Gradient with respect to input data
365  if (dXdata) {
366  DeformableCol2im(
367  col_buffer_data, offset_data, X.dims(), col_buffer_shape, dXdata);
368  dXdata += input_offset * group_;
369  }
370 
371  // Gradient with respect to filter
372  DeformableIm2col(
373  Xdata, offset_data, X.dims(), col_buffer_shape, col_buffer_data);
374 
375  for (int group_id = 0; group_id < group_; ++group_id) {
376  math::Gemm<T, Context>(
377  CblasNoTrans,
378  CblasTrans,
379  M / group_,
380  kernel_dim,
381  output_image_size,
382  1,
383  dYdata + group_id * output_offset,
384  col_buffer_data + group_id * col_buffer_offset,
385  1,
386  dfilter_data + group_id * filter_offset,
387  &context_);
388  }
389 
390  // Gradient with respect to bias
391  if (dbias_data) {
392  math::Gemv<T, Context>(
393  CblasNoTrans,
394  M,
395  output_image_size,
396  1,
397  dYdata,
398  bias_multiplier_.template data<T>(),
399  1,
400  dbias_data,
401  &context_);
402  }
403 
404  Xdata += input_offset * group_;
405  dYdata += output_offset * group_;
406  offset_data += offset_offset;
407  doffset_data += offset_offset;
408  }
409 
410  return true;
411 }
412 } // namespace caffe2
413 
414 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
Copyright (c) 2016-present, Facebook, Inc.
Copyright (c) 2016-present, Facebook, Inc.
Copyright (c) 2016-present, Facebook, Inc.