// NOTE(review): this text is a rendered listing. Each line starts with its
// original source line number, and lines are missing wherever that numbering
// skips, so some statements below appear without their opening call or their
// closing braces. Comments describe only what is visible here.
//
// Include guard and dependencies, followed by the forward pass of the
// deformable convolution operator for inputs read as (N, C, ...) in NCHW order.
2 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_ 3 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_ 5 #include "caffe2/core/context.h" 6 #include "caffe2/core/flags.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/conv_pool_op_base.h" 10 #include "caffe2/operators/deform_conv_op.h" 11 #include "caffe2/utils/math.h" 15 template <
typename T,
class Context>
16 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
// Required inputs: the data tensor, the sampling offsets and the filter.
17 const Tensor& X = Input(INPUT);
18 const Tensor& offset = Input(OFFSET);
19 auto& filter = Input(FILTER);
// N and C are read from dimensions 0 and 1 of X.
21 const int N = X.dim32(0),
C = X.dim32(1);
// The filter must have the same number of dimensions as the input.
22 CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
// M is dimension 0 of the filter; the messages below treat it as the number
// of output channels.
23 const int M = filter.dim32(0);
// Condition and message of a check that C equals filter.dim32(1) * group_.
// The call that wraps them is not visible in this listing.
25 C == filter.dim32(1) * group_,
26 "Convolution op: input channels does not match: # of input channels ",
28 " is not equal to kernel channels * group:",
// Message of a check on output channels versus group; its condition is not
// visible here.
34 "The number of output channels is not divisible by group.");
// Messages of the kernel-rank and offset-rank checks; their conditions are not
// visible here.
37 "Deformable convolution only supports 2d kernel, has ",
42 "Deformable convolution only supports 4d offset, has ",
// The offset tensor must have the same dimension-0 extent as X.
45 CAFFE_ENFORCE_EQ(offset.dim32(0), N);
// Both C and M must be divisible by deformable_group_.
47 C % deformable_group_ == 0,
48 "The number of input channels ",
50 " is not divisible by deformable group ",
53 M % deformable_group_ == 0,
54 "The number of output channels ",
56 " is not divisible by deformable group ",
// Dimension 1 of the offset tensor must equal
// 2 * kernel_h * kernel_w * deformable_group_.
59 offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
60 "Deformable convolution: offset 1st dimension must equal " 61 "2 * kernel_h * kernel_w * deformable_group: 2 * ",
// Padded input extent minus the dilated kernel extent, along dimensions 2 and
// 3 of X. The divisor and the surrounding check are not visible here
// (presumably the stride and an output-size comparison -- TODO confirm).
70 (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
75 (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
// Check that filter dimensions 2.. match kernel_, and accumulate the product
// of the kernel_ entries.
79 int kernel_dims_size = 1;
80 for (
int i = 0; i < kernel_.size(); ++i) {
81 CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
82 kernel_dims_size *= kernel_[i];
// Size the output Y from X and the filter's dimension 0. Y's declaration is
// not visible in this listing.
85 ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
// Dimension lists and sizes obtained from GetDims / GetDimsSize for X and Y.
// NOTE(review): presumably the spatial dims and their product -- confirm
// against ConvPoolOpBase.
87 const vector<int> input_dims = GetDims(X);
88 const vector<int> output_dims = GetDims(*Y);
89 const int input_image_size = this->GetDimsSize(X);
90 const int output_image_size = this->GetDimsSize(*Y);
// img_shape is X's shape with dimension 0 dropped.
92 vector<int> img_shape;
93 img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
// buffer_shape starts with C / group_ * kernel_dims_size; the arguments below
// append output_dims (the call they belong to is not visible here).
95 vector<int> buffer_shape;
96 buffer_shape.push_back(C / group_ * kernel_dims_size);
98 buffer_shape.end(), output_dims.begin(), output_dims.end());
// Same value as the first entry pushed into buffer_shape.
101 const int kernel_dim = C / group_ * kernel_dims_size;
// Element strides used below to step through the data: per group for X, Y and
// the filter, and per dimension-0 entry for the offset tensor.
104 const int input_offset = C / group_ * input_image_size;
105 const int output_offset = M / group_ * output_image_size;
106 const int offset_offset = offset.numel() / offset.dim32(0);
107 const int filter_offset = filter.numel() / group_;
111 const T* Xdata = X.template data<T>();
112 const T* offset_data = offset.template data<T>();
// A fourth input is the bias: it must be 1-D with M entries.
114 if (InputSize() == 4) {
115 auto& bias = Input(BIAS);
116 CAFFE_ENFORCE(bias.dim() == 1);
117 CAFFE_ENFORCE(bias.dim32(0) == M);
// When bias_multiplier_ does not already hold output_image_size elements it
// is re-created with that size and filled through math::Set. The start of
// both calls, and the fill value, are not visible here.
118 if (bias_multiplier_.numel() != output_image_size) {
124 vector<int64_t>(1, output_image_size),
125 at::dtype<T>().device(Context::GetDeviceType()));
126 math::Set<T, Context>(
129 bias_multiplier_.template mutable_data<T>(),
133 T* Ydata = Y->template mutable_data<T>();
// bias_data stays null unless the bias input is present.
134 const T* bias_data =
nullptr;
135 if (InputSize() == 4) {
136 bias_data = Input(BIAS).template data<T>();
// f performs the computation with a caller-supplied column buffer, which it
// resizes to buffer_shape before use.
139 auto f = [&](
Tensor* col_buffer) {
140 col_buffer->Resize(buffer_shape);
141 T* col_buffer_data = col_buffer->template mutable_data<T>();
// Outer loop over the N images, inner loop over the groups.
143 for (
int image_id = 0; image_id < N; ++image_id) {
144 for (
int group_id = 0; group_id < group_; ++group_id) {
// Argument of a call that is not visible here: this group's slice of X.
146 Xdata + group_id * input_offset,
// GEMM using this group's filter slice and this group's slice of Ydata; the
// remaining arguments are not visible here.
152 math::Gemm<T, Context>(
159 filter.template data<T>() + group_id * filter_offset,
162 Ydata + group_id * output_offset,
// Second GEMM, which takes bias_multiplier_ as an operand.
// NOTE(review): presumably the bias addition, guarded by bias_data -- the
// guard and other arguments are not visible here.
166 math::Gemm<T, Context>(
174 bias_multiplier_.template data<T>(),
// Advance the data pointers to the next image.
179 Xdata += input_offset * group_;
180 Ydata += output_offset * group_;
181 offset_data += offset_offset;
// With the global flag or shared_buffer_ set, f runs through
// runWithSharedBuffer. The alternative branch and the return are not visible
// in this listing.
185 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
186 runWithSharedBuffer<Context>(ws_, f);
// Backward pass of the deformable convolution operator for inputs read as
// (N, C, ...) in NCHW order. It produces the filter and offset gradients and,
// depending on the outputs requested, the bias and input gradients.
// NOTE(review): this listing is missing lines (the embedded numbering skips),
// so some statements appear without their opening call or closing braces.
193 template <
typename T,
class Context>
194 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
// Inputs: data, sampling offsets, filter, and the gradient at the output slot.
195 auto& X = Input(INPUT);
196 auto& offset = Input(OFFSET);
197 auto& filter = Input(FILTER);
198 auto& dY = Input(OUTPUT_GRAD);
// N and C are read from dimensions 0 and 1 of X.
201 const int N = X.dim32(0), C = X.dim32(1);
// Dimension lists and sizes obtained from GetDims / GetDimsSize for X and dY.
// NOTE(review): presumably the spatial dims and their product -- confirm
// against ConvPoolOpBase.
203 const vector<int> input_dims = this->GetDims(X);
204 const int input_image_size = this->GetDimsSize(X);
206 const vector<int> output_dims = this->GetDims(dY);
208 const int output_image_size = this->GetDimsSize(dY);
// Calls ComputePads with the input dims (its effect is defined in
// ConvPoolOpBase, not shown here), then validates the filter against X.
210 ConvPoolOpBase<Context>::ComputePads(input_dims);
211 CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
// M is dimension 0 of the filter; the messages below treat it as the number
// of output channels.
212 const int M = filter.dim32(0);
213 CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
// Messages of the kernel-rank and offset-rank checks; their conditions are not
// visible here.
217 "Deformable convolution only supports 2d kernel, has ",
222 "Deformable convolution only supports 4d offset, has ",
// The offset tensor must have the same dimension-0 extent as X.
225 CAFFE_ENFORCE_EQ(offset.dim32(0), N);
// Both C and M must be divisible by deformable_group_.
227 C % deformable_group_ == 0,
228 "The number of input channels ",
230 " is not divisible by deformable group ",
233 M % deformable_group_ == 0,
234 "The number of output channels ",
236 " is not divisible by deformable group ",
// Dimension 1 of the offset tensor must equal
// 2 * kernel_h * kernel_w * deformable_group_.
239 offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
240 "Deformable convolution: offset 1st dimension must equal " 241 "2 * kernel_h * kernel_w * deformable_group: 2 * ",
// Padded input extent minus the dilated kernel extent, along dimensions 2 and
// 3 of X. The divisor and the surrounding check are not visible here
// (presumably the stride and an output-size comparison -- TODO confirm).
250 (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
255 (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
// Check that filter dimensions 2.. match kernel_, and accumulate the product
// of the kernel_ entries.
259 int kernel_dims_size = 1;
260 for (
int i = 0; i < kernel_.size(); ++i) {
261 CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
262 kernel_dims_size *= kernel_[i];
// M must split evenly across the groups.
265 CAFFE_ENFORCE(M % group_ == 0);
// The gradient outputs take the shapes of the filter and of the offset tensor.
266 auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
267 auto* doffset = Output(OFFSET_GRAD, offset.sizes(), at::dtype<T>());
270 const int kernel_dim = C / group_ * kernel_dims_size;
// Element strides used below to step through the data: per group for X, dY and
// the filter, and per dimension-0 entry for the offset tensor.
273 const int input_offset = C / group_ * input_image_size;
274 const int output_offset = M / group_ * output_image_size;
275 const int offset_offset = offset.numel() / offset.dim32(0);
276 const int filter_offset = filter.numel() / group_;
// img_shape is X's shape with dimension 0 dropped.
280 vector<int64_t> img_shape;
281 img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
// col_buffer_shape is C * kernel_dims_size followed by output_dims. Its first
// entry uses all C channels, not C / group_.
282 vector<int64_t> col_buffer_shape;
283 col_buffer_shape.push_back(C * kernel_dims_size);
284 col_buffer_shape.insert(
285 col_buffer_shape.end(), output_dims.begin(), output_dims.end());
// Trailing argument of a call that is not visible here.
// NOTE(review): presumably (re)initializes col_buffer_ to col_buffer_shape --
// confirm.
289 at::dtype<T>().device(Context::GetDeviceType()));
// Per-group stride inside the column buffer.
291 const int col_buffer_offset = col_buffer_.numel() / group_;
293 const T* Xdata = X.template data<T>();
294 const T* filter_data = filter.template data<T>();
295 const T* offset_data = offset.template data<T>();
296 const T* dYdata = dY.template data<T>();
297 T* col_buffer_data = col_buffer_.template mutable_data<T>();
298 T* dfilter_data = dfilter->template mutable_data<T>();
299 T* doffset_data = doffset->template mutable_data<T>();
// Zero the filter gradient before the loops below.
302 math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);
304 T* dbias_data =
nullptr;
// Bias gradient: a length-M output at the BIAS_OR_INPUT_GRAD slot, zeroed
// below. The condition guarding this section is not visible here.
307 auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
// When bias_multiplier_ does not already hold output_image_size elements it
// is re-created with that size and filled through math::Set. The start of
// both calls, and the fill value, are not visible here.
308 if (bias_multiplier_.numel() != output_image_size) {
312 vector<int64_t>(1, output_image_size),
313 at::dtype<T>().device(Context::GetDeviceType()));
314 math::Set<T, Context>(
317 bias_multiplier_.template mutable_data<T>(),
320 dbias_data = dbias->template mutable_data<T>();
321 math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
// The input gradient is produced when there are 4 outputs, or 3 outputs with
// no_bias_ set. With no_bias_ it goes to the BIAS_OR_INPUT_GRAD slot, otherwise
// to INPUT_GRAD. It is shaped like X and zeroed. dXdata's declaration is not
// visible in this listing.
325 if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
327 auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
328 dXdata = dX->template mutable_data<T>();
329 math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
// Outer loop over the N images.
332 for (
int image_id = 0; image_id < N; ++image_id) {
// Per-group GEMM over the filter slice, the dY slice and the column-buffer
// slice. NOTE(review): presumably writes the column buffer from filter and
// dY -- the transpose flags and sizes are not visible here.
333 for (
int group_id = 0; group_id < group_; ++group_id) {
334 math::Gemm<T, Context>(
341 filter_data + group_id * filter_offset,
342 dYdata + group_id * output_offset,
344 col_buffer_data + group_id * col_buffer_offset,
// DeformableCol2imCoord receives the column buffer and the offsets; the rest
// of its argument list is not visible here.
349 DeformableCol2imCoord(
// Tail of a call taking the column buffer, the offsets and dXdata; the callee
// name is not visible here. dXdata then advances by one image.
360 col_buffer_data, offset_data, X.sizes(), col_buffer_shape, dXdata);
361 dXdata += input_offset * group_;
// Tail of a call taking Xdata, the offsets and the column buffer; the callee
// name is not visible here.
366 Xdata, offset_data, X.sizes(), col_buffer_shape, col_buffer_data);
// Per-group GEMM over the dY slice, the column-buffer slice and the dfilter
// slice. NOTE(review): presumably accumulates into dfilter across images,
// given that dfilter is zeroed once above -- confirm the beta argument.
368 for (
int group_id = 0; group_id < group_; ++group_id) {
369 math::Gemm<T, Context>(
376 dYdata + group_id * output_offset,
377 col_buffer_data + group_id * col_buffer_offset,
379 dfilter_data + group_id * filter_offset,
// GEMV that takes bias_multiplier_ as an operand.
// NOTE(review): presumably the bias-gradient reduction -- its guard and other
// arguments are not visible here.
385 math::Gemv<T, Context>(
391 bias_multiplier_.template data<T>(),
// Advance the per-image pointers; the offset gradient uses the same stride as
// the offset input.
397 Xdata += input_offset * group_;
398 dYdata += output_offset * group_;
399 offset_data += offset_offset;
400 doffset_data += offset_offset;
407 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to the given dims and options if necessary. Note that this will not do anything if ...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...