doxygen-c/html/deform__conv__op__impl_8h_source.html

 // conv_op_impl.h is the templated implementation of the conv_op.h file.
 #ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
 #define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_

 #include "caffe2/core/context.h"
 #include "caffe2/core/flags.h"
 #include "caffe2/core/logging.h"
 #include "caffe2/core/operator.h"
 #include "caffe2/operators/conv_pool_op_base.h"
 #include "caffe2/operators/deform_conv_op.h"
 #include "caffe2/utils/math.h"

 namespace caffe2 {

 template <typename T, class Context>
 bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
   const Tensor& X = Input(INPUT);
   const Tensor& offset = Input(OFFSET);
   auto& filter = Input(FILTER);
   Tensor* Y = Output(0);
   const int N = X.dim32(0), C = X.dim32(1);
   CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
   const int M = filter.dim32(0);
   CAFFE_ENFORCE(
       C == filter.dim32(1) * group_,
       "Convolution op: input channels does not match: # of input channels ",
       C,
       " is not equal to kernel channels * group:",
       filter.dim32(1),
       "*",
       group_);
   CAFFE_ENFORCE(
       M % group_ == 0,
       "The number of output channels is not divisible by group.");
   CAFFE_ENFORCE(
       kernel_.size() == 2,
       "Deformable convolution only supports 2d kernel, has ",
       kernel_.size(),
       "d kernel.");
   CAFFE_ENFORCE(
       offset.dim() == 4,
       "Deformable convolution only supports 4d offset, has ",
       offset.dim(),
       "d offset.");
   CAFFE_ENFORCE_EQ(offset.dim32(0), N);
   CAFFE_ENFORCE(
       C % deformable_group_ == 0,
       "The number of input channels ",
       C,
       " is not divisible by deformable group ",
       deformable_group_);
   CAFFE_ENFORCE(
       M % deformable_group_ == 0,
       "The number of output channels ",
       M,
       " is not divisible by deformable group ",
       deformable_group_);
   CAFFE_ENFORCE(
       offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
       "Deformable convolution: offset 1st dimension must equal "
       "2 * kernel_h * kernel_w * deformable_group: 2 * ",
       kernel_h(),
       " * ",
       kernel_w(),
       " * ",
       deformable_group_);

   CAFFE_ENFORCE_EQ(
       offset.dim32(2),
       (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
               stride_h() +
           1);
   CAFFE_ENFORCE_EQ(
       offset.dim32(3),
       (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
               stride_w() +
           1);

   int kernel_dims_size = 1;
   for (int i = 0; i < kernel_.size(); ++i) {
     CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
     kernel_dims_size *= kernel_[i];
   }

   ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));

   const vector<int> input_dims = GetDims(X);
   const vector<int> output_dims = GetDims(*Y);
   const int input_image_size = this->GetDimsSize(X);
   const int output_image_size = this->GetDimsSize(*Y);

   vector<int> img_shape;
   img_shape.assign(X.sizes().begin() + 1, X.sizes().end());

   vector<int> buffer_shape;
   buffer_shape.push_back(C / group_ * kernel_dims_size);
   buffer_shape.insert(
       buffer_shape.end(), output_dims.begin(), output_dims.end());

   // The dimension of each kernel
   const int kernel_dim = C / group_ * kernel_dims_size;
   // The offset corresponding to a single input image, and a single output
   // image.
   const int input_offset = C / group_ * input_image_size;
   const int output_offset = M / group_ * output_image_size;
   const int offset_offset = offset.numel() / offset.dim32(0);
   const int filter_offset = filter.numel() / group_;

   // The col buffer is stored in CHW order as well - kernel_dim, and the height
   // and width.
   const T* Xdata = X.template data<T>();
   const T* offset_data = offset.template data<T>();

   if (InputSize() == 4) {
     auto& bias = Input(BIAS);
     CAFFE_ENFORCE(bias.dim() == 1);
     CAFFE_ENFORCE(bias.dim32(0) == M);
     if (bias_multiplier_.numel() != output_image_size) {
       // If the helper bias multiplier is not image size, reshape and fill it
       // with
       // one.
       ReinitializeTensor(
           &bias_multiplier_,
           vector<int64_t>(1, output_image_size),
           at::dtype<T>().device(Context::GetDeviceType()));
       math::Set<T, Context>(
           output_image_size,
           static_cast<T>(1),
           bias_multiplier_.template mutable_data<T>(),
           &context_);
     }
   }
   T* Ydata = Y->template mutable_data<T>();
   const T* bias_data = nullptr;
   if (InputSize() == 4) {
     bias_data = Input(BIAS).template data<T>();
   }

   auto f = [&](Tensor* col_buffer) {
     col_buffer->Resize(buffer_shape);
     T* col_buffer_data = col_buffer->template mutable_data<T>();
     // Im2col, followed by gemm.
     for (int image_id = 0; image_id < N; ++image_id) {
       for (int group_id = 0; group_id < group_; ++group_id) {
         DeformableIm2col(
             Xdata + group_id * input_offset,
             offset_data,
             X.sizes(),
             col_buffer->sizes(),
             col_buffer_data);
         // Weight term
         math::Gemm<T, Context>(
             CblasNoTrans,
             CblasNoTrans,
             M / group_,
             output_image_size,
             kernel_dim,
             1,
             filter.template data<T>() + group_id * filter_offset,
             col_buffer_data,
             0,
             Ydata + group_id * output_offset,
             &context_);
       }
       if (bias_data) {
         math::Gemm<T, Context>(
             CblasNoTrans,
             CblasNoTrans,
             M,
             output_image_size,
             1,
             1,
             bias_data,
             bias_multiplier_.template data<T>(),
             1,
             Ydata,
             &context_);
       }
       Xdata += input_offset * group_;
       Ydata += output_offset * group_;
       offset_data += offset_offset;
     }
   };

   if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
     runWithSharedBuffer<Context>(ws_, f);
   } else {
     f(&col_buffer_);
   }
   return true;
 }

 template <typename T, class Context>
 bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
   auto& X = Input(INPUT);
   auto& offset = Input(OFFSET);
   auto& filter = Input(FILTER);
   auto& dY = Input(OUTPUT_GRAD);


   const int N = X.dim32(0), C = X.dim32(1);

   const vector<int> input_dims = this->GetDims(X);
   const int input_image_size = this->GetDimsSize(X);

   const vector<int> output_dims = this->GetDims(dY);
   // The output image size is the spatial size of the output.
   const int output_image_size = this->GetDimsSize(dY);

   ConvPoolOpBase<Context>::ComputePads(input_dims);
   CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
   const int M = filter.dim32(0);
   CAFFE_ENFORCE(filter.dim32(1) * group_ == C);

   CAFFE_ENFORCE(
       kernel_.size() == 2,
       "Deformable convolution only supports 2d kernel, has ",
       kernel_.size(),
       "d kernel.");
   CAFFE_ENFORCE(
       offset.dim() == 4,
       "Deformable convolution only supports 4d offset, has ",
       offset.dim(),
       "d offset.");
   CAFFE_ENFORCE_EQ(offset.dim32(0), N);
   CAFFE_ENFORCE(
       C % deformable_group_ == 0,
       "The number of input channels ",
       C,
       " is not divisible by deformable group ",
       deformable_group_);
   CAFFE_ENFORCE(
       M % deformable_group_ == 0,
       "The number of output channels ",
       M,
       " is not divisible by deformable group ",
       deformable_group_);
   CAFFE_ENFORCE(
       offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
       "Deformable convolution: offset 1st dimension must equal "
       "2 * kernel_h * kernel_w * deformable_group: 2 * ",
       kernel_h(),
       " * ",
       kernel_w(),
       " * ",
       deformable_group_);

   CAFFE_ENFORCE_EQ(
       offset.dim32(2),
       (X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
               stride_h() +
           1);
   CAFFE_ENFORCE_EQ(
       offset.dim32(3),
       (X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
               stride_w() +
           1);

   int kernel_dims_size = 1;
   for (int i = 0; i < kernel_.size(); ++i) {
     CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
     kernel_dims_size *= kernel_[i];
   }

   CAFFE_ENFORCE(M % group_ == 0);
   auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
   auto* doffset = Output(OFFSET_GRAD, offset.sizes(), at::dtype<T>());

   // The dimension of each kernel
   const int kernel_dim = C / group_ * kernel_dims_size;
   // The offset corresponding to a single input image, and a single output
   // image.
   const int input_offset = C / group_ * input_image_size;
   const int output_offset = M / group_ * output_image_size;
   const int offset_offset = offset.numel() / offset.dim32(0);
   const int filter_offset = filter.numel() / group_;

   // The col buffer is stored in CHW order as well - kernel_dim, and the
   // height and width.
   vector<int64_t> img_shape;
   img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
   vector<int64_t> col_buffer_shape;
   col_buffer_shape.push_back(C * kernel_dims_size);
   col_buffer_shape.insert(
       col_buffer_shape.end(), output_dims.begin(), output_dims.end());
   ReinitializeTensor(
       &col_buffer_,
       col_buffer_shape,
       at::dtype<T>().device(Context::GetDeviceType()));

   const int col_buffer_offset = col_buffer_.numel() / group_;

   const T* Xdata = X.template data<T>();
   const T* filter_data = filter.template data<T>();
   const T* offset_data = offset.template data<T>();
   const T* dYdata = dY.template data<T>();
   T* col_buffer_data = col_buffer_.template mutable_data<T>();
   T* dfilter_data = dfilter->template mutable_data<T>();
   T* doffset_data = doffset->template mutable_data<T>();

   // Pre-setting the gradients to zero.
   math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);

   T* dbias_data = nullptr;
   if (!no_bias_) {

     auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
     if (bias_multiplier_.numel() != output_image_size) {
       // If the helper bias multiplier is not M, reshape and fill it with one.
       ReinitializeTensor(
           &bias_multiplier_,
           vector<int64_t>(1, output_image_size),
           at::dtype<T>().device(Context::GetDeviceType()));
       math::Set<T, Context>(
           output_image_size,
           static_cast<T>(1),
           bias_multiplier_.template mutable_data<T>(),
           &context_);
     }
     dbias_data = dbias->template mutable_data<T>();
     math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
   }

   T* dXdata = nullptr;
   if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {

     auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
     dXdata = dX->template mutable_data<T>();
     math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
   }

   for (int image_id = 0; image_id < N; ++image_id) {
     for (int group_id = 0; group_id < group_; ++group_id) {
       math::Gemm<T, Context>(
           CblasTrans,
           CblasNoTrans,
           kernel_dim,
           output_image_size,
           M / group_,
           1,
           filter_data + group_id * filter_offset,
           dYdata + group_id * output_offset,
           0,
           col_buffer_data + group_id * col_buffer_offset,
           &context_);
     }

     // Gradient with respect to offsets
     DeformableCol2imCoord(
         col_buffer_data,
         Xdata,
         offset_data,
         X.sizes(),
         col_buffer_shape,
         doffset_data);

     // Gradient with respect to input data
     if (dXdata) {
       DeformableCol2im(
           col_buffer_data, offset_data, X.sizes(), col_buffer_shape, dXdata);
       dXdata += input_offset * group_;
     }

     // Gradient with respect to filter
     DeformableIm2col(
         Xdata, offset_data, X.sizes(), col_buffer_shape, col_buffer_data);

     for (int group_id = 0; group_id < group_; ++group_id) {
       math::Gemm<T, Context>(
           CblasNoTrans,
           CblasTrans,
           M / group_,
           kernel_dim,
           output_image_size,
           1,
           dYdata + group_id * output_offset,
           col_buffer_data + group_id * col_buffer_offset,
           1,
           dfilter_data + group_id * filter_offset,
           &context_);
     }

     // Gradient with respect to bias
     if (dbias_data) {
       math::Gemv<T, Context>(
           CblasNoTrans,
           M,
           output_image_size,
           1,
           dYdata,
           bias_multiplier_.template data<T>(),
           1,
           dbias_data,
           &context_);
     }

     Xdata += input_offset * group_;
     dYdata += output_offset * group_;
     offset_data += offset_offset;
     doffset_data += offset_offset;
   }

   return true;
 }
 } // namespace caffe2

 #endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
M
Definition: any.cpp:108

caffe2::ReinitializeTensor
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if ...
Definition: tensor.cc:127

T
Definition: dataloader.cpp:482

caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13

Tensor
Definition: ios_caffe_predictor.h:9

C
Definition: static.cpp:64