Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_pool_op_base.h
#ifndef CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_
#define CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_

#include <algorithm>
#include <cmath>
#include <functional>
#include <numeric>
#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/proto/caffe2_legacy.pb.h"
#include "caffe2/utils/math.h"

// This constant is here just to allow us to experiment with padding values
// that determine, when we have an odd number of pads, which side gets the one
// additional pad value: the head side or the tail side. Setting it to false
// enables the TensorFlow behavior; setting it to true enables a behavior more
// consistent with Caffe and CuDNN.
// This only affects the case when you set legacy pad to VALID or SAME. The
// behavior inherits from the early designs of Google's CNN implementation,
// where padding values are implicitly calculated instead of explicitly
// specified. This is still the case with TensorFlow. Many frameworks have
// followed a slightly different approach of explicitly giving padding values,
// in which case the value of this constant does not matter.
const bool CAFFE2_PAD_HEAD_MORE = false;
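
// Illustrative example (not part of the original comment): if a SAME-padded
// dimension needs a total of 3 pad pixels, the default (false) gives
// pad_head = 1 and pad_tail = 2, matching TensorFlow, while true gives
// pad_head = 2 and pad_tail = 1, matching Caffe and CuDNN.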

namespace caffe2 {

template <class Context>
class ConvPoolOpBase : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  explicit ConvPoolOpBase(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        legacy_pad_(
            static_cast<LegacyPadding>(this->template GetSingleArgument<int>(
                "legacy_pad",
                LegacyPadding::NOTSET))),
        global_pooling_(
            this->template GetSingleArgument<int>("global_pooling", 0)),
        kernel_(this->template GetRepeatedArgument<int>("kernels")),
        dilation_(this->template GetRepeatedArgument<int>("dilations")),
        stride_(this->template GetRepeatedArgument<int>("strides")),
        pads_(this->template GetRepeatedArgument<int>("pads")),
        float16_compute_(
            this->template GetSingleArgument<bool>("float16_compute", false)),
        group_(this->template GetSingleArgument<int>("group", 1)),
        order_(StringToStorageOrder(
            this->template GetSingleArgument<string>("order", "NCHW"))),
        shared_buffer_(
            this->template GetSingleArgument<int>("shared_buffer", 0)),
        ws_(ws) {
    // The padding values should either come from the legacy padding strategy
    // (VALID or SAME) or be explicit, non-negative values.
    if (legacy_pad_ == LegacyPadding::VALID ||
        legacy_pad_ == LegacyPadding::SAME) {
      CAFFE_ENFORCE(
          !OperatorBase::HasArgument("pads"),
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
    }

    // Get old argument values.
    if (OperatorBase::HasArgument("kernel")) {
      kernel_.resize(2, this->template GetSingleArgument<int>("kernel", 0));
    } else if (
        OperatorBase::HasArgument("kernel_h") &&
        OperatorBase::HasArgument("kernel_w")) {
      kernel_.push_back(this->template GetSingleArgument<int>("kernel_h", 0));
      kernel_.push_back(this->template GetSingleArgument<int>("kernel_w", 0));
    }

    if (OperatorBase::HasArgument("stride")) {
      stride_.resize(2, this->template GetSingleArgument<int>("stride", 0));
    } else if (
        OperatorBase::HasArgument("stride_h") &&
        OperatorBase::HasArgument("stride_w")) {
      stride_.push_back(this->template GetSingleArgument<int>("stride_h", 0));
      stride_.push_back(this->template GetSingleArgument<int>("stride_w", 0));
    }

    if (OperatorBase::HasArgument("dilation")) {
      dilation_.resize(2, this->template GetSingleArgument<int>("dilation", 0));
    } else if (
        OperatorBase::HasArgument("dilation_h") &&
        OperatorBase::HasArgument("dilation_w")) {
      dilation_.push_back(
          this->template GetSingleArgument<int>("dilation_h", 0));
      dilation_.push_back(
          this->template GetSingleArgument<int>("dilation_w", 0));
    }

    if (OperatorBase::HasArgument("pad")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.resize(4, this->template GetSingleArgument<int>("pad", 0));
    } else if (
        OperatorBase::HasArgument("pad_t") &&
        OperatorBase::HasArgument("pad_l") &&
        OperatorBase::HasArgument("pad_b") &&
        OperatorBase::HasArgument("pad_r")) {
      CAFFE_ENFORCE(
          legacy_pad_ != LegacyPadding::VALID &&
              legacy_pad_ != LegacyPadding::SAME,
          "If you use legacy padding VALID or SAME, you should not specify "
          "any specific padding values.");
      pads_.push_back(this->template GetSingleArgument<int>("pad_t", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_l", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_b", 0));
      pads_.push_back(this->template GetSingleArgument<int>("pad_r", 0));
    }

    // Fill default values.
    if (kernel_.size() == 0) {
      kernel_.assign({0, 0});
    }

    if (stride_.size() == 0) {
      stride_.resize(kernel_.size(), 1);
    }

    if (pads_.size() == 0) {
      pads_.resize(kernel_.size() * 2, 0);
    }

    if (dilation_.size() == 0) {
      dilation_.resize(kernel_.size(), 1);
    }

    CAFFE_ENFORCE_EQ(stride_.size(), kernel_.size());
    CAFFE_ENFORCE_EQ(dilation_.size(), kernel_.size());

    if (legacy_pad_ != LegacyPadding::VALID &&
        legacy_pad_ != LegacyPadding::SAME) {
      CAFFE_ENFORCE_EQ(pads_.size(), 2 * kernel_.size());
    }

    if (global_pooling_) {
      for (size_t dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE(
            pads_[2 * dim] == 0 && pads_[2 * dim + 1] == 0 &&
                dilation_[dim] == 1 && stride_[dim] == 1,
            "If global_pooling is set, pad, dilation and stride shouldn't "
            "be set.");
      }
    }

    // Check kernel only if we are doing conv or pooling. The reason is that a
    // few other ops, like PadImage, are also using this base class. We really
    // need to clean this up.
    if (operator_def.name().find("Conv") == 0 ||
        operator_def.name().find("Pool") != std::string::npos) {
      for (size_t dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_GE(pads_[dim], 0);
        CAFFE_ENFORCE_GE(pads_[kernel_.size() + dim], 0);
        CAFFE_ENFORCE(
            kernel_[dim],
            "If you are doing convolution or pooling, you will need to set "
            "the kernel size explicitly.");
      }
    }

    for (size_t dim = 0; dim < kernel_.size(); ++dim) {
      CAFFE_ENFORCE_GE(kernel_[dim], 0);
      CAFFE_ENFORCE_GE(dilation_[dim], 0);
      CAFFE_ENFORCE_GE(stride_[dim], 0);
    }
  }

  // Returns the input image dimensions for the current storage order type.
  vector<int> GetDims(const Tensor& input) {
    vector<int> dims;
    switch (order_) {
      case StorageOrder::NCHW:
        dims.assign(input.sizes().begin() + 2, input.sizes().end());
        break;
      case StorageOrder::NHWC:
        dims.assign(input.sizes().begin() + 1, input.sizes().end() - 1);
        break;
      default:
        CAFFE_THROW("Unknown storage order: ", order_);
    }
    return dims;
  }
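
  // Example (illustrative, not part of the original source): for an NCHW
  // batch of shape {8, 3, 32, 32}, GetDims returns {32, 32}; for the same
  // images stored as NHWC, shape {8, 32, 32, 3}, it also returns {32, 32}.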

  // Returns the size of the input image for the current storage type.
  int GetDimsSize(const Tensor& input) {
    int size = 0;
    switch (order_) {
      case StorageOrder::NCHW:
        size = std::accumulate(
            input.sizes().begin() + 2,
            input.sizes().end(),
            1,
            std::multiplies<int>());
        break;
      case StorageOrder::NHWC:
        size = std::accumulate(
            input.sizes().begin() + 1,
            input.sizes().end() - 1,
            1,
            std::multiplies<int>());
        break;
      default:
        CAFFE_THROW("Unknown storage order: ", order_);
    }
    return size;
  }
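
  // Example (illustrative only): for either 8x3x32x32 layout above,
  // GetDimsSize returns 32 * 32 = 1024, the per-channel image size.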

  // Gets the output size. The output channel is manually provided since
  // it may not be identical to the input channels.
  // This function can be used in the forward functions to obtain the output
  // sizes.
  // Note(jiayq): the templatization of this function is mainly to help
  // implementations that do not use first-class Tensor objects, such as the
  // MKL operator. One can still call this function with dummy
  // Tensor objects in order to obtain the sizes.
  std::vector<int64_t> GetOutputSize(const Tensor& input, int output_channel) {
    CAFFE_ENFORCE_GE(input.dim(), 2);
    const int inner_size = input.size_from_dim(1);
    CAFFE_ENFORCE_GT(inner_size, 0);
    std::vector<int64_t> output_dims;
    InferOutputSize64(
        input.sizes(),
        output_channel,
        order_,
        global_pooling_,
        legacy_pad_,
        dilation_,
        stride_,
        &kernel_,
        &pads_,
        &output_dims);
    return output_dims;
  }

  void SetOutputSize(const Tensor& input, Tensor* output, int output_channel) {
    const int inner_size = input.size_from_dim(1);
    CAFFE_ENFORCE_GT(inner_size, 0);
    std::vector<int> output_dims;
    InferOutputSize(
        input.sizes(),
        output_channel,
        order_,
        global_pooling_,
        legacy_pad_,
        dilation_,
        stride_,
        &kernel_,
        &pads_,
        &output_dims);
    output->Resize(output_dims);
  }
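
  // Typical usage (a sketch, not taken from this file): a derived 2D conv
  // op's NCHW path might call
  //   auto& X = Input(0);
  //   auto& W = Input(1);  // filter of shape {M, C / group, kH, kW}
  //   auto* Y = Output(0);
  //   this->SetOutputSize(X, Y, W.dim32(0));
  // after which Y has the inferred output shape, and pads_ have been updated
  // when a legacy padding mode is in effect.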

  // Helper function that is also called from OperatorSchema. May modify the
  // kernel and pad parameters, and writes the inferred output dimensions to
  // output_dims.
  static void InferOutputSize(
      const at::IntArrayRef& input_dims,
      const int output_channel,
      const StorageOrder order,
      const bool global_pooling,
      const LegacyPadding legacy_pad,
      const std::vector<int>& dilation,
      const std::vector<int>& stride,
      std::vector<int>* kernel,
      std::vector<int>* pads,
      std::vector<int>* output_dims) {
    CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
    const int ndim = input_dims.size() - 2;
    output_dims->resize(ndim + 2);
    output_dims->front() = input_dims.front();
    if (order == StorageOrder::NCHW) {
      output_dims->at(1) = output_channel;
    } else {
      output_dims->back() = output_channel;
    }
    const int offset = order == StorageOrder::NCHW ? 2 : 1;
    if (global_pooling) {
      std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
      std::fill_n(output_dims->begin() + offset, ndim, 1LL);
    } else {
      for (int i = 0; i < ndim; ++i) {
        ComputeSizeAndPad(
            input_dims[i + offset],
            stride[i],
            kernel->at(i),
            dilation[i],
            legacy_pad,
            &pads->at(i),
            &pads->at(i + ndim),
            &output_dims->at(i + offset));
      }
    }
  }
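
  // Worked example (illustrative only): input_dims {1, 3, 224, 224} (NCHW),
  // output_channel 64, kernel {7, 7}, stride {2, 2}, dilation {1, 1},
  // pads {3, 3, 3, 3}, legacy_pad NOTSET: each spatial dimension becomes
  // (224 + 3 + 3 - 7) / 2 + 1 = 112, so *output_dims is {1, 64, 112, 112}.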

  static void InferOutputSize64(
      const at::IntList& input_dims,
      const int output_channel,
      const StorageOrder order,
      const bool global_pooling,
      const LegacyPadding legacy_pad,
      const std::vector<int>& dilation,
      const std::vector<int>& stride,
      std::vector<int>* kernel,
      std::vector<int>* pads,
      std::vector<int64_t>* output_dims) {
    CAFFE_ENFORCE_NE(order, StorageOrder::UNKNOWN);
    const int ndim = input_dims.size() - 2;
    output_dims->resize(ndim + 2);
    output_dims->front() = input_dims.front();
    if (order == StorageOrder::NCHW) {
      output_dims->at(1) = output_channel;
    } else {
      output_dims->back() = output_channel;
    }
    const int offset = order == StorageOrder::NCHW ? 2 : 1;
    if (global_pooling) {
      std::copy_n(input_dims.cbegin() + offset, ndim, kernel->begin());
      std::fill_n(output_dims->begin() + offset, ndim, 1LL);
    } else {
      for (int i = 0; i < ndim; ++i) {
        ComputeSizeAndPad64(
            input_dims[i + offset],
            stride[i],
            kernel->at(i),
            dilation[i],
            legacy_pad,
            &pads->at(i),
            &pads->at(i + ndim),
            &output_dims->at(i + offset));
      }
    }
  }

  // ComputePads could be used in backward functions to figure out the padding
  // values for the given input.
  void ComputePads(const vector<int>& dims) {
    if (global_pooling_) {
      kernel_ = dims;
    } else if (legacy_pad_ != LegacyPadding::NOTSET) {
      int output_unused;
      for (int dim = 0; dim < dims.size(); ++dim) {
        ComputeSizeAndPad(
            dims[dim],
            stride_[dim],
            kernel_[dim],
            dilation_[dim],
            legacy_pad_,
            &pads_[dim],
            &pads_[dims.size() + dim],
            &output_unused);
      }
    }
  }

  bool HasPad() const {
    if (kernel_.size() == 2) {
      return pad_t() > 0 || pad_b() > 0 || pad_l() > 0 || pad_r() > 0;
    }
    return std::any_of(
        pads_.cbegin(), pads_.cend(), [](const int x) { return x > 0; });
  }

  bool HasStride() const {
    if (kernel_.size() == 2) {
      return stride_h() > 1 || stride_w() > 1;
    }
    return std::any_of(
        stride_.cbegin(), stride_.cend(), [](const int x) { return x > 1; });
  }

  void SetDeviceTensor(const std::vector<int>& data, Tensor* tensor) {
    bool reset_tensor_device_ = false;

    if (tensor->numel() != data.size()) {
      tensor->Resize(data.size());
      reset_tensor_device_ = true;
    } else {
      const int* tensor_data = tensor->template data<int>();
      for (int d_i = 0; d_i < data.size(); ++d_i) {
        if (tensor_data[d_i] != data[d_i]) {
          reset_tensor_device_ = true;
          break;
        }
      }
    }

    if (reset_tensor_device_) {
      context_.template Copy<int, CPUContext, Context>(
          data.size(), data.data(), tensor->template mutable_data<int>());
    }
  }

  template <typename T>
  void SetBiasMultiplier(const int size, Tensor* bias_multiplier_) {
    if (bias_multiplier_->numel() != size) {
      // If the helper bias multiplier does not have the expected size,
      // reshape it and fill it with ones.
      bias_multiplier_->Resize(std::vector<int64_t>{size});
      math::Set<T, Context>(
          size,
          static_cast<T>(1),
          bias_multiplier_->template mutable_data<T>(),
          &context_);
    }
  }

  bool RunOnDevice() override {
    if (!global_pooling_) {
      for (size_t dim = 0; dim < kernel_.size(); ++dim) {
        CAFFE_ENFORCE_GT(kernel_[dim], 0);
      }
    }
    switch (order_) {
      case StorageOrder::NHWC:
        // VLOG(2) << "Running NHWC";
        return RunOnDeviceWithOrderNHWC();
      case StorageOrder::NCHW:
        // VLOG(2) << "Running NCHW";
        return RunOnDeviceWithOrderNCHW();
      default:
        CAFFE_THROW("Unknown storage order: ", order_);
    }
  }

  // The actual functions that do the computation, for cases where different
  // storage orders lead to different implementations.
  virtual bool RunOnDeviceWithOrderNHWC() {
    CAFFE_NOT_IMPLEMENTED;
  }
  virtual bool RunOnDeviceWithOrderNCHW() {
    CAFFE_NOT_IMPLEMENTED;
  }

  static struct OpSchema::Cost CostInferenceForConv(
      const OperatorDef& def,
      const vector<TensorShape>& inputs) {
    CAFFE_ENFORCE_GE(inputs.size(), 2, "Conv requires at least 2 inputs");
    struct OpSchema::Cost c;
    const TensorShape X = inputs[0];
    const TensorShape W = inputs[1];
    const TensorShape Y = TensorInferenceForConv(def, inputs)[0];
    ArgumentHelper helper(def);
    const auto order =
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));
    uint64_t N;
    uint64_t Y_h;
    uint64_t Y_w = 1;
    uint64_t Y_t = 1;
    uint64_t kernel_h;
    uint64_t kernel_w = 1;
    uint64_t kernel_t = 1;
    uint64_t in_channels;
    uint64_t out_channels;

    if (X.dims_size() == 0 || W.dims_size() == 0) {
      return c;
    }
    N = X.dims(0);
    if (X.dims_size() == 5) {
      // 3D convolution
      CAFFE_ENFORCE_EQ(order, StorageOrder::NCHW, "Conv3D only supports NCHW");
      Y_t = Y.dims(2);
      Y_h = Y.dims(3);
      Y_w = Y.dims(4);
      kernel_t = W.dims(2);
      kernel_h = W.dims(3);
      kernel_w = W.dims(4);
      in_channels = W.dims(1);
      out_channels = W.dims(0);
    } else if (X.dims_size() == 4) {
      // 2D convolution
      CAFFE_ENFORCE_EQ(W.dims_size(), 4, "Conv2D should have 4D filter tensor");
      if (order == StorageOrder::NHWC) {
        Y_h = Y.dims(1);
        Y_w = Y.dims(2);
        kernel_h = W.dims(1);
        kernel_w = W.dims(2);
        in_channels = W.dims(3);
        out_channels = W.dims(0);
      } else {
        Y_h = Y.dims(2);
        Y_w = Y.dims(3);
        kernel_h = W.dims(2);
        kernel_w = W.dims(3);
        in_channels = W.dims(1);
        out_channels = W.dims(0);
      }
    } else {
      // 1D convolution
      CAFFE_ENFORCE_EQ(W.dims_size(), 3, "Conv1D should have 3D filter tensor");
      if (order == StorageOrder::NHWC) {
        Y_h = Y.dims(1);
        kernel_h = W.dims(1);
        in_channels = W.dims(2);
        out_channels = W.dims(0);
      } else {
        Y_h = Y.dims(2);
        kernel_h = W.dims(2);
        in_channels = W.dims(1);
        out_channels = W.dims(0);
      }
    }

    uint64_t nElemX = nElemFromDim(X);
    uint64_t nElemW = nElemFromDim(W);
    uint64_t nElemBias = inputs.size() > 2 ? nElemFromDim(inputs[2]) : 0;

    // grouping is NOT properly handled yet
    c.flops = N * Y_t * Y_h * Y_w * kernel_t * kernel_w * kernel_h *
        in_channels * out_channels * 2;
    c.bytes_read = (nElemX + nElemW + nElemBias) * sizeof(X.data_type());
    c.bytes_written =
        N * out_channels * Y_t * Y_h * Y_w * sizeof(Y.data_type());
    c.params_bytes = out_channels * in_channels * kernel_t * kernel_h *
        kernel_w * sizeof(W.data_type());
    return c;
  }
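
  // Worked example (illustrative only): a 2D NCHW conv with N = 1,
  // Y = 56x56, kernel = 3x3, in_channels = 64 and out_channels = 64 gives
  // c.flops = 1 * 56 * 56 * 3 * 3 * 64 * 64 * 2 ~= 2.31e8, i.e. each
  // multiply-accumulate is counted as two flops (and, per the note above,
  // grouped convolution is overcounted).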

  static vector<TensorShape> TensorInferenceForSchema(
      const OperatorDef& def,
      const vector<TensorShape>& in,
      int output_channel) {
    ArgumentHelper helper(def);
    CAFFE_ENFORCE_GT(in.size(), 0);
    CAFFE_ENFORCE_GT(in[0].dims_size(), 0);
    vector<int> pads = helper.GetRepeatedArgument<int>("pads");
    vector<int> kernel = helper.GetRepeatedArgument<int>("kernels");
    vector<int> strides = helper.GetRepeatedArgument<int>("strides");
    vector<int> dilations = helper.GetRepeatedArgument<int>("dilation");
    if (helper.HasArgument("pad")) {
      pads.resize(4, helper.GetSingleArgument<int>("pad", 0));
    } else if (
        helper.HasArgument("pad_t") && helper.HasArgument("pad_l") &&
        helper.HasArgument("pad_b") && helper.HasArgument("pad_r")) {
      pads.push_back(helper.GetSingleArgument<int>("pad_t", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_l", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_b", 0));
      pads.push_back(helper.GetSingleArgument<int>("pad_r", 0));
    }

    if (helper.HasArgument("kernel")) {
      kernel.resize(2, helper.GetSingleArgument<int>("kernel", 1));
    } else if (
        helper.HasArgument("kernel_h") && helper.HasArgument("kernel_w")) {
      kernel.push_back(helper.GetSingleArgument<int>("kernel_h", 1));
      kernel.push_back(helper.GetSingleArgument<int>("kernel_w", 1));
    }

    if (helper.HasArgument("stride")) {
      strides.resize(2, helper.GetSingleArgument<int>("stride", 1));
    } else if (
        helper.HasArgument("stride_h") && helper.HasArgument("stride_w")) {
      strides.push_back(helper.GetSingleArgument<int>("stride_h", 1));
      strides.push_back(helper.GetSingleArgument<int>("stride_w", 1));
    }

    if (helper.HasArgument("dilation")) {
      dilations.resize(2, helper.GetSingleArgument<int>("dilation", 1));
    } else if (
        helper.HasArgument("dilation_h") && helper.HasArgument("dilation_w")) {
      dilations.push_back(helper.GetSingleArgument<int>("dilation_h", 1));
      dilations.push_back(helper.GetSingleArgument<int>("dilation_w", 1));
    }

    auto check_and_set_default_value =
        [](vector<int>& vec, int size, int value) {
          if (vec.size() == 0) {
            vec.resize(size, value);
          }
        };

    check_and_set_default_value(kernel, 2, 1);
    check_and_set_default_value(strides, kernel.size(), 1);
    check_and_set_default_value(pads, kernel.size() * 2, 0);
    check_and_set_default_value(dilations, kernel.size(), 1);

    std::vector<int> output_dims;
    ConvPoolOpBase<CPUContext>::InferOutputSize(
        GetDimsVector(in[0]),
        output_channel,
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW")),
        helper.GetSingleArgument<int>("global_pooling", 0),
        static_cast<LegacyPadding>(
            helper.GetSingleArgument<int>("legacy_pad", LegacyPadding::NOTSET)),
        dilations,
        strides,
        &kernel,
        &pads,
        &output_dims);
    return {CreateTensorShape(output_dims, TensorProto::FLOAT)};
  }

  static std::vector<TensorShape> TensorInferenceForConv(
      const OperatorDef& def,
      const std::vector<TensorShape>& in) {
    if (in[0].unknown_shape()) {
      std::vector<TensorShape> out(1);
      out[0].set_unknown_shape(true);
      return out;
    }
    return TensorInferenceForSchema(def, in, in[1].dims(0));
  }
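
  // These static helpers are intended to be wired into operator schemas; a
  // sketch of typical registration (schema details assumed, not taken from
  // this file):
  //   OPERATOR_SCHEMA(Conv)
  //       .NumInputs(2, 3)
  //       .NumOutputs(1)
  //       .TensorInferenceFunction(
  //           ConvPoolOpBase<CPUContext>::TensorInferenceForConv)
  //       .CostInferenceFunction(OpSchema::CostInferenceFunctionType(
  //           ConvPoolOpBase<CPUContext>::CostInferenceForConv));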

  static std::vector<TensorShape> TensorInferenceForPool(
      const OperatorDef& def,
      const std::vector<TensorShape>& in) {
    if (in[0].unknown_shape()) {
      std::vector<TensorShape> out(1);
      out[0].set_unknown_shape(true);
      return out;
    }
    ArgumentHelper helper(def);
    auto order =
        StringToStorageOrder(helper.GetSingleArgument<string>("order", "NCHW"));
    int num_channels =
        (order == StorageOrder::NCHW ? in[0].dims(1) : in[0].dims(3));
    return TensorInferenceForSchema(def, in, num_channels);
  }

  static std::vector<TensorShape> TensorInferenceForLC(
      const OperatorDef& def,
      const std::vector<TensorShape>& in) {
    if (in[0].unknown_shape()) {
      std::vector<TensorShape> out(1);
      out[0].set_unknown_shape(true);
      return out;
    }
    const int img_ndim = in[0].dims_size() - 2;
    return TensorInferenceForSchema(def, in, in[1].dims(img_ndim));
  }

  virtual ~ConvPoolOpBase() {}

 protected:
  LegacyPadding legacy_pad_;
  bool global_pooling_;
  vector<int> kernel_;
  vector<int> dilation_;
  vector<int> stride_;
  vector<int> pads_;

  bool float16_compute_;

  int group_;
  StorageOrder order_;
  bool shared_buffer_;
  Workspace* ws_;

  static inline void ComputeSizeAndPad(
      const int in_size,
      const int stride,
      const int kernel,
      const int dilation,
      LegacyPadding legacy_pad,
      int* pad_head,
      int* pad_tail,
      int* out_size) {
    const int dkernel = dilation * (kernel - 1) + 1;
    switch (legacy_pad) {
      case LegacyPadding::NOTSET:
        // We will just use the direct padding head and tail values, but we
        // will verify that they are non-negative.
        CAFFE_ENFORCE_GE(in_size + *pad_head + *pad_tail, dkernel);
        *out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head + *pad_tail - dkernel) /
                stride +
            1);
        break;
      case LegacyPadding::VALID:
        *pad_head = 0;
        *pad_tail = 0;
        *out_size = (in_size - dkernel) / stride + 1;
        break;
      case LegacyPadding::SAME: {
        CAFFE_ENFORCE(
            1 == dilation, "Dilation not supported for legacy padding.");
        int legacy_target_size = (in_size + stride - 1) / stride;
        int pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
        if (CAFFE2_PAD_HEAD_MORE) {
          *pad_head = (pad_needed + 1) / 2;
        } else {
          *pad_head = pad_needed / 2;
        }
        *pad_tail = pad_needed - *pad_head;
        *out_size = (in_size + pad_needed - dkernel) / stride + 1;
        break;
      }
      case LegacyPadding::CAFFE_LEGACY_POOLING:
        // This is in order to adapt Caffe's pooling padding case. In this
        // case, we will only use pad_head and will compute pad_tail to match
        // the old Caffe pooling strategy. Also see caffe2_legacy.proto for
        // more details.
        CAFFE_ENFORCE_GE(*pad_head, 0);
        // Here, notice that Caffe rounds UP while Caffe2 rounds DOWN for the
        // output size computation.
        *out_size = std::ceil(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        // If we have padding, Caffe also ensures that the last pooling starts
        // strictly inside the image (instead of at the padding); otherwise
        // clip the last.
        if (*pad_head > 0 && (*out_size - 1) * stride >= in_size + *pad_head) {
          --*out_size;
        }
        // Now, compare the output size with the standard Caffe2 output size.
        // The Caffe2 standard output size should always be no larger than
        // the output size of Caffe.
        int standard_out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        CAFFE_ENFORCE_GE(
            *out_size,
            standard_out_size,
            "This should never happen. If this happens, double check the "
            "logic above.");
        if (*out_size > standard_out_size) {
          LOG(WARNING)
              << "You are hitting a case where Caffe's legacy padding "
                 "calculation is hit. This leads to inefficient and sometimes "
                 "incorrect results. We are keeping this behavior for "
                 "backward compatibility, but you are strongly recommended "
                 "to move away from it.";
        }
        *pad_tail = *pad_head + stride * (*out_size - standard_out_size);
        break;
    }
  }
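
  // Worked example of the CAFFE_LEGACY_POOLING branch (illustrative only):
  // in_size = 6, kernel = 3, stride = 2, pad_head = 1. Caffe rounds up:
  // *out_size = ceil((6 + 2 - 3) / 2.0 + 1) = 4, and no clipping applies
  // because (4 - 1) * 2 = 6 < 6 + 1. The floor-based Caffe2 size would be 3,
  // so the warning above fires and *pad_tail = 1 + 2 * (4 - 3) = 3.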

  static inline void ComputeSizeAndPad64(
      const int in_size,
      const int stride,
      const int kernel,
      const int dilation,
      LegacyPadding legacy_pad,
      int* pad_head,
      int* pad_tail,
      int64_t* out_size) {
    const int dkernel = dilation * (kernel - 1) + 1;
    switch (legacy_pad) {
      case LegacyPadding::NOTSET:
        // We will just use the direct padding head and tail values, but we
        // will verify that they are non-negative.
        CAFFE_ENFORCE_GE(in_size + *pad_head + *pad_tail, dkernel);
        *out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head + *pad_tail - dkernel) /
                stride +
            1);
        break;
      case LegacyPadding::VALID:
        *pad_head = 0;
        *pad_tail = 0;
        *out_size = (in_size - dkernel) / stride + 1;
        break;
      case LegacyPadding::SAME: {
        CAFFE_ENFORCE(
            1 == dilation, "Dilation not supported for legacy padding.");
        int legacy_target_size = (in_size + stride - 1) / stride;
        int pad_needed = (legacy_target_size - 1) * stride + kernel - in_size;
        if (CAFFE2_PAD_HEAD_MORE) {
          *pad_head = (pad_needed + 1) / 2;
        } else {
          *pad_head = pad_needed / 2;
        }
        *pad_tail = pad_needed - *pad_head;
        *out_size = (in_size + pad_needed - dkernel) / stride + 1;
        break;
      }
      case LegacyPadding::CAFFE_LEGACY_POOLING:
        // This is in order to adapt Caffe's pooling padding case. In this
        // case, we will only use pad_head and will compute pad_tail to match
        // the old Caffe pooling strategy. Also see caffe2_legacy.proto for
        // more details.
        CAFFE_ENFORCE_GE(*pad_head, 0);
        // Here, notice that Caffe rounds UP while Caffe2 rounds DOWN for the
        // output size computation.
        *out_size = std::ceil(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        // If we have padding, Caffe also ensures that the last pooling starts
        // strictly inside the image (instead of at the padding); otherwise
        // clip the last.
        if (*pad_head > 0 && (*out_size - 1) * stride >= in_size + *pad_head) {
          --*out_size;
        }
        // Now, compare the output size with the standard Caffe2 output size.
        // The Caffe2 standard output size should always be no larger than
        // the output size of Caffe.
        int standard_out_size = static_cast<int>(
            static_cast<float>(in_size + *pad_head * 2 - kernel) / stride + 1);
        CAFFE_ENFORCE_GE(
            *out_size,
            standard_out_size,
            "This should never happen. If this happens, double check the "
            "logic above.");
        if (*out_size > standard_out_size) {
          LOG(WARNING)
              << "You are hitting a case where Caffe's legacy padding "
                 "calculation is hit. This leads to inefficient and sometimes "
                 "incorrect results. We are keeping this behavior for "
                 "backward compatibility, but you are strongly recommended "
                 "to move away from it.";
        }
        *pad_tail = *pad_head + stride * (*out_size - standard_out_size);
        break;
    }
  }

  // Accessors for 2D conv params.

  inline int pad_t() const {
    return pads_[0];
  }

  inline int pad_l() const {
    return pads_[1];
  }

  inline int pad_b() const {
    return pads_[2];
  }

  inline int pad_r() const {
    return pads_[3];
  }

  inline int kernel_h() const {
    return kernel_[0];
  }

  inline int kernel_w() const {
    return kernel_[1];
  }

  inline int stride_h() const {
    return stride_[0];
  }

  inline int stride_w() const {
    return stride_[1];
  }

  inline int dilation_h() const {
    return dilation_[0];
  }

  inline int dilation_w() const {
    return dilation_[1];
  }

 private:
  inline void AllocateAndCopy(const vector<int>& vec, Tensor& tensor) {
    tensor.Resize(vec.size());
    context_.template CopyFromCPU<int>(
        vec.size(), vec.data(), tensor.template mutable_data<int>());
  }

#define USE_CONV_POOL_BASE_FUNCTIONS(Context)      \
  USE_OPERATOR_FUNCTIONS(Context);                 \
  using ConvPoolOpBase<Context>::pads_;            \
  using ConvPoolOpBase<Context>::pad_t;            \
  using ConvPoolOpBase<Context>::pad_l;            \
  using ConvPoolOpBase<Context>::pad_b;            \
  using ConvPoolOpBase<Context>::pad_r;            \
  using ConvPoolOpBase<Context>::legacy_pad_;      \
  using ConvPoolOpBase<Context>::global_pooling_;  \
  using ConvPoolOpBase<Context>::kernel_;          \
  using ConvPoolOpBase<Context>::kernel_h;         \
  using ConvPoolOpBase<Context>::kernel_w;         \
  using ConvPoolOpBase<Context>::dilation_;        \
  using ConvPoolOpBase<Context>::dilation_h;       \
  using ConvPoolOpBase<Context>::dilation_w;       \
  using ConvPoolOpBase<Context>::stride_;          \
  using ConvPoolOpBase<Context>::stride_h;         \
  using ConvPoolOpBase<Context>::stride_w;         \
  using ConvPoolOpBase<Context>::group_;           \
  using ConvPoolOpBase<Context>::order_;           \
  using ConvPoolOpBase<Context>::shared_buffer_;   \
  using ConvPoolOpBase<Context>::GetDims;          \
  using ConvPoolOpBase<Context>::GetDimsSize;      \
  using ConvPoolOpBase<Context>::SetDeviceTensor;  \
  using ConvPoolOpBase<Context>::HasPad;           \
  using ConvPoolOpBase<Context>::HasStride;        \
  using ConvPoolOpBase<Context>::ws_
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_CONV_POOL_OP_BASE_H_