// Caffe2 - C++ API
// A deep learning, cross platform ML framework
//
// conv_transpose_op_impl.h is the templated implementation of the
// conv_transpose_op.h file.
19 #ifndef CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
20 #define CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
21 
22 #include "caffe2/core/context.h"
23 #include "caffe2/core/logging.h"
24 #include "caffe2/core/operator.h"
25 #include "caffe2/operators/conv_op_shared.h"
26 #include "caffe2/operators/conv_transpose_op.h"
27 #include "caffe2/operators/conv_transpose_unpool_op_base.h"
28 #include "caffe2/utils/math.h"
29 
30 CAFFE2_DECLARE_bool(caffe2_force_shared_col_buffer);
31 
32 namespace caffe2 {
33 
// ConvTranspose ("deconvolution") forward pass, NCHW layout.
// Per image: col_buffer = filter^T * X (a Gemm), then Col2im
// scatter-accumulates the column buffer into the larger output image,
// and the optional per-channel bias is broadcast over the output.
template <typename T, class Context>
bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
  const Tensor<Context>& X = Input(INPUT);
  auto& filter = Input(FILTER);
  Tensor<Context>* Y = Output(0);
  // X is (N, M, H, W); filter is (M, C, kernel_h, kernel_w), where M is
  // the input channel count and C the output channel count (enforced below).
  const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
  CAFFE_ENFORCE(
      filter.dim32(0) == M,
      "filter number must be equal to input channel number");
  const int C = filter.dim32(1);
  CAFFE_ENFORCE(
      filter.dim32(2) == this->kernel_h(),
      "filter height must be equal to kernel height");
  CAFFE_ENFORCE(
      filter.dim32(3) == this->kernel_w(),
      "filter width must be equal to kernel width");
  ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);

  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
  const int input_image_size = H * W;
  const int output_image_size = Y->dim32(2) * Y->dim32(3);

#ifndef __ARM_NEON__
  // Non-NEON builds add the bias via a Gemm against a vector of ones;
  // (re)build that ones vector only when the output image size changes.
  if (InputSize() == 3) {
    auto& bias = Input(BIAS);
    CAFFE_ENFORCE(bias.ndim() == 1, "bias must be 1D tensor");
    CAFFE_ENFORCE(
        bias.dim32(0) == C,
        "bias dimension must be equal to output channel number");
    if (bias_multiplier_.size() != output_image_size) {
      bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
      T* bm_data = bias_multiplier_.template mutable_data<T>();
      math::Set<T, Context>(
          output_image_size,
          static_cast<T>(1),
          bm_data,
          &context_);
    }
  }
#endif // !__ARM_NEON__

  const T* Xdata = X.template data<T>();
  const T* filter_data = filter.template data<T>();
  T* Ydata = Y->template mutable_data<T>();

  auto f = [&](Tensor<Context>* col_buffer) {
    // Column buffer holds (C, kernel_h, kernel_w, H, W) for one image.
    col_buffer->Resize(
        vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
    T* col_buffer_data = col_buffer->template mutable_data<T>();
    for (auto image_id = 0; image_id < N; ++image_id) {
      // Weight term: col_buffer (kernel_dim x H*W) =
      //   filter^T (kernel_dim x M) * X (M x H*W); beta = 0 overwrites.
      math::Gemm<T, Context>(
          CblasTrans,
          CblasNoTrans,
          kernel_dim,
          input_image_size,
          M,
          1,
          filter_data,
          Xdata,
          0,
          col_buffer_data,
          &context_);

      // Col2im: accumulate the columns into the output image (the adjoint
      // of the im2col used by a forward convolution). Dilation is fixed
      // at 1x1 here.
      math::Col2im<T, Context, StorageOrder::NCHW>(
          col_buffer_data,
          C,
          Y->dim32(2),
          Y->dim32(3),
          this->kernel_h(),
          this->kernel_w(),
          1,
          1,
          this->pad_t(),
          this->pad_l(),
          this->pad_b(),
          this->pad_r(),
          this->stride_h(),
          this->stride_w(),
          Ydata,
          &context_);

      // Bias term: Y += bias (C x 1) * ones (1 x out_size), i.e. a rank-1
      // update broadcasting the per-channel bias over the output image.
      if (InputSize() == 3) {
        const T* bias_data = Input(BIAS).template data<T>();
#ifndef __ARM_NEON__
        const T* bm_data = bias_multiplier_.template data<T>();
        math::Gemm<T, Context>(
            CblasNoTrans,
            CblasNoTrans,
            C,
            output_image_size,
            1,
            1,
            bias_data,
            bm_data,
            1,
            Ydata,
            &context_);
#else
        // NEON path: specialized bias addition for CHW-ordered data.
        math::BiasCHW<T, Context>(
            bias_data,
            C,
            output_image_size,
            Ydata,
            &context_);
#endif // !__ARM_NEON__
      }

      // Advance the raw pointers to the next image in the batch.
      Xdata += M * H * W;
      Ydata += Y->size() / Y->dim32(0);
    }
  };
  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
    // Share one col buffer across operators to reduce peak memory use.
    runWithSharedBuffer<Context>(ws_, f);
  } else {
    f(&col_buffer_);
  }
  return true;
}
156 
157 template <typename T, class Context>
158 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
159  const Tensor<Context>& X = Input(INPUT);
160  auto& filter = Input(FILTER);
161  Tensor<Context>* Y = Output(0);
162  const auto N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
163  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
164  CAFFE_ENFORCE(
165  filter.dim32(0) == M,
166  "filter number must be equal to input channel number");
167  CAFFE_ENFORCE(
168  filter.dim32(1) == this->kernel_h(),
169  "filter height must be equal to kernel height");
170  CAFFE_ENFORCE(
171  filter.dim32(2) == this->kernel_w(),
172  "filter width must be equal to kernel width");
173  const int C = filter.dim32(3);
174  ConvTransposeUnpoolBase<Context>::SetOutputSize(X, Y, C);
175 
176  const auto kernel_dim = C * this->kernel_h() * this->kernel_w();
177  const auto input_image_size = H * W;
178  const auto output_image_size = Y->dim32(1) * Y->dim32(2);
179 
180  if (InputSize() == 3) {
181  auto& bias = Input(BIAS);
182  CAFFE_ENFORCE(bias.ndim() == 1, "bias must be 1D tensor");
183  CAFFE_ENFORCE(
184  bias.dim32(0) == C,
185  "bias dimension must be equal to output channel number");
186  if (bias_multiplier_.size() != output_image_size) {
187  bias_multiplier_.Resize(vector<TIndex>(1, output_image_size));
188  T* bm_data = bias_multiplier_.template mutable_data<T>();
189  math::Set<T, Context>(
190  output_image_size,
191  static_cast<T>(1),
192  bm_data,
193  &context_);
194  }
195  }
196  const T* Xdata = X.template data<T>();
197  const T* filter_data = filter.template data<T>();
198  T* Ydata = Y->template mutable_data<T>();
199 
200  auto f = [&](Tensor<Context>* /*col_buffer*/) {
201  col_buffer_.Resize(
202  vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
203  T* col_buffer_data = col_buffer_.template mutable_data<T>();
204  for (auto image_id = 0; image_id < N; ++image_id) {
205  // Weight term
206  math::Gemm<T, Context>(
207  CblasNoTrans,
208  CblasNoTrans,
209  input_image_size,
210  kernel_dim,
211  M,
212  1,
213  Xdata,
214  filter_data,
215  0,
216  col_buffer_data,
217  &context_);
218  // Col2im
219  math::Col2im<T, Context, StorageOrder::NHWC>(
220  col_buffer_data,
221  C,
222  Y->dim32(1),
223  Y->dim32(2),
224  this->kernel_h(),
225  this->kernel_w(),
226  1,
227  1,
228  this->pad_t(),
229  this->pad_l(),
230  this->pad_b(),
231  this->pad_r(),
232  this->stride_h(),
233  this->stride_w(),
234  Ydata,
235  &context_);
236  // Bias term
237  if (InputSize() == 3) {
238  const T* bm_data = bias_multiplier_.template data<T>();
239  const T* bias_data = Input(BIAS).template data<T>();
240  math::Gemm<T, Context>(
241  CblasNoTrans,
242  CblasNoTrans,
243  output_image_size,
244  C,
245  1,
246  1,
247  bm_data,
248  bias_data,
249  1,
250  Ydata,
251  &context_);
252  }
253  Xdata += M * H * W;
254  Ydata += Y->size() / Y->dim32(0);
255  }
256  };
257  if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
258  runWithSharedBuffer<Context>(ws_, f);
259  } else {
260  f(&col_buffer_);
261  }
262  return true;
263 }
264 
// ConvTranspose gradient, NCHW layout.
// Exploits the duality with ordinary convolution: Im2col on dY rebuilds the
// column buffer, from which the filter gradient (and optionally the bias and
// input gradients) are computed with Gemm. dfilter/dbias are accumulated
// across the batch (beta = 1), so both are zeroed up front.
template <typename T, class Context>
bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& dY = Input(OUTPUT_GRAD);
  auto* dfilter = Output(FILTER_GRAD);
  // X is (N, M, H, W); filter is (M, C, kernel_h, kernel_w).
  const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
  // We only handle LegacyPadding::NOTSET case and ignore cases of
  // LegacyPadding::VALID and LegacyPadding::SAME
  // Thus, we don't need to manually compute padding values
  // We simply use the values from the user
  CAFFE_ENFORCE(filter.ndim() == 4);
  const int C = filter.dim32(1);
  CAFFE_ENFORCE(
      filter.dim32(2) == this->kernel_h(),
      "filter height must be equal to kernel height");
  CAFFE_ENFORCE(
      filter.dim32(3) == this->kernel_w(),
      "filter width must be equal to kernel width");
  dfilter->ResizeLike(filter);

  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
  const int output_image_size = dY.dim32(2) * dY.dim32(3);
  // The col buffer is stored in CHW order as well
  col_buffer_.Resize(
      vector<TIndex>{C, this->kernel_h(), this->kernel_w(), H, W});
  if (!no_bias_) {
    // With a bias, the BIAS_OR_INPUT_GRAD output slot holds dbias.
    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
    dbias->Resize(C);
    // Ones vector used to reduce dY over the output image via Gemm.
    if (bias_multiplier_.size() != output_image_size) {
      bias_multiplier_.Resize(1, output_image_size);
      T* bm_data = bias_multiplier_.template mutable_data<T>();
      math::Set<T, Context>(
          output_image_size,
          static_cast<T>(1),
          bm_data,
          &context_);
    }
  }
  T* col_buffer_data = col_buffer_.template mutable_data<T>();
  const T* Xdata = X.template data<T>();
  const T* filter_data = filter.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dfilter_data = dfilter->template mutable_data<T>();
  // Pre-setting the gradients to zero
  // (required: the per-image Gemm calls below accumulate with beta = 1).
  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
  if (!no_bias_) {
    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
    T* dbias_data = dbias->template mutable_data<T>();
    math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
  }
  for (auto image_id = 0; image_id < N; ++image_id) {
    // gradient w.r.t. filters. Im2col followed by Gemm
    // Im2col: rebuild the column buffer from dY (dilation fixed at 1x1).
    math::Im2col<T, Context, StorageOrder::NCHW>(
        dYdata,
        C,
        dY.dim32(2),
        dY.dim32(3),
        this->kernel_h(),
        this->kernel_w(),
        1,
        1,
        this->pad_t(),
        this->pad_l(),
        this->pad_b(),
        this->pad_r(),
        this->stride_h(),
        this->stride_w(),
        col_buffer_data,
        &context_);
    // Gemm: dfilter (M x kernel_dim) +=
    //   X (M x H*W) * col_buffer^T (H*W x kernel_dim).
    math::Gemm<T, Context>(
        CblasNoTrans,
        CblasTrans,
        M,
        kernel_dim,
        H * W,
        1,
        Xdata,
        col_buffer_data,
        1,
        dfilter_data,
        &context_);
    // gradient w.r.t. bias
    if (!no_bias_) {
      const T* bm_data = bias_multiplier_.template data<T>();
      // NOTE: despite the name, this points at dbias here (the
      // BIAS_OR_INPUT_GRAD slot holds dbias when a bias is present).
      T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
      // dbias (C x 1) += dY (C x out_size) * ones (out_size x 1).
      math::Gemm<T, Context>(
          CblasNoTrans,
          CblasNoTrans,
          C,
          1,
          output_image_size,
          1,
          dYdata,
          bm_data,
          1,
          input_grad_data,
          &context_);
    }
    // Step to the next image of the batch.
    dYdata += dY.size() / dY.dim32(0);
    Xdata += X.size() / X.dim32(0);
  }
  // dX is only produced when the caller requested it: output slot 3, or
  // slot 2 in the no-bias case.
  if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
    // Compute gradients w.r.t. the input
    // Since we have changed dYdata in the above loop, we will need to reset.
    dYdata = dY.template data<T>();
    auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
    dX->ResizeLike(X);
    T* dXdata = dX->template mutable_data<T>();
    for (auto image_id = 0; image_id < N; ++image_id) {
      // Im2col.
      // TODO(zyan3): Probably duplicate work as in gradient computation
      // w.r.t filters
      math::Im2col<T, Context, StorageOrder::NCHW>(
          dYdata,
          C,
          dY.dim32(2),
          dY.dim32(3),
          this->kernel_h(),
          this->kernel_w(),
          1,
          1,
          this->pad_t(),
          this->pad_l(),
          this->pad_b(),
          this->pad_r(),
          this->stride_h(),
          this->stride_w(),
          col_buffer_data,
          &context_);
      // Gemm: dX (M x H*W) = filter (M x kernel_dim) *
      //   col_buffer (kernel_dim x H*W); beta = 0 overwrites.
      math::Gemm<T, Context>(
          CblasNoTrans,
          CblasNoTrans,
          M,
          H * W,
          kernel_dim,
          1,
          filter_data,
          col_buffer_data,
          0,
          dXdata,
          &context_);
      dYdata += dY.size() / dY.dim32(0);
      dXdata += X.size() / X.dim32(0);
    }
  }
  return true;
}
416 
// ConvTranspose gradient, NHWC layout.
// Same scheme as the NCHW version: Im2col on dY rebuilds the column buffer,
// then Gemm produces the filter gradient (and optionally bias and input
// gradients). dfilter/dbias are accumulated across the batch (beta = 1),
// so both are zeroed up front.
template <typename T, class Context>
bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto& dY = Input(OUTPUT_GRAD);
  auto* dfilter = Output(FILTER_GRAD);
  // X is (N, H, W, M); filter is (M, kernel_h, kernel_w, C).
  const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
  // We only handle LegacyPadding::NOTSET case and ignore cases of
  // LegacyPadding::VALID and LegacyPadding::SAME
  // Thus, we don't need to manually compute padding values
  // We simply use the values from the user
  CAFFE_ENFORCE(filter.ndim() == 4, "filter must be 4D tensor");
  CAFFE_ENFORCE(
      filter.dim32(1) == this->kernel_h(),
      "filter height must be equal to kernel height");
  CAFFE_ENFORCE(
      filter.dim32(2) == this->kernel_w(),
      "filter width must be equal to kernel width");
  const int C = filter.dim32(3);
  dfilter->ResizeLike(filter);

  const int kernel_dim = C * this->kernel_h() * this->kernel_w();
  const int output_image_size = dY.dim32(1) * dY.dim32(2);
  // The col buffer is stored in HWC order as well
  col_buffer_.Resize(
      vector<TIndex>{H, W, this->kernel_h(), this->kernel_w(), C});
  if (!no_bias_) {
    // With a bias, the BIAS_OR_INPUT_GRAD output slot holds dbias.
    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
    dbias->Resize(C);
    // Ones vector used to reduce dY over the output image via Gemm.
    if (bias_multiplier_.size() != output_image_size) {
      bias_multiplier_.Resize(1, output_image_size);
      T* bm_data = bias_multiplier_.template mutable_data<T>();
      math::Set<T, Context>(
          output_image_size,
          static_cast<T>(1),
          bm_data,
          &context_);
    }
  }
  T* col_buffer_data = col_buffer_.template mutable_data<T>();
  const T* Xdata = X.template data<T>();
  const T* filter_data = filter.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dfilter_data = dfilter->template mutable_data<T>();
  // Pre-setting the gradients to zero
  // (required: the per-image Gemm calls below accumulate with beta = 1).
  math::Set<T, Context>(dfilter->size(), 0, dfilter_data, &context_);
  if (!no_bias_) {
    auto* dbias = Output(BIAS_OR_INPUT_GRAD);
    T* dbias_data = dbias->template mutable_data<T>();
    math::Set<T, Context>(dbias->size(), 0, dbias_data, &context_);
  }
  for (auto image_id = 0; image_id < N; ++image_id) {
    // gradient w.r.t. filters. Im2col followed by Gemm
    // Im2col: rebuild the column buffer from dY (dilation fixed at 1x1).
    math::Im2col<T, Context, StorageOrder::NHWC>(
        dYdata,
        C,
        dY.dim32(1),
        dY.dim32(2),
        this->kernel_h(),
        this->kernel_w(),
        1,
        1,
        this->pad_t(),
        this->pad_l(),
        this->pad_b(),
        this->pad_r(),
        this->stride_h(),
        this->stride_w(),
        col_buffer_data,
        &context_);
    // Gemm: dfilter (M x kernel_dim) +=
    //   X^T (M x H*W) * col_buffer (H*W x kernel_dim).
    math::Gemm<T, Context>(
        CblasTrans,
        CblasNoTrans,
        M,
        kernel_dim,
        H * W,
        1,
        Xdata,
        col_buffer_data,
        1,
        dfilter_data,
        &context_);
    // gradients w.r.t. bias
    if (!no_bias_) {
      const T* bm_data = bias_multiplier_.template data<T>();
      // NOTE: despite the name, this points at dbias here (the
      // BIAS_OR_INPUT_GRAD slot holds dbias when a bias is present).
      T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
      // dbias (C x 1) += dY^T (C x out_size) * ones (out_size x 1).
      math::Gemm<T, Context>(
          CblasTrans,
          CblasNoTrans,
          C,
          1,
          output_image_size,
          1,
          dYdata,
          bm_data,
          1,
          input_grad_data,
          &context_);
    }
    // Step to the next image of the batch.
    dYdata += dY.size() / dY.dim32(0);
    Xdata += X.size() / X.dim32(0);
  }
  // dX is only produced when the caller requested it: output slot 3, or
  // slot 2 in the no-bias case.
  if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
    // Compute gradients w.r.t. the input
    // Since we have changed dYdata in the above loop, we will need to reset.
    dYdata = dY.template data<T>();
    auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD);
    dX->ResizeLike(X);
    T* dXdata = dX->template mutable_data<T>();
    for (auto image_id = 0; image_id < N; ++image_id) {
      // Im2col.
      // TODO(zyan3): Probably duplicate work as in gradient computation
      // w.r.t filters
      math::Im2col<T, Context, StorageOrder::NHWC>(
          dYdata,
          C,
          dY.dim32(1),
          dY.dim32(2),
          this->kernel_h(),
          this->kernel_w(),
          1,
          1,
          this->pad_t(),
          this->pad_l(),
          this->pad_b(),
          this->pad_r(),
          this->stride_h(),
          this->stride_w(),
          col_buffer_data,
          &context_);
      // Gemm: dX (H*W x M) = col_buffer (H*W x kernel_dim) *
      //   filter^T (kernel_dim x M); beta = 0 overwrites.
      math::Gemm<T, Context>(
          CblasNoTrans,
          CblasTrans,
          H * W,
          M,
          kernel_dim,
          1,
          col_buffer_data,
          filter_data,
          0,
          dXdata,
          &context_);
      dYdata += dY.size() / dY.dim32(0);
      dXdata += X.size() / X.dim32(0);
    }
  }
  return true;
}
568 
569 } // namespace caffe2
570 #endif // CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
// Copyright (c) 2016-present, Facebook, Inc.