3 #ifndef CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_ 4 #define CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/conv_op_shared.h" 10 #include "caffe2/operators/conv_transpose_op.h" 11 #include "caffe2/operators/conv_transpose_unpool_op_base.h" 12 #include "caffe2/utils/math.h" 14 C10_DECLARE_bool(caffe2_force_shared_col_buffer);
18 template <
typename T,
class Context>
19 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNCHW() {
20 const Tensor& X = Input(INPUT);
21 auto& filter = Input(FILTER);
22 const int N = X.dim32(0),
M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
23 CAFFE_ENFORCE(filter.dim() == 4,
"filter must be 4D tensor");
26 "filter number must be equal to input channel number");
27 const int C = filter.dim32(1);
29 filter.dim32(2) == this->kernel_h(),
30 "filter height must be equal to kernel height");
32 filter.dim32(3) == this->kernel_w(),
33 "filter width must be equal to kernel width");
34 auto sizes = ConvTransposeUnpoolBase<Context>::GetOutputSize(X, C);
35 Tensor* Y = Output(0, sizes, at::dtype<T>());
37 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
38 const int input_image_size = H * W;
39 const int output_image_size = Y->dim32(2) * Y->dim32(3);
41 if (InputSize() == 3) {
42 auto& bias = Input(BIAS);
43 CAFFE_ENFORCE(bias.dim() == 1,
"bias must be 1D tensor");
46 "bias dimension must be equal to output channel number");
49 {1, output_image_size},
50 at::dtype<T>().device(Context::GetDeviceType()));
51 T* bm_data = bias_multiplier_.template mutable_data<T>();
52 math::Set<T, Context>(
59 const T* Xdata = X.template data<T>();
60 const T* filter_data = filter.template data<T>();
61 T* Ydata = Y->template mutable_data<T>();
63 auto f = [&](
Tensor* col_buffer) {
64 ReinitializeTensor(col_buffer, vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W}, at::dtype<T>().device(Context::GetDeviceType()));
65 T* col_buffer_data = col_buffer->template mutable_data<T>();
66 for (
auto image_id = 0; image_id < N; ++image_id) {
68 math::Gemm<T, Context>(
82 math::Col2Im<T, Context, StorageOrder::NCHW>(
101 if (InputSize() == 3) {
102 const T* bias_data = Input(BIAS).template data<T>();
103 const T* bm_data = bias_multiplier_.template data<T>();
104 #if !defined(__ARM_NEON__) && !defined(__ARM_NEON) 105 math::Gemm<T, Context>(
118 math::BiasCHW<T, Context>(
125 #endif // !defined(__ARM_NEON__) && !defined(__ARM_NEON) 129 Ydata += Y->numel() / Y->dim32(0);
132 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
133 runWithSharedBuffer<Context>(ws_, f);
// Forward pass of ConvTranspose in NHWC layout. Mirrors the NCHW path above:
// per-image GEMM into a column buffer, then Col2Im<NHWC> into Y, plus an
// optional bias GEMM. As with the NCHW block, the fused leading integers are
// original line numbers and the numbering gaps are lines lost in extraction
// (GEMM/Col2Im argument lists, some ENFORCE conditions, closing braces).
140 template <
typename T,
class Context>
141 bool ConvTransposeOp<T, Context>::RunOnDeviceWithOrderNHWC() {
142 const Tensor& X = Input(INPUT);
143 auto& filter = Input(FILTER);
// NHWC: N = batch, H/W = spatial, M = input channels (last dim).
144 const auto N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
145 CAFFE_ENFORCE(filter.dim() == 4,
"filter must be 4D tensor");
147 filter.dim32(0) == M,
148 "filter number must be equal to input channel number");
150 filter.dim32(1) == this->kernel_h(),
151 "filter height must be equal to kernel height");
153 filter.dim32(2) == this->kernel_w(),
154 "filter width must be equal to kernel width");
// Output channels sit in the LAST filter dim for NHWC (vs dim 1 for NCHW).
155 const int C = filter.dim32(3);
156 auto sizes = ConvTransposeUnpoolBase<Context>::GetOutputSize(X, C);
157 Tensor* Y = Output(0, sizes, at::dtype<T>());
159 const auto kernel_dim = C * this->kernel_h() * this->kernel_w();
160 const auto input_image_size = H * W;
161 const auto output_image_size = Y->dim32(1) * Y->dim32(2);
// Optional bias input: ones-vector multiplier enables bias-add as a GEMM.
163 if (InputSize() == 3) {
164 auto& bias = Input(BIAS);
165 CAFFE_ENFORCE(bias.dim() == 1,
"bias must be 1D tensor");
168 "bias dimension must be equal to output channel number");
172 {1, output_image_size},
173 at::dtype<T>().device(Context::GetDeviceType()));
174 T* bm_data = bias_multiplier_.template mutable_data<T>();
// Presumably fills bias_multiplier_ with 1 — arguments truncated here.
175 math::Set<T, Context>(
181 const T* Xdata = X.template data<T>();
182 const T* filter_data = filter.template data<T>();
183 T* Ydata = Y->template mutable_data<T>();
// Column buffer layout for NHWC: (H, W, kernel_h, kernel_w, C). The lambda
// header that would capture this body (as in the NCHW path) is missing.
188 vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C},
189 at::dtype<T>().device(Context::GetDeviceType()));
190 T* col_buffer_data = col_buffer_.template mutable_data<T>();
191 for (
auto image_id = 0; image_id < N; ++image_id) {
// Per-image: GEMM into the column buffer, then Col2Im<NHWC> into Y
// (both calls truncated), then the bias GEMM when a bias is supplied.
193 math::Gemm<T, Context>(
206 math::Col2Im<T, Context, StorageOrder::NHWC>(
224 if (InputSize() == 3) {
225 const T* bm_data = bias_multiplier_.template data<T>();
226 const T* bias_data = Input(BIAS).template data<T>();
227 math::Gemm<T, Context>(
// Advance output pointer by one image.
241 Ydata += Y->numel() / Y->dim32(0);
// Shared-vs-private col buffer dispatch; else-branch and `return true;`
// are missing from this extraction.
244 if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
245 runWithSharedBuffer<Context>(ws_, f);
// Backward pass of ConvTranspose in NCHW layout. Computes:
//   dfilter (always), dbias (unless no_bias_), and dX (only when requested —
//   OutputSize() == 3, or == 2 with no_bias_).
// Pattern: Im2Col on dY (the transpose-conv gradient is an ordinary conv),
// GEMM-accumulate into dfilter across images, bias gradient via GEMM against
// the ones multiplier, then a second pass for dX. Leading integers are
// original line numbers; numbering gaps are extraction-lost lines.
252 template <
typename T,
class Context>
253 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
254 auto& X = Input(INPUT);
255 auto& filter = Input(FILTER);
256 auto& dY = Input(OUTPUT_GRAD);
258 const int N = X.dim32(0), M = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
263 CAFFE_ENFORCE(filter.dim() == 4);
264 const int C = filter.dim32(1);
266 filter.dim32(2) == this->kernel_h(),
267 "filter height must be equal to kernel height");
269 filter.dim32(3) == this->kernel_w(),
270 "filter width must be equal to kernel width");
// dfilter has the same shape as filter.
271 auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
273 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
274 const int output_image_size = dY.dim32(2) * dY.dim32(3);
// (Re)initialize col_buffer_ to (C, kernel_h, kernel_w, H, W) — the
// ReinitializeTensor call line itself is missing here.
278 vector<int64_t>{C, this->kernel_h(), this->kernel_w(), H, W},
279 at::dtype<T>().device(Context::GetDeviceType()));
// Bias-gradient setup (guard `if (!no_bias_)` presumably lost): ones
// multiplier of length output_image_size for the dbias GEMM.
281 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
286 {1, output_image_size},
287 at::dtype<T>().device(Context::GetDeviceType()));
288 T* bm_data = bias_multiplier_.template mutable_data<T>();
289 math::Set<T, Context>(
295 T* col_buffer_data = col_buffer_.template mutable_data<T>();
296 const T* Xdata = X.template data<T>();
297 const T* filter_data = filter.template data<T>();
298 const T* dYdata = dY.template data<T>();
299 T* dfilter_data = dfilter->template mutable_data<T>();
// Gradients are accumulated across the batch, so zero them first.
301 math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);
303 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
304 T* dbias_data = dbias->template mutable_data<T>();
305 math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
// Pass 1 over images: Im2Col(dY) -> col buffer, GEMM-accumulate dfilter,
// and (when bias is present) GEMM dY * ones -> dbias. Call arguments and
// the surrounding no_bias_ guard are truncated.
307 for (
auto image_id = 0; image_id < N; ++image_id) {
310 math::Im2Col<T, Context, StorageOrder::NCHW>(
328 math::Gemm<T, Context>(
342 const T* bm_data = bias_multiplier_.template data<T>();
343 T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
344 math::Gemm<T, Context>(
// Advance per-image pointers (numel / batch).
357 dYdata += dY.numel() / dY.dim32(0);
358 Xdata += X.numel() / X.dim32(0);
// Pass 2 (optional): compute dX. When no_bias_, dX reuses the
// BIAS_OR_INPUT_GRAD output slot; otherwise it goes to INPUT_GRAD.
360 if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
363 dYdata = dY.template data<T>();
366 no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
367 T* dXdata = dX->template mutable_data<T>();
368 for (
auto image_id = 0; image_id < N; ++image_id) {
// Per-image: Im2Col(dY), then GEMM with the filter to produce dX
// (arguments truncated). `return true;` is missing below.
372 math::Im2Col<T, Context, StorageOrder::NCHW>(
390 math::Gemm<T, Context>(
402 dYdata += dY.numel() / dY.dim32(0);
403 dXdata += X.numel() / X.dim32(0);
// Backward pass of ConvTranspose in NHWC layout — mirrors the NCHW gradient
// above with NHWC dimension order and an (H, W, kernel_h, kernel_w, C)
// column buffer. Same extraction caveats: fused leading integers are the
// original line numbers and numbering gaps mark missing lines.
409 template <
typename T,
class Context>
410 bool ConvTransposeGradientOp<T, Context>::RunOnDeviceWithOrderNHWC() {
411 auto& X = Input(INPUT);
412 auto& filter = Input(FILTER);
413 auto& dY = Input(OUTPUT_GRAD);
// NHWC: channels (M) are the last input dim.
415 const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), M = X.dim32(3);
420 CAFFE_ENFORCE(filter.dim() == 4,
"filter must be 4D tensor");
422 filter.dim32(1) == this->kernel_h(),
423 "filter height must be equal to kernel height");
425 filter.dim32(2) == this->kernel_w(),
426 "filter width must be equal to kernel width");
427 const int C = filter.dim32(3);
428 auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
430 const int kernel_dim = C * this->kernel_h() * this->kernel_w();
431 const int output_image_size = dY.dim32(1) * dY.dim32(2);
// Column buffer (H, W, kernel_h, kernel_w, C); the ReinitializeTensor call
// line itself fell out of the extraction.
435 vector<int64_t>{H, W, this->kernel_h(), this->kernel_w(), C},
436 at::dtype<T>().device(Context::GetDeviceType()));
// Bias-gradient setup (the `if (!no_bias_)` guard is presumably missing).
438 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
443 {1, output_image_size},
444 at::dtype<T>().device(Context::GetDeviceType()));
445 T* bm_data = bias_multiplier_.template mutable_data<T>();
446 math::Set<T, Context>(
452 T* col_buffer_data = col_buffer_.template mutable_data<T>();
453 const T* Xdata = X.template data<T>();
454 const T* filter_data = filter.template data<T>();
455 const T* dYdata = dY.template data<T>();
456 T* dfilter_data = dfilter->template mutable_data<T>();
// Zero the accumulators before summing over the batch.
458 math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);
460 auto* dbias = Output(BIAS_OR_INPUT_GRAD);
461 T* dbias_data = dbias->template mutable_data<T>();
462 math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
// Pass 1: Im2Col(dY) -> col buffer, GEMM-accumulate dfilter, optional
// dbias GEMM against the ones multiplier (call arguments truncated).
464 for (
auto image_id = 0; image_id < N; ++image_id) {
467 math::Im2Col<T, Context, StorageOrder::NHWC>(
485 math::Gemm<T, Context>(
499 const T* bm_data = bias_multiplier_.template data<T>();
500 T* input_grad_data = Output(BIAS_OR_INPUT_GRAD)->template mutable_data<T>();
501 math::Gemm<T, Context>(
514 dYdata += dY.numel() / dY.dim32(0);
515 Xdata += X.numel() / X.dim32(0);
// Pass 2 (optional): dX. With no_bias_ the input gradient reuses the
// BIAS_OR_INPUT_GRAD slot, otherwise INPUT_GRAD.
517 if (OutputSize() == 3 || (no_bias_ && (OutputSize() == 2))) {
520 dYdata = dY.template data<T>();
523 no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
524 T* dXdata = dX->template mutable_data<T>();
525 for (
auto image_id = 0; image_id < N; ++image_id) {
// Im2Col(dY) then GEMM with the filter to produce dX per image
// (arguments truncated); trailing `return true;` is missing.
529 math::Im2Col<T, Context, StorageOrder::NHWC>(
547 math::Gemm<T, Context>(
559 dYdata += dY.numel() / dY.dim32(0);
560 dXdata += X.numel() / X.dim32(0);
567 #endif // CAFFE2_OPERATORS_CONV_TRANSPOSE_OP_IMPL_H_
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to the given dims and options if necessary; note that this will not do anything if the tensor's current dims and options already match those passed in.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.