// EigenConvOp: convolution operator built on ConvPoolOpBase<CPUContext> and
// the Eigen tensor module. The includes and the class are compiled under the
// EIGEN_VERSION_AT_LEAST(3, 3, 0) guard opened on the next line.
//
// NOTE(review): this text carries embedded source line numbers (2, 4, 6, ...)
// and the numbering skips values, so some original lines are presumably
// absent from this view: the `template <typename T>` header implied by the
// later `EigenConvOp<T>::` definitions, any access specifiers, the closing
// braces of the constructor and of the class, and the matching #endif.
// Confirm against the full file before editing.
2 #include "caffe2/utils/eigen_utils.h" 4 #if EIGEN_VERSION_AT_LEAST(3, 3, 0) 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/operator.h" 8 #include "caffe2/operators/conv_pool_op_base.h" 10 #include "unsupported/Eigen/CXX11/Tensor" 15 class EigenConvOp final :
public ConvPoolOpBase<CPUContext> {
17 USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
// Constructor: forwards operator_def and ws to the base class, then declares
// through OPERATOR_NEEDS_FEATURE that this implementation requires
// group_ == 1 (grouped convolution is not handled here).
18 explicit EigenConvOp(
const OperatorDef& operator_def, Workspace* ws)
19 : ConvPoolOpBase<CPUContext>(operator_def, ws) {
20 OPERATOR_NEEDS_FEATURE(group_ == 1,
"Group convolution not supported yet.");
// Destructor: empty body; nothing is released explicitly here.
22 ~EigenConvOp()
override {}
// Layout-specific entry points overriding the base class; both are defined
// out of line further down in this file.
24 bool RunOnDeviceWithOrderNCHW()
override;
25 bool RunOnDeviceWithOrderNHWC()
override;
// Names for the operator inputs as used with Input(...) in the
// implementations: INPUT, FILTER, and BIAS (BIAS is only read when
// InputSize() == 3).
28 INPUT_TAGS(INPUT, FILTER, BIAS);
// NCHW convolution path.
//
// As far as the visible lines show: the input is read as (N, C, H, W) and the
// filter is checked to be (M, C, kernel_h, kernel_w). Both are copied into
// temporary Eigen tensors with permuted dimensions, the result is produced by
// an extract_image_patches -> reshape -> contract -> reshape chain into a
// temporary Y_tensor, an optional bias is added, and Y_tensor is permuted
// back into the output buffer Y.
//
// NOTE(review): several original lines are absent from this view (the
// `template <typename T>` header, the declaration of Y, the filter map
// dimensions, the arguments of extract_image_patches, closing braces and the
// return statement). Confirm against the full file.
34 bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
35 auto& X = Input(INPUT);
36 auto& filter = Input(FILTER);
// Input extents taken from dimensions 0..3 of X, in N, C, H, W order.
38 const int N = X.dim32(0),
C = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
// Filter must be 4-D with shape (M, C, kernel_h(), kernel_w()); M is the
// value passed to SetOutputSize below as the output channel count.
39 CAFFE_ENFORCE(4 == filter.dim());
40 const int M = filter.dim32(0);
41 CAFFE_ENFORCE(filter.dim32(1) ==
C);
42 CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
43 CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
// NOTE(review): Y is used here but its declaration is not in the visible
// lines; presumably it is obtained from Output(0) on a missing line.
44 ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
// Dimension permutations for Eigen's shuffle(): output dimension i takes
// input dimension shuffle[i]. With the shapes enforced above this presumably
// turns the filter (M, C, kh, kw) into (kh, kw, C, M) and the input
// (N, C, H, W) into (N, H, W, C).
45 Eigen::array<int64_t, 4> kernel_shuffles
46 { {int64_t(2), int64_t(3), int64_t(1), int64_t(0)} };
47 Eigen::array<int64_t, 4> input_shuffles
48 { {int64_t(0), int64_t(2), int64_t(3), int64_t(1)} };
// Materialize shuffled copies. const_cast is needed only because TensorMap is
// constructed over a non-const T*; the visible code reads from these maps.
// NOTE(review): the dimension arguments of the filter map are not visible.
50 Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
51 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
52 const_cast<T*
>(filter.template data<T>()),
57 .shuffle(kernel_shuffles);
58 Eigen::Tensor<T, 4, Eigen::RowMajor> X_tensor =
59 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
60 const_cast<T*
>(X.template data<T>()), N,
C, H, W)
61 .shuffle(input_shuffles);
// Contraction setup: pair dimension 1 of the left operand (reshaped patches)
// with dimension 0 of the right operand (reshaped filter).
66 typedef typename Eigen::internal::traits<
67 Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
68 Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
69 contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);
// 2-D shape of the patch matrix: (Y->numel() / M) rows by
// (kernel_h * kernel_w * C) columns.
71 Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
72 pre_contract_dims[1] = kernel_h() * kernel_w() * C;
73 pre_contract_dims[0] = Y->numel() / M;
// 2-D shape of the filter matrix; only element 0 is assigned in the visible
// lines. NOTE(review): kernel_dims[1] is presumably set on a missing line.
75 Eigen::DSizes<TensorIndex, 2> kernel_dims;
76 kernel_dims[0] = kernel_h() * kernel_w() * C;
// NOTE(review): only bcast_dims[1] and [2] are assigned here and no visible
// line reads bcast_dims; check the full file for its remaining elements/use.
79 Eigen::array<TensorIndex, 4> bcast_dims;
81 bcast_dims[1] = Y->dim32(1);
82 bcast_dims[2] = Y->dim32(2);
// Temporary result tensor whose extents are Y's dimensions in the order
// (0, 2, 3, 1), i.e. Y's dimension 1 is placed last.
85 Eigen::Tensor<T, 4, Eigen::RowMajor> Y_tensor(
86 Y->dim32(0), Y->dim32(2), Y->dim32(3), Y->dim32(1));
// Core computation chain. NOTE(review): the start of this statement
// (presumably an assignment into Y_tensor from X_tensor) and the arguments of
// extract_image_patches are not visible; the result is reshaped to the patch
// matrix, contracted with the reshaped filter, and reshaped to Y_tensor's
// dimensions.
88 .extract_image_patches(
102 .reshape(pre_contract_dims)
103 .contract(filter_tensor.reshape(kernel_dims), contract_dims)
104 .reshape(Y_tensor.dimensions());
// Optional bias: present only when a third input is supplied; it must be a
// 1-D tensor of length M.
105 if (InputSize() == 3) {
106 auto& bias = Input(BIAS);
107 CAFFE_ENFORCE(1 == bias.dim());
108 CAFFE_ENFORCE(bias.dim32(0) == M);
// View Y_tensor's buffer as a 2-D array built with (M, Y->numel() / M) and
// add the length-M bias vector to every column via colwise().
// NOTE(review): assumes EigenArrayMap takes (pointer, rows, cols) — confirm
// in caffe2/utils/eigen_utils.h.
111 EigenArrayMap<T> Y_arr(
112 Y_tensor.data(),
static_cast<int64_t
>(M), Y->numel() / M);
113 ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
114 Y_arr = Y_arr.colwise() + bias_arr;
// Permute Y_tensor's dimensions with {0, 3, 1, 2} and write the result into
// Y's buffer, which is mapped as (N, M, Y->dim32(2), Y->dim32(3)).
118 Eigen::array<int64_t, 4> output_shuffles
119 { {int64_t(0), int64_t(3), int64_t(1), int64_t(2) } };
121 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
122 Y->template mutable_data<T>(), N, M, Y->dim32(2), Y->dim32(3)) =
123 Y_tensor.shuffle(output_shuffles);
// NHWC convolution path.
//
// As far as the visible lines show: the input is read as (N, H, W, C) and the
// filter is checked to be (M, kernel_h, kernel_w, C). The filter is copied
// into a temporary 2-D array, the input/output buffers are wrapped directly
// as Eigen tensor maps (no input/output permutation is visible here, unlike
// the NCHW path), the result is produced by an
// extract_image_patches -> reshape -> contract -> reshape chain, and an
// optional bias is added in place on Y's buffer.
//
// NOTE(review): several original lines are absent from this view (the
// declaration of Y, the tail of the temp_filter assignment, the arguments of
// extract_image_patches, closing braces and the return statement). Confirm
// against the full file.
127 template <
typename T>
128 bool EigenConvOp<T>::RunOnDeviceWithOrderNHWC() {
129 auto& X = Input(INPUT);
130 auto& filter = Input(FILTER);
// Input extents taken from dimensions 0..3 of X, in N, H, W, C order.
132 const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), C = X.dim32(3);
// Filter must be 4-D with shape (M, kernel_h(), kernel_w(), C).
133 CAFFE_ENFORCE(4 == filter.dim());
134 const int M = filter.dim32(0);
135 CAFFE_ENFORCE(filter.dim32(1) == kernel_h());
136 CAFFE_ENFORCE(filter.dim32(2) == kernel_w());
137 CAFFE_ENFORCE(filter.dim32(3) == C);
// NOTE(review): Y is used here but its declaration is not in the visible
// lines; presumably it is obtained from Output(0) on a missing line.
138 ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
// Temporary filter copy sized M x (kernel_h * kernel_w * C), filled from a
// (kernel_h * kernel_w * C) x M view of the filter data.
// NOTE(review): the end of this assignment is not visible; given the swapped
// extents it presumably ends with a transpose — confirm.
141 Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> temp_filter(
142 M, kernel_h() * kernel_w() * C);
143 temp_filter = ConstEigenArrayMap<T>(
144 filter.template data<T>(), kernel_h() * kernel_w() * C, M)
// Tensor views over existing buffers: X as (N, H, W, C), Y as
// (N, Y->dim32(1), Y->dim32(2), M), and temp_filter's storage as
// (kernel_h, kernel_w, C, M). Y_tensor aliases Y's mutable buffer, so writes
// to Y_tensor land directly in the output.
150 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> X_tensor(
151 const_cast<T*>(X.template data<T>()), N, H, W, C);
152 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> Y_tensor(
153 Y->template mutable_data<T>(), N, Y->dim32(1), Y->dim32(2), M);
154 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> filter_tensor(
155 const_cast<T*>(temp_filter.data()), kernel_h(), kernel_w(), C, M);
// Contraction setup: pair dimension 1 of the left operand (reshaped patches)
// with dimension 0 of the right operand (reshaped filter).
160 typedef typename Eigen::internal::traits<
161 Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
162 Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
163 contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);
// 2-D shape of the patch matrix: (Y->numel() / M) rows by
// (kernel_h * kernel_w * C) columns.
165 Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
166 pre_contract_dims[1] = kernel_h() * kernel_w() * C;
167 pre_contract_dims[0] = Y->numel() / M;
// 2-D shape of the filter matrix; only element 0 is assigned in the visible
// lines. NOTE(review): kernel_dims[1] is presumably set on a missing line.
169 Eigen::DSizes<TensorIndex, 2> kernel_dims;
170 kernel_dims[0] = kernel_h() * kernel_w() * C;
// NOTE(review): only bcast_dims[1] and [2] are assigned here and no visible
// line reads bcast_dims; check the full file for its remaining elements/use.
173 Eigen::array<TensorIndex, 4> bcast_dims;
175 bcast_dims[1] = Y->dim32(1);
176 bcast_dims[2] = Y->dim32(2);
// Core computation chain. NOTE(review): the start of this statement
// (presumably an assignment into Y_tensor from X_tensor) and the arguments of
// extract_image_patches are not visible; the result is reshaped to the patch
// matrix, contracted with the reshaped filter, and reshaped to Y_tensor's
// dimensions.
180 .extract_image_patches(
194 .reshape(pre_contract_dims)
195 .contract(filter_tensor.reshape(kernel_dims), contract_dims)
196 .reshape(Y_tensor.dimensions());
// Optional bias: present only when a third input is supplied; it must be a
// 1-D tensor of length M.
198 if (InputSize() == 3) {
199 auto& bias = Input(BIAS);
200 CAFFE_ENFORCE(1 == bias.dim());
201 CAFFE_ENFORCE(bias.dim32(0) == M);
// NOTE(review): bias_tensor maps the bias as (1, 1, 1, M) but is not read by
// any visible line; the addition below goes through bias_arr instead.
202 Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> bias_tensor(
203 const_cast<T*>(bias.template data<T>()), 1, 1, 1, M);
// View Y's buffer as a 2-D array built with (M, Y->numel() / M) and add the
// length-M bias vector to every column via colwise(), modifying Y in place.
// NOTE(review): assumes EigenArrayMap takes (pointer, rows, cols) — confirm
// in caffe2/utils/eigen_utils.h.
206 EigenArrayMap<T> Y_arr(
207 Y->template mutable_data<T>(), static_cast<int64_t>(M), Y->numel() / M);
208 ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
209 Y_arr = Y_arr.colwise() + bias_arr;
// Operator registrations: the float instantiation EigenConvOp<float> is
// registered with engine tag EIGEN under four operator names — Conv, Conv1D,
// Conv2D and Conv3D.
// NOTE(review): the implementations above enforce 4-D filters and read four
// input dimensions, so confirm that the Conv1D and Conv3D registrations are
// intended to reach this code with compatible shapes.
214 REGISTER_CPU_OPERATOR_WITH_ENGINE(
Conv, EIGEN, EigenConvOp<float>);
215 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv1D, EIGEN, EigenConvOp<float>);
216 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv2D, EIGEN, EigenConvOp<float>);
217 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv3D, EIGEN, EigenConvOp<float>);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...