Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_op_eigen.cc
1 #include "Eigen/Core"
2 #include "caffe2/utils/eigen_utils.h"
3 
4 #if EIGEN_VERSION_AT_LEAST(3, 3, 0)
5 
6 #include "caffe2/core/context.h"
7 #include "caffe2/core/operator.h"
8 #include "caffe2/operators/conv_pool_op_base.h"
9 
10 #include "unsupported/Eigen/CXX11/Tensor"
11 
12 namespace caffe2 {
13 
14 template <typename T>
15 class EigenConvOp final : public ConvPoolOpBase<CPUContext> {
16  public:
17  USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
18  explicit EigenConvOp(const OperatorDef& operator_def, Workspace* ws)
19  : ConvPoolOpBase<CPUContext>(operator_def, ws) {
20  OPERATOR_NEEDS_FEATURE(group_ == 1, "Group convolution not supported yet.");
21  }
22  ~EigenConvOp() override {}
23 
24  bool RunOnDeviceWithOrderNCHW() override;
25  bool RunOnDeviceWithOrderNHWC() override;
26 
27  private:
28  INPUT_TAGS(INPUT, FILTER, BIAS);
29 };
30 
31 // The NCHW implementation: we do explicit transposes before and after, which
32 // are not ideal but provides a compatible path instead of throwing the error.
33 template <typename T>
34 bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
35  auto& X = Input(INPUT);
36  auto& filter = Input(FILTER);
37  auto* Y = Output(0);
38  const int N = X.dim32(0), C = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
39  CAFFE_ENFORCE(4 == filter.dim());
40  const int M = filter.dim32(0);
41  CAFFE_ENFORCE(filter.dim32(1) == C);
42  CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
43  CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
44  ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
45  Eigen::array<int64_t, 4> kernel_shuffles
46  { {int64_t(2), int64_t(3), int64_t(1), int64_t(0)} };
47  Eigen::array<int64_t, 4> input_shuffles
48  { {int64_t(0), int64_t(2), int64_t(3), int64_t(1)} };
49 
50  Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
51  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
52  const_cast<T*>(filter.template data<T>()),
53  M,
54  C,
55  kernel_h(),
56  kernel_w())
57  .shuffle(kernel_shuffles);
58  Eigen::Tensor<T, 4, Eigen::RowMajor> X_tensor =
59  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
60  const_cast<T*>(X.template data<T>()), N, C, H, W)
61  .shuffle(input_shuffles);
62 
63  // For Eigen, the definition of row and col actually correspond to width
64  // and height instead of the other way round, so notice how we pass the
65  // stride, pad and dilation values.
66  typedef typename Eigen::internal::traits<
67  Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
68  Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
69  contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);
70 
71  Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
72  pre_contract_dims[1] = kernel_h() * kernel_w() * C;
73  pre_contract_dims[0] = Y->numel() / M;
74 
75  Eigen::DSizes<TensorIndex, 2> kernel_dims;
76  kernel_dims[0] = kernel_h() * kernel_w() * C;
77  kernel_dims[1] = M;
78 
79  Eigen::array<TensorIndex, 4> bcast_dims;
80  bcast_dims[0] = N;
81  bcast_dims[1] = Y->dim32(1);
82  bcast_dims[2] = Y->dim32(2);
83  bcast_dims[3] = 1;
84 
85  Eigen::Tensor<T, 4, Eigen::RowMajor> Y_tensor(
86  Y->dim32(0), Y->dim32(2), Y->dim32(3), Y->dim32(1));
87  Y_tensor = X_tensor
88  .extract_image_patches(
89  kernel_w(),
90  kernel_h(),
91  stride_w(),
92  stride_h(),
93  dilation_w(),
94  dilation_h(),
95  1,
96  1,
97  pad_l(),
98  pad_r(),
99  pad_t(),
100  pad_b(),
101  0)
102  .reshape(pre_contract_dims)
103  .contract(filter_tensor.reshape(kernel_dims), contract_dims)
104  .reshape(Y_tensor.dimensions());
105  if (InputSize() == 3) {
106  auto& bias = Input(BIAS);
107  CAFFE_ENFORCE(1 == bias.dim());
108  CAFFE_ENFORCE(bias.dim32(0) == M);
109  // It seems that the bias broadcast is still slower so let's do the
110  // following for now.
111  EigenArrayMap<T> Y_arr(
112  Y_tensor.data(), static_cast<int64_t>(M), Y->numel() / M);
113  ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
114  Y_arr = Y_arr.colwise() + bias_arr;
115  }
116 
117  // Do a last transpose.
118  Eigen::array<int64_t, 4> output_shuffles
119  { {int64_t(0), int64_t(3), int64_t(1), int64_t(2) } };
120 
121  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
122  Y->template mutable_data<T>(), N, M, Y->dim32(2), Y->dim32(3)) =
123  Y_tensor.shuffle(output_shuffles);
124  return true;
125 }
126 
127 template <typename T>
128 bool EigenConvOp<T>::RunOnDeviceWithOrderNHWC() {
129  auto& X = Input(INPUT);
130  auto& filter = Input(FILTER);
131  auto* Y = Output(0);
132  const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), C = X.dim32(3);
133  CAFFE_ENFORCE(4 == filter.dim());
134  const int M = filter.dim32(0);
135  CAFFE_ENFORCE(filter.dim32(1) == kernel_h());
136  CAFFE_ENFORCE(filter.dim32(2) == kernel_w());
137  CAFFE_ENFORCE(filter.dim32(3) == C);
138  ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
139  // Eigen expects filter to be of shape (kernel_h, kernel_w, C, M) for
140  // optimization purposes, so we will create a temp one.
141  Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> temp_filter(
142  M, kernel_h() * kernel_w() * C);
143  temp_filter = ConstEigenArrayMap<T>(
144  filter.template data<T>(), kernel_h() * kernel_w() * C, M)
145  .transpose();
146 
147  // Create tensor maps, and call spatial convolution.
148  // TODO(jiayq): right now we const cast away the const pointer, but we will
149  // need to figure out how to properly do a const tensormap.
150  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> X_tensor(
151  const_cast<T*>(X.template data<T>()), N, H, W, C);
152  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> Y_tensor(
153  Y->template mutable_data<T>(), N, Y->dim32(1), Y->dim32(2), M);
154  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> filter_tensor(
155  const_cast<T*>(temp_filter.data()), kernel_h(), kernel_w(), C, M);
156 
157  // For Eigen, the definition of row and col actually correspond to width
158  // and height instead of the other way round, so notice how we pass the
159  // stride, pad and dilation values.
160  typedef typename Eigen::internal::traits<
161  Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
162  Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
163  contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);
164 
165  Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
166  pre_contract_dims[1] = kernel_h() * kernel_w() * C;
167  pre_contract_dims[0] = Y->numel() / M;
168 
169  Eigen::DSizes<TensorIndex, 2> kernel_dims;
170  kernel_dims[0] = kernel_h() * kernel_w() * C;
171  kernel_dims[1] = M;
172 
173  Eigen::array<TensorIndex, 4> bcast_dims;
174  bcast_dims[0] = N;
175  bcast_dims[1] = Y->dim32(1);
176  bcast_dims[2] = Y->dim32(2);
177  bcast_dims[3] = 1;
178 
179  Y_tensor = X_tensor
180  .extract_image_patches(
181  kernel_w(),
182  kernel_h(),
183  stride_w(),
184  stride_h(),
185  dilation_w(),
186  dilation_h(),
187  1,
188  1,
189  pad_l(),
190  pad_r(),
191  pad_t(),
192  pad_b(),
193  0)
194  .reshape(pre_contract_dims)
195  .contract(filter_tensor.reshape(kernel_dims), contract_dims)
196  .reshape(Y_tensor.dimensions());
197 
198  if (InputSize() == 3) {
199  auto& bias = Input(BIAS);
200  CAFFE_ENFORCE(1 == bias.dim());
201  CAFFE_ENFORCE(bias.dim32(0) == M);
202  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> bias_tensor(
203  const_cast<T*>(bias.template data<T>()), 1, 1, 1, M);
204  // It seems that the bias broadcast is still slower so let's do the
205  // following for now.
206  EigenArrayMap<T> Y_arr(
207  Y->template mutable_data<T>(), static_cast<int64_t>(M), Y->numel() / M);
208  ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
209  Y_arr = Y_arr.colwise() + bias_arr;
210  }
211  return true;
212 }
213 
// Register the float instantiation of EigenConvOp with the EIGEN engine tag
// for the Conv, Conv1D, Conv2D and Conv3D operator names.
// NOTE(review): both Run* methods read exactly four dimensions from the input
// and enforce a 4-D filter, so the Conv1D / Conv3D registrations look like
// they can only succeed for 4-D inputs -- confirm this is intended.
214 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv, EIGEN, EigenConvOp<float>);
215 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv1D, EIGEN, EigenConvOp<float>);
216 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv2D, EIGEN, EigenConvOp<float>);
217 REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv3D, EIGEN, EigenConvOp<float>);
218 
219 } // namespace caffe2
220 
221 #endif
Definition: any.cpp:108
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: OpClasses.h:13
Definition: static.cpp:64