Caffe2 - C++ API
A deep learning, cross-platform ML framework
conv_op_eigen.cc
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/conv_pool_op_base.h"

#include "unsupported/Eigen/CXX11/Tensor"

namespace caffe2 {

template <typename T>
class EigenConvOp final : public ConvPoolOpBase<CPUContext> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(CPUContext);
  EigenConvOp(const OperatorDef& operator_def, Workspace* ws)
      : ConvPoolOpBase<CPUContext>(operator_def, ws) {
    OPERATOR_NEEDS_FEATURE(group_ == 1, "Group convolution not supported yet.");
  }
  ~EigenConvOp() {}

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
  INPUT_TAGS(INPUT, FILTER, BIAS);
};

// The NCHW implementation: we do explicit transposes before and after, which
// is not ideal but provides a compatible path instead of throwing an error.
template <typename T>
bool EigenConvOp<T>::RunOnDeviceWithOrderNCHW() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto* Y = Output(0);
  const int N = X.dim32(0), C = X.dim32(1), H = X.dim32(2), W = X.dim32(3);
  CAFFE_ENFORCE(4 == filter.ndim());
  const int M = filter.dim32(0);
  CAFFE_ENFORCE(filter.dim32(1) == C);
  CAFFE_ENFORCE(filter.dim32(2) == kernel_h());
  CAFFE_ENFORCE(filter.dim32(3) == kernel_w());
  ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
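  // Note: SetOutputSize fills in Y's shape from X, the kernel, stride, pad and
  // dilation settings; for the height dimension this is roughly the usual
  // convolution arithmetic,
  //   H_out = (H + pad_t + pad_b - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1,
  // and likewise for the width dimension.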
  Eigen::array<TIndex, 4> kernel_shuffles
      { {TIndex(2), TIndex(3), TIndex(1), TIndex(0)} };
  Eigen::array<TIndex, 4> input_shuffles
      { {TIndex(0), TIndex(2), TIndex(3), TIndex(1)} };

  Eigen::Tensor<T, 4, Eigen::RowMajor> filter_tensor =
      Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
          const_cast<T*>(filter.template data<T>()),
          M,
          C,
          kernel_h(),
          kernel_w())
          .shuffle(kernel_shuffles);
  Eigen::Tensor<T, 4, Eigen::RowMajor> X_tensor =
      Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
          const_cast<T*>(X.template data<T>()), N, C, H, W)
          .shuffle(input_shuffles);

  // For Eigen, row and col actually correspond to width and height instead of
  // the other way round, so notice how we pass the stride, pad and dilation
  // values.
  typedef typename Eigen::internal::traits<
      Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
  Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
  contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);

  Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
  pre_contract_dims[1] = kernel_h() * kernel_w() * C;
  pre_contract_dims[0] = Y->size() / M;

  Eigen::DSizes<TensorIndex, 2> kernel_dims;
  kernel_dims[0] = kernel_h() * kernel_w() * C;
  kernel_dims[1] = M;
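  // The image patches extracted below form an (N * H_out * W_out) x
  // (kernel_h * kernel_w * C) matrix (pre_contract_dims); contracting its
  // second dimension with the first dimension of the filter reshaped to
  // kernel_dims, i.e. (kernel_h * kernel_w * C) x M, produces the
  // NHWC-ordered output viewed as (N * H_out * W_out) x M.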

  Eigen::array<TensorIndex, 4> bcast_dims;
  bcast_dims[0] = N;
  bcast_dims[1] = Y->dim32(1);
  bcast_dims[2] = Y->dim32(2);
  bcast_dims[3] = 1;
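  // (bcast_dims is declared here but not referenced again in this NCHW path;
  // the bias handling below adds the bias through a column-wise array map
  // instead of an Eigen broadcast.)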

  Eigen::Tensor<T, 4, Eigen::RowMajor> Y_tensor(
      Y->dim32(0), Y->dim32(2), Y->dim32(3), Y->dim32(1));
  Y_tensor = X_tensor
                 .extract_image_patches(
                     kernel_w(),
                     kernel_h(),
                     stride_w(),
                     stride_h(),
                     dilation_w(),
                     dilation_h(),
                     1,
                     1,
                     pad_l(),
                     pad_r(),
                     pad_t(),
                     pad_b(),
                     0)
                 .reshape(pre_contract_dims)
                 .contract(filter_tensor.reshape(kernel_dims), contract_dims)
                 .reshape(Y_tensor.dimensions());
  if (InputSize() == 3) {
    auto& bias = Input(BIAS);
    CAFFE_ENFORCE(1 == bias.ndim());
    CAFFE_ENFORCE(bias.dim32(0) == M);
    // It seems that the bias broadcast is still slower so let's do the
    // following for now.
    EigenArrayMap<T> Y_arr(
        Y_tensor.data(), static_cast<TIndex>(M), Y->size() / M);
    ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
    Y_arr = Y_arr.colwise() + bias_arr;
  }

  // Do a last transpose.
  Eigen::array<TIndex, 4> output_shuffles
      { {TIndex(0), TIndex(3), TIndex(1), TIndex(2)} };

  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>>(
      Y->template mutable_data<T>(), N, M, Y->dim32(2), Y->dim32(3)) =
      Y_tensor.shuffle(output_shuffles);
  return true;
}

template <typename T>
bool EigenConvOp<T>::RunOnDeviceWithOrderNHWC() {
  auto& X = Input(INPUT);
  auto& filter = Input(FILTER);
  auto* Y = Output(0);
  const int N = X.dim32(0), H = X.dim32(1), W = X.dim32(2), C = X.dim32(3);
  CAFFE_ENFORCE(4 == filter.ndim());
  const int M = filter.dim32(0);
  CAFFE_ENFORCE(filter.dim32(1) == kernel_h());
  CAFFE_ENFORCE(filter.dim32(2) == kernel_w());
  CAFFE_ENFORCE(filter.dim32(3) == C);
  ConvPoolOpBase<CPUContext>::SetOutputSize(X, Y, filter.dim32(0));
  // Eigen expects filter to be of shape (kernel_h, kernel_w, C, M) for
  // optimization purposes, so we will create a temp one.
  Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic> temp_filter(
      M, kernel_h() * kernel_w() * C);
  temp_filter = ConstEigenArrayMap<T>(
                    filter.template data<T>(), kernel_h() * kernel_w() * C, M)
                    .transpose();

  // Create tensor maps, and call spatial convolution.
  // TODO(jiayq): right now we const cast away the const pointer, but we will
  // need to figure out how to properly do a const tensormap.
  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> X_tensor(
      const_cast<T*>(X.template data<T>()), N, H, W, C);
  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> Y_tensor(
      Y->template mutable_data<T>(), N, Y->dim32(1), Y->dim32(2), M);
  Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> filter_tensor(
      const_cast<T*>(temp_filter.data()), kernel_h(), kernel_w(), C, M);

  // For Eigen, row and col actually correspond to width and height instead of
  // the other way round, so notice how we pass the stride, pad and dilation
  // values.
  typedef typename Eigen::internal::traits<
      Eigen::Tensor<T, 4, Eigen::RowMajor>>::Index TensorIndex;
  Eigen::array<Eigen::IndexPair<TensorIndex>, 1> contract_dims;
  contract_dims[0] = Eigen::IndexPair<TensorIndex>(1, 0);

  Eigen::DSizes<TensorIndex, 2> pre_contract_dims;
  pre_contract_dims[1] = kernel_h() * kernel_w() * C;
  pre_contract_dims[0] = Y->size() / M;

  Eigen::DSizes<TensorIndex, 2> kernel_dims;
  kernel_dims[0] = kernel_h() * kernel_w() * C;
  kernel_dims[1] = M;

  Eigen::array<TensorIndex, 4> bcast_dims;
  bcast_dims[0] = N;
  bcast_dims[1] = Y->dim32(1);
  bcast_dims[2] = Y->dim32(2);
  bcast_dims[3] = 1;

  Y_tensor = X_tensor
                 .extract_image_patches(
                     kernel_w(),
                     kernel_h(),
                     stride_w(),
                     stride_h(),
                     dilation_w(),
                     dilation_h(),
                     1,
                     1,
                     pad_l(),
                     pad_r(),
                     pad_t(),
                     pad_b(),
                     0)
                 .reshape(pre_contract_dims)
                 .contract(filter_tensor.reshape(kernel_dims), contract_dims)
                 .reshape(Y_tensor.dimensions());

  if (InputSize() == 3) {
    auto& bias = Input(BIAS);
    CAFFE_ENFORCE(1 == bias.ndim());
    CAFFE_ENFORCE(bias.dim32(0) == M);
    Eigen::TensorMap<Eigen::Tensor<T, 4, Eigen::RowMajor>> bias_tensor(
        const_cast<T*>(bias.template data<T>()), 1, 1, 1, M);
    // It seems that the bias broadcast is still slower so let's do the
    // following for now.
    EigenArrayMap<T> Y_arr(
        Y->template mutable_data<T>(), static_cast<TIndex>(M), Y->size() / M);
    ConstEigenVectorArrayMap<T> bias_arr(bias.template data<T>(), M);
    Y_arr = Y_arr.colwise() + bias_arr;
  }
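  // Note: bias_tensor and bcast_dims above are left unused by this path; they
  // correspond to the broadcast-based bias addition that the comment above
  // describes as slower than the column-wise array-map addition used here.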
  return true;
}

REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv, EIGEN, EigenConvOp<float>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv1D, EIGEN, EigenConvOp<float>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv2D, EIGEN, EigenConvOp<float>);
REGISTER_CPU_OPERATOR_WITH_ENGINE(Conv3D, EIGEN, EigenConvOp<float>);

} // namespace caffe2
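The REGISTER_CPU_OPERATOR_WITH_ENGINE calls only register this implementation under the EIGEN engine for the Conv operators; an operator definition still has to request that engine explicitly, otherwise the default CPU Conv kernel is dispatched. Below is a minimal, hypothetical C++ driver sketching that selection. The function name, blob names, shapes, argument values and fill values are illustrative assumptions and are not part of conv_op_eigen.cc.

#include <algorithm>
#include <memory>

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

namespace caffe2 {

// Hypothetical driver: runs a 3x3 Conv on a 1x3x8x8 NCHW input via the
// Eigen-backed implementation registered above.
bool RunConvWithEigenEngine() {
  Workspace ws;

  // Illustrative input, filter and bias blobs, filled with constants just so
  // the sketch stays short.
  auto* X = ws.CreateBlob("X")->GetMutable<TensorCPU>();
  X->Resize(1, 3, 8, 8);
  std::fill(X->mutable_data<float>(), X->mutable_data<float>() + X->size(), 1.f);
  auto* W = ws.CreateBlob("W")->GetMutable<TensorCPU>();
  W->Resize(6, 3, 3, 3);
  std::fill(W->mutable_data<float>(), W->mutable_data<float>() + W->size(), 1.f);
  auto* b = ws.CreateBlob("b")->GetMutable<TensorCPU>();
  b->Resize(6);
  std::fill(b->mutable_data<float>(), b->mutable_data<float>() + b->size(), 0.f);

  // Request the EIGEN engine; without set_engine("EIGEN") the default CPU
  // Conv implementation would be picked instead.
  OperatorDef def;
  def.set_type("Conv");
  def.set_engine("EIGEN");
  def.add_input("X");
  def.add_input("W");
  def.add_input("b");
  def.add_output("Y");
  auto* kernel = def.add_arg();
  kernel->set_name("kernel");
  kernel->set_i(3);

  std::unique_ptr<OperatorBase> op = CreateOperator(def, &ws);
  return op && op->Run();
}

} // namespace caffe2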