Caffe2 - C++ API
A deep learning, cross-platform ML framework
Conv.cpp
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>

#if !AT_MKLDNN_ENABLED()

namespace at { namespace native {

at::Tensor mkldnn_convolution(
    const at::Tensor& input, const at::Tensor& weight, const at::Tensor& bias,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups) {
  AT_ERROR("mkldnn_convolution_forward: ATen not compiled with MKLDNN support");
}

at::Tensor mkldnn_convolution_backward_input(
    IntArrayRef input_size, const at::Tensor& grad_output, const at::Tensor& weight,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined) {
  AT_ERROR("mkldnn_convolution_backward_input: ATen not compiled with MKLDNN support");
}

std::tuple<at::Tensor, at::Tensor> mkldnn_convolution_backward_weights(
    IntArrayRef weight_size, const at::Tensor& grad_output, const at::Tensor& input,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined) {
  AT_ERROR("mkldnn_convolution_backward_weights: ATen not compiled with MKLDNN support");
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> mkldnn_convolution_backward(
    const at::Tensor& input, const at::Tensor& grad_output_t, const at::Tensor& weight,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, std::array<bool, 3> output_mask) {
  AT_ERROR("mkldnn_convolution_backward: ATen not compiled with MKLDNN support");
}

}} // namespace at::native

#else // AT_MKLDNN_ENABLED

#include <ATen/mkldnn/Runtime.h>

using namespace mkldnn;

namespace at { namespace native {

constexpr int input_batch_size_dim = 0;  // also grad_input
constexpr int input_channels_dim = 1;
constexpr int output_batch_size_dim = 0; // also grad_output
constexpr int output_channels_dim = 1;
constexpr int weight_output_channels_dim = 0;
constexpr int weight_input_channels_dim = 1;

// Often written as 2 + max_dim (extra dims for batch size and channels)
constexpr int max_dim = 3;

static std::vector<int64_t> conv_output_size(
    IntArrayRef input_size, IntArrayRef weight_size,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups)
{
  auto dim = input_size.size();
  std::vector<int64_t> output_size(dim);
  output_size[0] = input_size[input_batch_size_dim];
  output_size[1] = weight_size[weight_output_channels_dim];
  for (size_t d = 2; d < dim; ++d) {
    auto kernel = dilation[d - 2] * (weight_size[d] - 1) + 1;
    output_size[d] = (input_size[d] + (2 * padding[d - 2]) - kernel) / stride[d - 2] + 1;
  }
  return output_size;
}
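
// A worked example of conv_output_size above (shapes are illustrative, not
// taken from this file): input {1, 3, 224, 224}, weight {64, 3, 7, 7},
// padding {3, 3}, stride {2, 2}, dilation {1, 1}:
//   kernel         = 1 * (7 - 1) + 1 = 7
//   output_size[2] = (224 + 2 * 3 - 7) / 2 + 1 = 112
// so the result is {1, 64, 112, 112}.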

at::Tensor mkldnn_convolution(
    const at::Tensor& input, const at::Tensor& weight, const at::Tensor& bias,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups)
{
  auto output = at::empty(conv_output_size(
      input.sizes(), weight.sizes(), padding, stride, dilation, groups), input.options());

  auto cpu_engine = CpuEngine::Instance().get_engine();

  int32_t g = groups;

  int32_t n = input.size(0);
  int32_t ic = input.size(1);
  int32_t ih = input.size(2);
  int32_t iw = input.size(3);

  int32_t oc = output.size(1);
  int32_t oh = output.size(2);
  int32_t ow = output.size(3);

  int32_t kh = weight.size(2);
  int32_t kw = weight.size(3);

  int32_t sh = stride[0];
  int32_t sw = stride[1];
  int32_t ph = padding[0];
  int32_t pw = padding[1];

  auto data_t = memory::data_type::f32;
  auto format_any = memory::format::any;
  auto format_nchw = memory::format::nchw;
  auto format_weight = (g != 1) ? memory::format::goihw : memory::format::oihw;
  auto format_x = memory::format::x;

  memory::dims input_tz = {n, ic, ih, iw};
  memory::dims weight_tz = (g != 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw};
  memory::dims bias_tz = {oc};
  memory::dims output_tz = {n, oc, oh, ow};
  memory::dims _stride = {sh, sw};
  memory::dims _padding = {ph, pw};
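
  // The memory descriptors below use memory::format::any so MKL-DNN can pick
  // whatever layout it deems fastest for these shapes; the reorder primitives
  // queued further down bridge between the user's nchw/oihw(/goihw) tensors
  // and that chosen layout.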
  auto input_md = memory::desc({input_tz}, data_t, format_any);
  auto weight_md = memory::desc({weight_tz}, data_t, format_any);
  auto bias_md = memory::desc({bias_tz}, data_t, format_any);
  auto output_md = memory::desc({output_tz}, data_t, format_any);

  std::shared_ptr<convolution_forward::desc> conv_forward_desc;
  if (bias.defined()) {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, bias_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  } else {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  }

  std::shared_ptr<convolution_forward::primitive_desc> conv_forward_pd;
  conv_forward_pd.reset(new convolution_forward::primitive_desc(
      *conv_forward_desc, cpu_engine));

  auto input_usr_memory = memory({{{input_tz}, data_t, format_nchw}, cpu_engine},
      input.data_ptr());
  auto weight_usr_memory = memory({{{weight_tz}, data_t, format_weight}, cpu_engine},
      weight.data_ptr());
  auto output_usr_memory = memory({{{output_tz}, data_t, format_nchw}, cpu_engine},
      output.data_ptr());

  std::vector<primitive> net;
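
  // Per-operand pattern: if the user tensor's layout already matches the one
  // the primitive_desc selected, use it directly; otherwise allocate a memory
  // in the preferred layout and queue a reorder into it. The output reorder
  // (back to nchw) is deferred until after the convolution itself runs.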
  auto input_pd = conv_forward_pd->src_primitive_desc();
  auto input_memory = input_usr_memory;
  if (input_usr_memory.get_primitive_desc() != memory::primitive_desc(input_pd)) {
    input_memory = memory(input_pd);
    net.push_back(reorder(input_usr_memory, input_memory));
  }

  auto weight_pd = conv_forward_pd->weights_primitive_desc();
  auto weight_memory = weight_usr_memory;
  if (weight_usr_memory.get_primitive_desc() != memory::primitive_desc(weight_pd)) {
    weight_memory = memory(weight_pd);
    net.push_back(reorder(weight_usr_memory, weight_memory));
  }

  auto output_pd = conv_forward_pd->dst_primitive_desc();
  auto output_memory = output_usr_memory;
  if (output_usr_memory.get_primitive_desc() != memory::primitive_desc(output_pd)) {
    output_memory = memory(output_pd);
  }

  std::shared_ptr<convolution_forward> conv_forward;
  std::shared_ptr<memory> bias_usr_memory;
  if (bias.defined()) {
    bias_usr_memory.reset(new memory({{{bias_tz}, data_t, format_x}, cpu_engine},
        bias.data_ptr()));
    conv_forward.reset(new convolution_forward(*conv_forward_pd, input_memory,
        weight_memory, *bias_usr_memory, output_memory));
  } else {
    conv_forward.reset(new convolution_forward(*conv_forward_pd, input_memory,
        weight_memory, output_memory));
  }
  net.push_back(*conv_forward);

  if (output_memory != output_usr_memory) {
    net.push_back(reorder(output_memory, output_usr_memory));
  }

  Stream::Instance().get_stream().submit(net);

  return output;
}
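
// Hypothetical usage sketch for the op above (names and shapes are
// illustrative only). With MKLDNN enabled this is typically reached through
// the dispatcher as at::mkldnn_convolution:
//   auto out = at::mkldnn_convolution(
//       input,   // e.g. a contiguous kFloat CPU tensor of shape {8, 3, 32, 32}
//       weight,  // e.g. {16, 3, 3, 3}
//       bias,    // e.g. {16}, or an undefined Tensor for no bias
//       /*padding=*/{1, 1}, /*stride=*/{1, 1}, /*dilation=*/{1, 1}, /*groups=*/1);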

at::Tensor mkldnn_convolution_backward_input(
    IntArrayRef input_size, const at::Tensor& grad_output, const at::Tensor& weight,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined)
{
  auto grad_input = at::empty(input_size, grad_output.options());

  auto cpu_engine = CpuEngine::Instance().get_engine();

  int32_t g = groups;

  int32_t n = grad_input.size(0);
  int32_t ic = grad_input.size(1);
  int32_t ih = grad_input.size(2);
  int32_t iw = grad_input.size(3);

  int32_t oc = grad_output.size(1);
  int32_t oh = grad_output.size(2);
  int32_t ow = grad_output.size(3);

  int32_t kh = weight.size(2);
  int32_t kw = weight.size(3);

  int32_t sh = stride[0];
  int32_t sw = stride[1];
  int32_t ph = padding[0];
  int32_t pw = padding[1];

  auto data_t = memory::data_type::f32;
  auto format_any = memory::format::any;
  auto format_nchw = memory::format::nchw;
  auto format_weight = (g != 1) ? memory::format::goihw : memory::format::oihw;

  memory::dims input_tz = {n, ic, ih, iw};
  memory::dims weight_tz = (g != 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw};
  memory::dims bias_tz = {oc};
  memory::dims output_tz = {n, oc, oh, ow};
  memory::dims _stride = {sh, sw};
  memory::dims _padding = {ph, pw};

  auto input_md = memory::desc({input_tz}, data_t, format_any);
  auto weight_md = memory::desc({weight_tz}, data_t, format_any);
  auto bias_md = memory::desc({bias_tz}, data_t, format_any);
  auto output_md = memory::desc({output_tz}, data_t, format_any);

  // Backward primitive descriptors take the forward primitive_desc as a hint,
  // so re-create conv_forward_pd here to feed conv_backward_data_pd.
  std::shared_ptr<convolution_forward::desc> conv_forward_desc;
  if (bias_defined) {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, bias_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  } else {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  }

  std::shared_ptr<convolution_forward::primitive_desc> conv_forward_pd;
  conv_forward_pd.reset(new convolution_forward::primitive_desc(
      *conv_forward_desc, cpu_engine));

  std::shared_ptr<convolution_backward_data::desc> conv_backward_data_desc;
  conv_backward_data_desc.reset(new convolution_backward_data::desc(
      convolution_direct, input_md, weight_md, output_md,
      _stride, _padding, _padding, padding_kind::zero));
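
  // Passing *conv_forward_pd as the hint lets MKL-DNN pick backward-data
  // memory formats consistent with the forward pass.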
  std::shared_ptr<convolution_backward_data::primitive_desc> conv_backward_data_pd;
  conv_backward_data_pd.reset(new convolution_backward_data::primitive_desc(
      *conv_backward_data_desc, cpu_engine, *conv_forward_pd));

  auto grad_output_usr_memory = memory({{{output_tz}, data_t, format_nchw}, cpu_engine},
      grad_output.data_ptr());
  auto weight_usr_memory = memory({{{weight_tz}, data_t, format_weight}, cpu_engine},
      weight.data_ptr());
  auto grad_input_usr_memory = memory({{{input_tz}, data_t, format_nchw}, cpu_engine},
      grad_input.data_ptr());

  std::vector<primitive> net;

  auto grad_output_pd = conv_backward_data_pd->diff_dst_primitive_desc();
  auto grad_output_memory = grad_output_usr_memory;
  if (grad_output_usr_memory.get_primitive_desc() != memory::primitive_desc(grad_output_pd)) {
    grad_output_memory = memory(grad_output_pd);
    net.push_back(reorder(grad_output_usr_memory, grad_output_memory));
  }

  auto weight_pd = conv_backward_data_pd->weights_primitive_desc();
  auto weight_memory = weight_usr_memory;
  if (weight_usr_memory.get_primitive_desc() != memory::primitive_desc(weight_pd)) {
    weight_memory = memory(weight_pd);
    net.push_back(reorder(weight_usr_memory, weight_memory));
  }

  auto grad_input_pd = conv_backward_data_pd->diff_src_primitive_desc();
  auto grad_input_memory = grad_input_usr_memory;
  if (grad_input_memory.get_primitive_desc() != memory::primitive_desc(grad_input_pd)) {
    grad_input_memory = memory(grad_input_pd);
  }

  std::shared_ptr<convolution_backward_data> conv_backward_data;
  conv_backward_data.reset(new convolution_backward_data(*conv_backward_data_pd,
      grad_output_memory, weight_memory, grad_input_memory));
  net.push_back(*conv_backward_data);

  if (grad_input_memory != grad_input_usr_memory) {
    net.push_back(reorder(grad_input_memory, grad_input_usr_memory));
  }

  Stream::Instance().get_stream().submit(net);

  return grad_input;
}

std::tuple<at::Tensor, at::Tensor> mkldnn_convolution_backward_weights(
    IntArrayRef weight_size, const at::Tensor& grad_output, const at::Tensor& input,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined)
{
  auto grad_weight = at::empty(weight_size, grad_output.options());

  Tensor grad_bias;
  if (bias_defined) {
    grad_bias = at::empty({grad_output.size(1)}, grad_output.options());
  }

  auto cpu_engine = CpuEngine::Instance().get_engine();

  int32_t g = groups;

  int32_t n = input.size(0);
  int32_t ic = input.size(1);
  int32_t ih = input.size(2);
  int32_t iw = input.size(3);

  int32_t oc = grad_output.size(1);
  int32_t oh = grad_output.size(2);
  int32_t ow = grad_output.size(3);

  int32_t kh = grad_weight.size(2);
  int32_t kw = grad_weight.size(3);

  int32_t sh = stride[0];
  int32_t sw = stride[1];
  int32_t ph = padding[0];
  int32_t pw = padding[1];

  auto data_t = memory::data_type::f32;
  auto format_any = memory::format::any;
  auto format_nchw = memory::format::nchw;
  auto format_weight = (g != 1) ? memory::format::goihw : memory::format::oihw;
  auto format_x = memory::format::x;

  memory::dims input_tz = {n, ic, ih, iw};
  memory::dims weight_tz = (g != 1) ? memory::dims{g, oc/g, ic/g, kh, kw} : memory::dims{oc, ic, kh, kw};
  memory::dims bias_tz = {oc};
  memory::dims output_tz = {n, oc, oh, ow};
  memory::dims _stride = {sh, sw};
  memory::dims _padding = {ph, pw};

  memory::desc input_md({input_tz}, data_t, format_any);
  memory::desc weight_md({weight_tz}, data_t, format_any);
  memory::desc bias_md({bias_tz}, data_t, format_any);
  memory::desc output_md({output_tz}, data_t, format_any);

  // As above, re-create conv_forward_pd to serve as the hint for
  // conv_backward_weight_pd.
  std::shared_ptr<convolution_forward::desc> conv_forward_desc;
  if (bias_defined) {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, bias_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  } else {
    conv_forward_desc.reset(new convolution_forward::desc(prop_kind::forward,
        convolution_direct, input_md, weight_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  }

  std::shared_ptr<convolution_forward::primitive_desc> conv_forward_pd;
  conv_forward_pd.reset(new convolution_forward::primitive_desc(
      *conv_forward_desc, cpu_engine));

  std::shared_ptr<convolution_backward_weights::desc> conv_backward_weight_desc;
  if (bias_defined) {
    conv_backward_weight_desc.reset(new convolution_backward_weights::desc(
        convolution_direct, input_md, weight_md, bias_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  } else {
    conv_backward_weight_desc.reset(new convolution_backward_weights::desc(
        convolution_direct, input_md, weight_md, output_md,
        _stride, _padding, _padding, padding_kind::zero));
  }

  std::shared_ptr<convolution_backward_weights::primitive_desc> conv_backward_weight_pd;
  conv_backward_weight_pd.reset(new convolution_backward_weights::primitive_desc(
      *conv_backward_weight_desc, cpu_engine, *conv_forward_pd));

  auto input_usr_memory = memory({{{input_tz}, data_t, format_nchw}, cpu_engine},
      input.data_ptr());
  auto grad_output_usr_memory = memory({{{output_tz}, data_t, format_nchw}, cpu_engine},
      grad_output.data_ptr());
  auto grad_weight_usr_memory = memory({{{weight_tz}, data_t, format_weight}, cpu_engine},
      grad_weight.data_ptr());
  std::shared_ptr<memory> grad_bias_memory;

  std::vector<primitive> net;

  auto input_pd = conv_backward_weight_pd->src_primitive_desc();
  auto input_memory = input_usr_memory;
  if (input_usr_memory.get_primitive_desc() != memory::primitive_desc(input_pd)) {
    input_memory = memory(input_pd);
    net.push_back(reorder(input_usr_memory, input_memory));
  }

  auto grad_output_pd = conv_backward_weight_pd->diff_dst_primitive_desc();
  auto grad_output_memory = grad_output_usr_memory;
  if (grad_output_usr_memory.get_primitive_desc() != memory::primitive_desc(grad_output_pd)) {
    grad_output_memory = memory(grad_output_pd);
    net.push_back(reorder(grad_output_usr_memory, grad_output_memory));
  }

  auto grad_weight_pd = conv_backward_weight_pd->diff_weights_primitive_desc();
  auto grad_weight_memory = grad_weight_usr_memory;
  if (grad_weight_usr_memory.get_primitive_desc() != memory::primitive_desc(grad_weight_pd)) {
    grad_weight_memory = memory(grad_weight_pd);
  }

  std::shared_ptr<convolution_backward_weights> conv_backward_weight;
  if (bias_defined) {
    grad_bias_memory.reset(new memory({{{bias_tz}, data_t, format_x}, cpu_engine},
        grad_bias.data_ptr()));
    conv_backward_weight.reset(new convolution_backward_weights(*conv_backward_weight_pd,
        input_memory, grad_output_memory, grad_weight_memory, *grad_bias_memory));
  } else {
    conv_backward_weight.reset(new convolution_backward_weights(*conv_backward_weight_pd,
        input_memory, grad_output_memory, grad_weight_memory));
  }

  net.push_back(*conv_backward_weight);

  if (grad_weight_memory != grad_weight_usr_memory) {
    net.push_back(reorder(grad_weight_memory, grad_weight_usr_memory));
  }

  Stream::Instance().get_stream().submit(net);

  // Note: grad_bias stays an undefined Tensor when bias_defined is false.
  return std::tuple<at::Tensor, at::Tensor>{grad_weight, grad_bias};
}

std::tuple<at::Tensor, at::Tensor, at::Tensor> mkldnn_convolution_backward(
    const at::Tensor& input, const at::Tensor& grad_output_t, const at::Tensor& weight,
    IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, std::array<bool, 3> output_mask)
{
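  // The helpers above bind grad_output.data_ptr() into nchw user memories,
  // so grad_output must be made dense/contiguous first.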
  Tensor grad_output = grad_output_t.contiguous();

  Tensor grad_input, grad_weight, grad_bias;
  if (output_mask[0]) {
    grad_input = at::mkldnn_convolution_backward_input(
        input.sizes(), grad_output, weight, padding, stride, dilation, groups, output_mask[2]);
  }
  if (output_mask[1] || output_mask[2]) {
    std::tie(grad_weight, grad_bias) = at::mkldnn_convolution_backward_weights(
        weight.sizes(), grad_output, input, padding, stride, dilation, groups, output_mask[2]);
  }

  return std::tuple<Tensor, Tensor, Tensor>{grad_input, grad_weight, grad_bias};
}

}} // namespace at::native

#endif // AT_MKLDNN_ENABLED