Caffe2 - C++ API
A deep learning, cross-platform ML framework
GridSampler.cpp
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>
#include <ATen/Config.h>
#include <ATen/cuda/CUDAConfig.h>

#if !AT_CUDNN_ENABLED()

namespace at { namespace native {

// See Note [ATen preprocessor philosophy]

Tensor cudnn_grid_sampler_forward(
    const Tensor& input_t, const Tensor& grid_t) {
  AT_ERROR("cudnn_grid_sampler_forward: ATen not compiled with cuDNN support");
}

std::tuple<Tensor, Tensor> cudnn_grid_sampler_backward(
    const Tensor& input_t, const Tensor& grid_t,
    const Tensor& grad_output_t) {
  AT_ERROR("cudnn_grid_sampler_backward: ATen not compiled with cuDNN support");
}

}}

#else // AT_CUDNN_ENABLED

#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Types.h>
#include <ATen/cudnn/Utils.h>
#include <ATen/cuda/Exceptions.h>

#include <ATen/TensorUtils.h>

// TODO: descriptor checking

namespace at { namespace native {

namespace {

// Fills a cuDNN spatial transformer sampler descriptor with the (up to 4-D)
// sizes of `tensor`, in the tensor's dimension order.
void setSamplerDescriptor(SpatialTransformerDescriptor& desc, cudnnDataType_t dataType, const at::Tensor& tensor)
{
  int inputSize[4] = {0};
  for (int i = 0; i < tensor.dim(); ++i) {
    inputSize[i] = (int) tensor.size(i);
  }
  desc.set(dataType, 4, inputSize);
}
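
// Note (illustrative, derived from the calls below): the forward pass passes
// its NCHW output here, so the sampler descriptor gets dims
// {N, C, H_out, W_out}; the backward pass passes grad_output, which has the
// same shape.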

void checkGridSize(CheckedFrom c, TensorArg grid, TensorArg input)
{
  // Assert that the size of grid is N x H x W x 2.
  // FYI: grid values lie in [-1, 1], where -1 represents the left-most pixel
  // and 1 the right-most pixel (hence 0 is the center pixel).
  // If grid has values > 1 or < -1, those values are ignored.
  checkContiguous(c, grid);
  checkDim(c, grid, 4);
  // TODO: Maybe more user-friendly to report where the expected size
  // came from
  checkSize(c, grid, 0, input->size(0));
  checkSize(c, grid, 3, 2);
}
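
// Shape summary (derived from the checks above and the forward pass below):
// input is {N, C, H_in, W_in}, grid is {N, H_out, W_out, 2}, and the sampled
// output is {N, C, H_out, W_out}.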

} // namespace

Tensor cudnn_grid_sampler_forward(
    const Tensor& input_t, const Tensor& grid_t)
{
  TensorArg input{ contiguousIfZeroInStrides(input_t), "input", 1 },
            grid{ grid_t.contiguous(), "grid", 2 };
  CheckedFrom c = "cudnn_grid_sampler_forward";
  setCuDNNStreamToCurrent();
  checkAllSameGPU(c, {input, grid});
  checkAllSameType(c, {input, grid});
  checkGridSize(c, grid, input);
  checkDim(c, input, 4);

  auto output_t = at::empty({0}, input->options());
  output_t.resize_({input->size(0), input->size(1), grid->size(1), grid->size(2)});

  TensorDescriptor idesc{ *input };  // input descriptor
  TensorDescriptor odesc{ output_t };  // output descriptor
  SpatialTransformerDescriptor desc; // sampler descriptor

  auto handle = getCudnnHandle();
  auto dataType = getCudnnDataType(*input);
  setSamplerDescriptor(desc, dataType, output_t);

  Constant one(dataType, 1);
  Constant zero(dataType, 0);
  AT_CUDNN_CHECK(cudnnSpatialTfSamplerForward(
      handle, desc.desc(),
      &one, idesc.desc(), input->data_ptr(),
      grid->data_ptr(),
      &zero, odesc.desc(), output_t.data_ptr()
  ));

  return output_t;
}
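
// Usage sketch (illustrative only, not part of the original file; shapes are
// arbitrary). A caller with a CUDA input of shape {N, C, H_in, W_in} and a
// grid of shape {N, H_out, W_out, 2} with values in [-1, 1] might do:
//
//   auto input = at::randn({1, 3, 8, 8}, at::kCUDA);          // {N, C, H_in, W_in}
//   auto grid  = at::rand({1, 5, 5, 2}, at::kCUDA) * 2 - 1;   // {N, H_out, W_out, 2}
//   auto out   = cudnn_grid_sampler_forward(input, grid);     // -> {1, 3, 5, 5}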

// NB: CuDNN does not support output mask; you always get both
// gradients.
std::tuple<Tensor, Tensor> cudnn_grid_sampler_backward(
    const Tensor& input_t, const Tensor& grid_t,
    const Tensor& grad_output_t)
{
  TensorArg input{ contiguousIfZeroInStrides(input_t), "input", 1 },
            grid{ grid_t.contiguous(), "grid", 2 },
            grad_output{ contiguousIfZeroInStrides(grad_output_t), "grad_output", 3 };
  CheckedFrom c = "cudnn_grid_sampler_backward";
  setCuDNNStreamToCurrent();
  checkAllSameGPU(c, {input, grad_output, grid});
  checkGridSize(c, grid, input);
  checkDim(c, input, 4);
  checkDim(c, grad_output, 4);

  auto grad_input_t = at::empty({0}, input->options());
  grad_input_t.resize_(input->sizes());
  auto grad_grid_t = at::empty({0}, grid->options());
  grad_grid_t.resize_(grid->sizes());

  TensorDescriptor idesc{ *input };  // input descriptor
  TensorDescriptor odesc{ *grad_output };  // grad_output descriptor
  TensorDescriptor gdesc{ grad_input_t };  // grad_input descriptor
  SpatialTransformerDescriptor desc; // sampler descriptor

  auto handle = getCudnnHandle();
  auto dataType = getCudnnDataType(*input);
  setSamplerDescriptor(desc, dataType, *grad_output);

  Constant one(dataType, 1);
  Constant zero(dataType, 0);
  AT_CUDNN_CHECK(cudnnSpatialTfSamplerBackward(
      handle, desc.desc(),
      &one, idesc.desc(), input->data_ptr(),
      &zero, gdesc.desc(), grad_input_t.data_ptr(),
      &one, odesc.desc(), grad_output->data_ptr(),
      // Intriguingly, the outputs don't need descriptors
      grid->data_ptr(),
      &zero, grad_grid_t.data_ptr()
  ));

  return std::tuple<Tensor, Tensor>{ grad_input_t, grad_grid_t };
}
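
// Usage sketch (illustrative only, not part of the original file): continuing
// the forward example above with an upstream gradient of the output's shape:
//
//   auto grad_out = at::ones({1, 3, 5, 5}, at::kCUDA);
//   Tensor grad_input, grad_grid;
//   std::tie(grad_input, grad_grid) =
//       cudnn_grid_sampler_backward(input, grid, grad_out);
//   // grad_input: {1, 3, 8, 8} (same as input); grad_grid: {1, 5, 5, 2} (same as grid)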

}} // namespace at::native

#endif