Caffe2 - C++ API
A deep learning, cross platform ML framework
common_cudnn.h
1 #ifndef CAFFE2_CORE_COMMON_CUDNN_H_
2 #define CAFFE2_CORE_COMMON_CUDNN_H_
3 
4 #include <array>
5 #include <mutex>
6 
7 #include "caffe2/core/common.h"
8 #include "caffe2/core/context.h"
9 #include "caffe2/core/logging.h"
10 #include "caffe2/core/types.h"
11 
12 #ifndef CAFFE2_USE_CUDNN
13 #error("This Caffe2 install is not built with cudnn, so you should not include this file.");
14 #endif
15 
16 #include <cudnn.h>
17 
18 static_assert(
19  CUDNN_VERSION >= 5000,
20  "Caffe2 requires cudnn version 5.0 or above.");
21 
22 #if CUDNN_VERSION < 6000
23 #pragma message "CUDNN version under 6.0 is supported at best effort."
24 #pragma message "We strongly encourage you to move to 6.0 and above."
25 #pragma message "This message is intended to annoy you enough to update."
26 #endif // CUDNN_VERSION < 6000
27 
// True when the cuDNN headers in use are at least version major.minor.patch.
// The requested version is folded into a single integer as
// major * 1000 + minor * 100 + patch and compared against CUDNN_VERSION.
// Works both inside #if directives and in ordinary expressions.
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (((major) * 1000 + (minor) * 100 + (patch)) <= CUDNN_VERSION)
30 
31 namespace caffe2 {
32 
33 namespace internal {
37 inline const char* cudnnGetErrorString(cudnnStatus_t status) {
38  switch (status) {
39  case CUDNN_STATUS_SUCCESS:
40  return "CUDNN_STATUS_SUCCESS";
41  case CUDNN_STATUS_NOT_INITIALIZED:
42  return "CUDNN_STATUS_NOT_INITIALIZED";
43  case CUDNN_STATUS_ALLOC_FAILED:
44  return "CUDNN_STATUS_ALLOC_FAILED";
45  case CUDNN_STATUS_BAD_PARAM:
46  return "CUDNN_STATUS_BAD_PARAM";
47  case CUDNN_STATUS_INTERNAL_ERROR:
48  return "CUDNN_STATUS_INTERNAL_ERROR";
49  case CUDNN_STATUS_INVALID_VALUE:
50  return "CUDNN_STATUS_INVALID_VALUE";
51  case CUDNN_STATUS_ARCH_MISMATCH:
52  return "CUDNN_STATUS_ARCH_MISMATCH";
53  case CUDNN_STATUS_MAPPING_ERROR:
54  return "CUDNN_STATUS_MAPPING_ERROR";
55  case CUDNN_STATUS_EXECUTION_FAILED:
56  return "CUDNN_STATUS_EXECUTION_FAILED";
57  case CUDNN_STATUS_NOT_SUPPORTED:
58  return "CUDNN_STATUS_NOT_SUPPORTED";
59  case CUDNN_STATUS_LICENSE_ERROR:
60  return "CUDNN_STATUS_LICENSE_ERROR";
61  default:
62  return "Unknown cudnn error number";
63  }
64 }
65 } // namespace internal
66 
// Wraps a cuDNN call: the call is evaluated exactly once and its status is
// compared against CUDNN_STATUS_SUCCESS through CAFFE_ENFORCE_EQ. The extra
// message arguments carry the call site (__FILE__ ":" __LINE__) and the
// textual name of the returned status.
//
// NOTE: the macro declares a local variable named `status`; the wrapped
// expression must therefore not refer to a caller variable of that name,
// because it would be shadowed inside the macro body.
#define CUDNN_ENFORCE(condition)                          \
  do {                                                    \
    cudnnStatus_t status = (condition);                   \
    CAFFE_ENFORCE_EQ(                                     \
        status,                                           \
        CUDNN_STATUS_SUCCESS,                             \
        ", Error at: ",                                   \
        __FILE__,                                         \
        ":",                                              \
        __LINE__,                                         \
        ": ",                                             \
        ::caffe2::internal::cudnnGetErrorString(status)); \
  } while (false)
// Same idea as CUDNN_ENFORCE, but the status is verified with CHECK and the
// textual status name is streamed into the CHECK message. In this file it is
// the variant used inside noexcept destructors.
//
// NOTE: like CUDNN_ENFORCE, this declares a local named `status`, which
// shadows any caller variable of the same name used in the wrapped call.
#define CUDNN_CHECK(condition)                              \
  do {                                                      \
    cudnnStatus_t status = (condition);                     \
    CHECK(status == CUDNN_STATUS_SUCCESS)                   \
        << ::caffe2::internal::cudnnGetErrorString(status); \
  } while (false)
88 
89 // report the version of cuDNN Caffe2 was compiled with
90 inline size_t cudnnCompiledVersion() {
91  return CUDNN_VERSION;
92 }
93 // report the runtime version of cuDNN
94 inline size_t cudnnRuntimeVersion() {
95  return cudnnGetVersion();
96 }
97 
98 // Check compatibility of compiled and runtime cuDNN versions
99 inline void CheckCuDNNVersions() {
100  // Version format is major*1000 + minor*100 + patch
101  // If compiled with version < 7, major, minor and patch must all match
102  // If compiled with version >= 7, then either
103  // runtime_version > compiled_version
104  // major and minor match
105  bool version_match = cudnnCompiledVersion() == cudnnRuntimeVersion();
106  bool compiled_with_7 = cudnnCompiledVersion() >= 7000;
107  bool backwards_compatible_7 = compiled_with_7 && cudnnRuntimeVersion() >= cudnnCompiledVersion();
108  bool patch_compatible = compiled_with_7 && (cudnnRuntimeVersion() / 100) == (cudnnCompiledVersion() / 100);
109  CAFFE_ENFORCE(version_match || backwards_compatible_7 || patch_compatible,
110  "cuDNN compiled (", cudnnCompiledVersion(), ") and "
111  "runtime (", cudnnRuntimeVersion(), ") versions mismatch");
112 }
113 
// cudnnTypeWrapper is a wrapper class that allows us to refer to the cudnn
// type in a template function. Only the primary template is declared here;
// the members (type, ScalingParamType, BNParamType, kOne(), kZero()) are
// supplied by the explicit specializations that follow, so using an element
// type without a specialization is a compile-time error.
template <typename T>
class cudnnTypeWrapper;
121 
122 template <>
123 class cudnnTypeWrapper<float> {
124  public:
125  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
126  typedef const float ScalingParamType;
127  typedef float BNParamType;
128  static ScalingParamType* kOne() {
129  static ScalingParamType v = 1.0;
130  return &v;
131  }
132  static const ScalingParamType* kZero() {
133  static ScalingParamType v = 0.0;
134  return &v;
135  }
136 };
137 
138 #if CUDNN_VERSION_MIN(6, 0, 0)
139 template <>
140 class cudnnTypeWrapper<int> {
141  public:
142  static const cudnnDataType_t type = CUDNN_DATA_INT32;
143  typedef const int ScalingParamType;
144  typedef int BNParamType;
145  static ScalingParamType* kOne() {
146  static ScalingParamType v = 1;
147  return &v;
148  }
149  static const ScalingParamType* kZero() {
150  static ScalingParamType v = 0;
151  return &v;
152  }
153 };
154 #endif // CUDNN_VERSION_MIN(6, 0, 0)
155 
156 template <>
157 class cudnnTypeWrapper<double> {
158  public:
159  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
160  typedef const double ScalingParamType;
161  typedef double BNParamType;
162  static ScalingParamType* kOne() {
163  static ScalingParamType v = 1.0;
164  return &v;
165  }
166  static ScalingParamType* kZero() {
167  static ScalingParamType v = 0.0;
168  return &v;
169  }
170 };
171 
172 template <>
174  public:
175  static const cudnnDataType_t type = CUDNN_DATA_HALF;
176  typedef const float ScalingParamType;
177  typedef float BNParamType;
178  static ScalingParamType* kOne() {
179  static ScalingParamType v = 1.0;
180  return &v;
181  }
182  static ScalingParamType* kZero() {
183  static ScalingParamType v = 0.0;
184  return &v;
185  }
186 };
187 
192 inline cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order) {
193  switch (order) {
194  case StorageOrder::NHWC:
195  return CUDNN_TENSOR_NHWC;
196  case StorageOrder::NCHW:
197  return CUDNN_TENSOR_NCHW;
198  default:
199  LOG(FATAL) << "Unknown cudnn equivalent for order: " << order;
200  }
201  // Just to suppress compiler warnings
202  return CUDNN_TENSOR_NCHW;
203 }
204 
211  public:
213  CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&desc_));
214  }
215  ~cudnnTensorDescWrapper() noexcept {
216  CUDNN_CHECK(cudnnDestroyTensorDescriptor(desc_));
217  }
218 
219  inline cudnnTensorDescriptor_t Descriptor(
220  const cudnnTensorFormat_t format,
221  const cudnnDataType_t type,
222  const vector<int>& dims,
223  bool* changed) {
224  if (type_ == type && format_ == format && dims_ == dims) {
225  // if not changed, simply return the current descriptor.
226  if (changed)
227  *changed = false;
228  return desc_;
229  }
230  CAFFE_ENFORCE_EQ(
231  dims.size(), 4, "Currently only 4-dimensional descriptor supported.");
232  format_ = format;
233  type_ = type;
234  dims_ = dims;
235  CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
236  desc_,
237  format,
238  type,
239  dims_[0],
240  (format == CUDNN_TENSOR_NCHW ? dims_[1] : dims_[3]),
241  (format == CUDNN_TENSOR_NCHW ? dims_[2] : dims_[1]),
242  (format == CUDNN_TENSOR_NCHW ? dims_[3] : dims_[2])));
243  if (changed)
244  *changed = true;
245  return desc_;
246  }
247 
248  template <typename T>
249  inline cudnnTensorDescriptor_t Descriptor(
250  const StorageOrder& order,
251  const vector<int>& dims) {
252  return Descriptor(
253  GetCudnnTensorFormat(order), cudnnTypeWrapper<T>::type, dims, nullptr);
254  }
255 
256  private:
257  cudnnTensorDescriptor_t desc_;
258  cudnnTensorFormat_t format_;
259  cudnnDataType_t type_;
260  vector<int> dims_;
261  C10_DISABLE_COPY_AND_ASSIGN(cudnnTensorDescWrapper);
262 };
263 
265  public:
267  CUDNN_ENFORCE(cudnnCreateFilterDescriptor(&desc_));
268  }
269  ~cudnnFilterDescWrapper() noexcept {
270  CUDNN_CHECK(cudnnDestroyFilterDescriptor(desc_));
271  }
272 
273  inline cudnnFilterDescriptor_t Descriptor(
274  const StorageOrder& order,
275  const cudnnDataType_t type,
276  const vector<int>& dims,
277  bool* changed) {
278  if (type_ == type && order_ == order && dims_ == dims) {
279  // if not changed, simply return the current descriptor.
280  if (changed)
281  *changed = false;
282  return desc_;
283  }
284  CAFFE_ENFORCE_EQ(
285  dims.size(), 4, "Currently only 4-dimensional descriptor supported.");
286  order_ = order;
287  type_ = type;
288  dims_ = dims;
289  CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
290  desc_,
291  type,
292  GetCudnnTensorFormat(order),
293  dims_[0],
294  // TODO - confirm that this is correct for NHWC
295  (order == StorageOrder::NCHW ? dims_[1] : dims_[3]),
296  (order == StorageOrder::NCHW ? dims_[2] : dims_[1]),
297  (order == StorageOrder::NCHW ? dims_[3] : dims_[2])));
298  if (changed)
299  *changed = true;
300  return desc_;
301  }
302 
303  template <typename T>
304  inline cudnnFilterDescriptor_t Descriptor(
305  const StorageOrder& order,
306  const vector<int>& dims) {
307  return Descriptor(order, cudnnTypeWrapper<T>::type, dims, nullptr);
308  }
309 
310  private:
311  cudnnFilterDescriptor_t desc_;
312  StorageOrder order_;
313  cudnnDataType_t type_;
314  vector<int> dims_;
315  C10_DISABLE_COPY_AND_ASSIGN(cudnnFilterDescWrapper);
316 };
317 
318 
319 } // namespace caffe2
320 
321 #endif // CAFFE2_CORE_COMMON_CUDNN_H_
cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder &order)
A wrapper function to convert the Caffe storage order to cudnn storage order enum values...
Definition: common_cudnn.h:192
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Flush-To-Zero and Denormals-Are-Zero mode.
cudnnTensorDescWrapper is the placeholder that wraps around a cudnnTensorDescriptor_t, allowing us to do descriptor change as-needed during runtime.
Definition: common_cudnn.h:210
cudnnTypeWrapper is a wrapper class that allows us to refer to the cudnn type in a template function...
Definition: common_cudnn.h:120