Caffe2 - C++ API
A deep learning, cross-platform ML framework
activation_ops_cudnn.h
#ifndef CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_
#define CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_

#include "caffe2/core/context_gpu.h"
#include "caffe2/core/cudnn_wrappers.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/types.h"

namespace caffe2 {

class CuDNNActivationOpBase : public Operator<CUDAContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationOpBase(Args&&... args)
      : Operator<CUDAContext>(std::forward<Args>(args)...),
        cudnn_wrapper_(&context_) {
    CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&data_desc_));
    CUDNN_ENFORCE(cudnnCreateActivationDescriptor(&act_desc_));
  }

  virtual ~CuDNNActivationOpBase() {
    CUDNN_ENFORCE(cudnnDestroyTensorDescriptor(data_desc_));
    CUDNN_ENFORCE(cudnnDestroyActivationDescriptor(act_desc_));
  }

 protected:
  void SetTensorDescriptor(
      const cudnnDataType_t data_type,
      const int data_size) {
    if (data_size != input_size_) {
      // Since the best performance is obtained when the tensor is HW-packed,
      // we put X.size() into W.
      input_size_ = data_size;
      CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
          data_desc_,
          GetCudnnTensorFormat(StorageOrder::NCHW),
          data_type,
          1,
          1,
          1,
          input_size_));
    }
  }

  CuDNNWrapper cudnn_wrapper_;
  cudnnTensorDescriptor_t data_desc_;
  cudnnActivationDescriptor_t act_desc_;

  int input_size_ = 0;
};

template <cudnnActivationMode_t kCuDNNActivationMode>
class CuDNNActivationOp final : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_, kCuDNNActivationMode, CUDNN_PROPAGATE_NAN, 0.0));
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& X = Input(0);

    auto* Y = Output(0, X.sizes(), at::dtype<T>());
    if (X.numel() == 0) {
      Y->template mutable_data<T>();
      return true;
    }
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, X.numel());
    CUDNN_ENFORCE(cudnnActivationForward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        X.template data<T>(),
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        Y->template mutable_data<T>()));
    return true;
  }
};

template <cudnnActivationMode_t kCuDNNActivationMode>
class CuDNNActivationGradientOp final : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationGradientOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_, kCuDNNActivationMode, CUDNN_PROPAGATE_NAN, 0.0));
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& Y = Input(0);
    const auto& dY = Input(1);

    auto* dX = Output(0, Y.sizes(), at::dtype<T>());
    if (Y.numel() == 0) {
      dX->template mutable_data<T>();
      return true;
    }
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, Y.numel());
    CUDNN_ENFORCE(cudnnActivationBackward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        Y.template data<T>(),
        this->data_desc_,
        dY.template data<T>(),
        this->data_desc_,
        Y.template data<T>(), // Use Y_data as a placeholder here.
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        dX->template mutable_data<T>()));
    return true;
  }
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_
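For context, this header only defines the templates; a concrete activation operator is obtained by instantiating them with a cuDNN activation mode and registering the result. The snippet below is a minimal sketch of what such an instantiation might look like for ReLU; the REGISTER_CUDNN_OPERATOR macro and the operator names "Relu"/"ReluGradient" are assumed here for illustration, and the authoritative registrations live in the corresponding operator source files.

// Sketch only: instantiating the templates above for ReLU.
// REGISTER_CUDNN_OPERATOR and the names "Relu"/"ReluGradient" are assumptions.
#include "caffe2/operators/activation_ops_cudnn.h"

namespace caffe2 {

REGISTER_CUDNN_OPERATOR(Relu, CuDNNActivationOp<CUDNN_ACTIVATION_RELU>);
REGISTER_CUDNN_OPERATOR(
    ReluGradient,
    CuDNNActivationGradientOp<CUDNN_ACTIVATION_RELU>);

} // namespace caffe2

Note that CuDNNActivationGradientOp takes the forward output Y as Input(0) and the upstream gradient dY as Input(1); it reuses Y for cuDNN's x argument, as the in-source comment notes.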
Referenced declarations:

cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order)
    A wrapper function to convert the Caffe storage order to the cuDNN storage order enum value.
    Definition: common_cudnn.h:192

const Tensor& Input(int idx, DeviceType type = CUDAContext::GetDeviceType())
    Retrieve a non-owning reference to the input at position 'idx' for this operator.
    Definition: operator.h:702

caffe2 (namespace)
    A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.
    Definition: blob.h:13

CuDNNWrapper
    A class that wraps the cuDNN handles and cuDNN workspaces.

cudnnTypeWrapper
    A wrapper class that allows us to refer to the cuDNN type in a template function.
    Definition: common_cudnn.h:120
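As a side note on SetTensorDescriptor above: describing the whole buffer as a 1 x 1 x 1 x N NCHW tensor lets cuDNN treat any elementwise activation input as an HW-packed array, regardless of its original shape. A standalone sketch of the same idea in raw cuDNN might look as follows (status checks omitted; the handle and device pointers are assumed to be set up by the caller and are not part of this header):

// Sketch only: the 1x1x1xN descriptor trick outside Caffe2, using raw cuDNN.
// Assumes a valid cudnnHandle_t and device buffers x/y holding n floats.
#include <cudnn.h>

void relu_flat(cudnnHandle_t handle, const float* x, float* y, int n) {
  cudnnTensorDescriptor_t desc;
  cudnnActivationDescriptor_t act;
  cudnnCreateTensorDescriptor(&desc);
  cudnnCreateActivationDescriptor(&act);

  // Same idea as SetTensorDescriptor: put the full element count into W.
  cudnnSetTensor4dDescriptor(
      desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 1, n);
  cudnnSetActivationDescriptor(
      act, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, 0.0);

  const float alpha = 1.0f, beta = 0.0f;
  cudnnActivationForward(handle, act, &alpha, desc, x, &beta, desc, y);

  cudnnDestroyActivationDescriptor(act);
  cudnnDestroyTensorDescriptor(desc);
}

In the operator, cudnnTypeWrapper<T>::kOne() and cudnnTypeWrapper<T>::kZero() play the role of &alpha and &beta, and the descriptor is only rebuilt when the input size changes (the input_size_ check), avoiding redundant cudnnSetTensor4dDescriptor calls across runs.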