Caffe2 - C++ API
A deep learning, cross-platform ML framework
activation_ops_cudnn.h
#ifndef CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_
#define CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_

#include "caffe2/core/context_gpu.h"
#include "caffe2/core/cudnn_wrappers.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
#include "caffe2/core/types.h"

namespace caffe2 {

class CuDNNActivationOpBase : public Operator<CUDAContext> {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationOpBase(Args&&... args)
      : Operator<CUDAContext>(std::forward<Args>(args)...),
        cudnn_wrapper_(&context_) {
    CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&data_desc_));
    CUDNN_ENFORCE(cudnnCreateActivationDescriptor(&act_desc_));
  }

  virtual ~CuDNNActivationOpBase() {
    CUDNN_ENFORCE(cudnnDestroyTensorDescriptor(data_desc_));
    CUDNN_ENFORCE(cudnnDestroyActivationDescriptor(act_desc_));
  }

 protected:
  void SetTensorDescriptor(
      const cudnnDataType_t data_type,
      const int data_size) {
    if (data_size != input_size_) {
      // Since the best performance is obtained when the tensor is HW-packed,
      // we put X.size() into W.
      input_size_ = data_size;
      CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
          data_desc_,
          GetCudnnTensorFormat(StorageOrder::NCHW),
          data_type,
          1,
          1,
          1,
          input_size_));
    }
  }

  CuDNNWrapper cudnn_wrapper_;
  cudnnTensorDescriptor_t data_desc_;
  cudnnActivationDescriptor_t act_desc_;

  int input_size_ = 0;
};

template <cudnnActivationMode_t kCuDNNActivationMode>
class CuDNNActivationOp final : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_, kCuDNNActivationMode, CUDNN_PROPAGATE_NAN, 0.0));
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& X = Input(0);

    auto* Y = Output(0, X.sizes(), at::dtype<T>());
    if (X.numel() == 0) {
      Y->template mutable_data<T>();
      return true;
    }
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, X.numel());
    CUDNN_ENFORCE(cudnnActivationForward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        X.template data<T>(),
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        Y->template mutable_data<T>()));
    return true;
  }
};

template <cudnnActivationMode_t kCuDNNActivationMode>
class CuDNNActivationGradientOp final : public CuDNNActivationOpBase {
 public:
  USE_OPERATOR_FUNCTIONS(CUDAContext);

  template <class... Args>
  explicit CuDNNActivationGradientOp(Args&&... args)
      : CuDNNActivationOpBase(std::forward<Args>(args)...) {
    CUDNN_ENFORCE(cudnnSetActivationDescriptor(
        act_desc_, kCuDNNActivationMode, CUDNN_PROPAGATE_NAN, 0.0));
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    const auto& Y = Input(0);
    const auto& dY = Input(1);

    auto* dX = Output(0, Y.sizes(), at::dtype<T>());
    if (Y.numel() == 0) {
      dX->template mutable_data<T>();
      return true;
    }
    this->SetTensorDescriptor(cudnnTypeWrapper<T>::type, Y.numel());
    CUDNN_ENFORCE(cudnnActivationBackward(
        this->cudnn_wrapper_.inline_cudnn_handle(),
        this->act_desc_,
        cudnnTypeWrapper<T>::kOne(),
        this->data_desc_,
        Y.template data<T>(),
        this->data_desc_,
        dY.template data<T>(),
        this->data_desc_,
        Y.template data<T>(), // Use Y_data as a placeholder here.
        cudnnTypeWrapper<T>::kZero(),
        this->data_desc_,
        dX->template mutable_data<T>()));
    return true;
  }
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_ACTIVATION_OPS_CUDNN_H_
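For context, this header only defines the templates; a concrete activation operator is obtained by instantiating them with a cuDNN activation mode and registering the result. The snippet below is a minimal sketch of what such an instantiation might look like for ReLU; the REGISTER_CUDNN_OPERATOR macro and the operator names "Relu"/"ReluGradient" are assumed here for illustration, and the authoritative registrations live in the corresponding operator source files.

// Sketch only: instantiating the templates above for ReLU.
// REGISTER_CUDNN_OPERATOR and the names "Relu"/"ReluGradient" are assumptions.
#include "caffe2/operators/activation_ops_cudnn.h"

namespace caffe2 {

REGISTER_CUDNN_OPERATOR(Relu, CuDNNActivationOp<CUDNN_ACTIVATION_RELU>);
REGISTER_CUDNN_OPERATOR(
    ReluGradient,
    CuDNNActivationGradientOp<CUDNN_ACTIVATION_RELU>);

} // namespace caffe2

Note that CuDNNActivationGradientOp takes the forward output Y as Input(0) and the upstream gradient dY as Input(1); it reuses Y for cuDNN's x argument, as the in-source comment notes.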
Referenced declarations:

cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order)
    A wrapper function to convert the Caffe storage order to the cuDNN storage order enum value.
    Definition: common_cudnn.h:192

const Tensor& Input(int idx, DeviceType type = CUDAContext::GetDeviceType())
    Retrieve a non-owning reference to the input at position 'idx' for this operator.
    Definition: operator.h:702

caffe2 (namespace)
    A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime.
    Definition: blob.h:13

CuDNNWrapper
    A class that wraps the cuDNN handles and cuDNN workspaces.

cudnnTypeWrapper
    A wrapper class that allows us to refer to the cuDNN type in a template function.
    Definition: common_cudnn.h:120
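As a side note on SetTensorDescriptor above: describing the whole buffer as a 1 x 1 x 1 x N NCHW tensor lets cuDNN treat any elementwise activation input as an HW-packed array, regardless of its original shape. A standalone sketch of the same idea in raw cuDNN might look as follows (status checks omitted; the handle and device pointers are assumed to be set up by the caller and are not part of this header):

// Sketch only: the 1x1x1xN descriptor trick outside Caffe2, using raw cuDNN.
// Assumes a valid cudnnHandle_t and device buffers x/y holding n floats.
#include <cudnn.h>

void relu_flat(cudnnHandle_t handle, const float* x, float* y, int n) {
  cudnnTensorDescriptor_t desc;
  cudnnActivationDescriptor_t act;
  cudnnCreateTensorDescriptor(&desc);
  cudnnCreateActivationDescriptor(&act);

  // Same idea as SetTensorDescriptor: put the full element count into W.
  cudnnSetTensor4dDescriptor(
      desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, 1, 1, n);
  cudnnSetActivationDescriptor(
      act, CUDNN_ACTIVATION_RELU, CUDNN_PROPAGATE_NAN, 0.0);

  const float alpha = 1.0f, beta = 0.0f;
  cudnnActivationForward(handle, act, &alpha, desc, x, &beta, desc, y);

  cudnnDestroyActivationDescriptor(act);
  cudnnDestroyTensorDescriptor(desc);
}

In the operator, cudnnTypeWrapper<T>::kOne() and cudnnTypeWrapper<T>::kZero() play the role of &alpha and &beta, and the descriptor is only rebuilt when the input size changes (the input_size_ check), avoiding redundant cudnnSetTensor4dDescriptor calls across runs.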