Caffe2 - C++ API
A deep learning, cross-platform ML framework
softmax_op_cudnn.cc
#include "caffe2/core/context_gpu.h"
#include "caffe2/core/cudnn_wrappers.h"
#include "caffe2/core/types.h"
#include "caffe2/operators/softmax_op.h"

namespace caffe2 {

namespace {
constexpr int NUM_DESCRIPTORS = 2;
constexpr int GRADIENT_NUM_DESCRIPTORS = 3;
constexpr int BOTTOM_DESC_ID = 0;
constexpr int TOP_DESC_ID = 1;
constexpr int TOP_GRADIENT_DESC_ID = 2;
} // namespace

class CuDNNSoftmaxOp final : public Operator<CUDAContext> {
 public:
  template <class... Args>
  explicit CuDNNSoftmaxOp(Args&&... args)
      : Operator<CUDAContext>(std::forward<Args>(args)...),
        cudnn_wrapper_(&context_),
        axis_(OperatorBase::GetSingleArgument<int>("axis", 1)) {
    CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&desc_));
  }

  ~CuDNNSoftmaxOp() override {
    CUDNN_ENFORCE(cudnnDestroyTensorDescriptor(desc_));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& X = Input(0);

    // Flatten X into an N x D matrix around the softmax axis.
    const auto canonical_axis = X.canonical_axis_index(axis_);
    const int N = X.size_to_dim(canonical_axis);
    const int D = X.size_from_dim(canonical_axis);

    auto* Y = Output(0, X.sizes(), at::dtype<T>());
    auto* Y_data = Y->template mutable_data<T>();
    if (N == 0) {
      return true;
    }
    // Only (re)configure the tensor descriptor when the input shape changes.
    if (dims_ != X.sizes()) {
      CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
          desc_,
          GetCudnnTensorFormat(StorageOrder::NCHW),
          cudnnTypeWrapper<T>::type,
          N,
          D,
          1,
          1));
      dims_ = X.sizes().vec();
    }
    CUDNN_ENFORCE(cudnnSoftmaxForward(
        cudnn_wrapper_.inline_cudnn_handle(),
        CUDNN_SOFTMAX_ACCURATE,
        CUDNN_SOFTMAX_MODE_INSTANCE,
        cudnnTypeWrapper<T>::kOne(),
        desc_,
        X.template data<T>(),
        cudnnTypeWrapper<T>::kZero(),
        desc_,
        Y_data));
    return true;
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

 protected:
  CuDNNWrapper cudnn_wrapper_;
  int axis_;
  cudnnTensorDescriptor_t desc_;
  vector<int64_t> dims_;
};

class CuDNNSoftmaxGradientOp final : public Operator<CUDAContext> {
 public:
  template <class... Args>
  explicit CuDNNSoftmaxGradientOp(Args&&... args)
      : Operator<CUDAContext>(std::forward<Args>(args)...),
        cudnn_wrapper_(&context_),
        axis_(OperatorBase::GetSingleArgument<int>("axis", 1)) {
    CUDNN_ENFORCE(cudnnCreateTensorDescriptor(&desc_));
  }

  ~CuDNNSoftmaxGradientOp() override {
    CUDNN_ENFORCE(cudnnDestroyTensorDescriptor(desc_));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& Y = Input(0);
    auto& dY = Input(1);

    // Flatten Y (and dY) into an N x D matrix around the softmax axis.
    const auto canonical_axis = Y.canonical_axis_index(axis_);
    const int N = Y.size_to_dim(canonical_axis);
    const int D = Y.size_from_dim(canonical_axis);

    CHECK_EQ(Y.sizes(), dY.sizes());
    auto* dX = Output(0, Y.sizes(), at::dtype<T>());
    auto* dX_data = dX->template mutable_data<T>();
    if (N == 0) {
      return true;
    }
    if (dims_ != Y.sizes()) {
      CUDNN_ENFORCE(cudnnSetTensor4dDescriptor(
          desc_,
          GetCudnnTensorFormat(StorageOrder::NCHW),
          cudnnTypeWrapper<T>::type,
          N,
          D,
          1,
          1));
      dims_ = Y.sizes().vec();
    }
    CUDNN_ENFORCE(cudnnSoftmaxBackward(
        cudnn_wrapper_.inline_cudnn_handle(),
        CUDNN_SOFTMAX_ACCURATE,
        CUDNN_SOFTMAX_MODE_INSTANCE,
        cudnnTypeWrapper<T>::kOne(),
        desc_,
        Y.template data<T>(),
        desc_,
        dY.template data<T>(),
        cudnnTypeWrapper<T>::kZero(),
        desc_,
        dX_data));
    return true;
  }

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, at::Half>>::call(this, Input(0));
  }

 protected:
  CuDNNWrapper cudnn_wrapper_;
  int axis_;
  cudnnTensorDescriptor_t desc_;
  vector<int64_t> dims_;
};

namespace {
REGISTER_CUDNN_OPERATOR(Softmax, CuDNNSoftmaxOp);
REGISTER_CUDNN_OPERATOR(SoftmaxGradient, CuDNNSoftmaxGradientOp);
} // namespace
} // namespace caffe2
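
For reference, the descriptor set up above presents the input to cuDNN as an N x D matrix (N = size_to_dim(canonical_axis), D = size_from_dim(canonical_axis)), and CUDNN_SOFTMAX_MODE_INSTANCE with the "accurate" algorithm then normalizes each of the N rows independently after subtracting the row maximum. A minimal CPU-side sketch of that computation follows; it is illustrative only and not part of the operator, and the helper name reference_softmax is invented for this example.

#include <algorithm>
#include <cmath>
#include <vector>

// Reference computation of what the operator asks cuDNN to do: the input is
// treated as N rows of D elements, and a softmax is applied to each row.
std::vector<float> reference_softmax(const std::vector<float>& x, int N, int D) {
  std::vector<float> y(x.size());
  for (int n = 0; n < N; ++n) {
    const float* row = x.data() + n * D;
    float* out = y.data() + n * D;
    // Subtract the row max for numerical stability (CUDNN_SOFTMAX_ACCURATE).
    const float max_val = *std::max_element(row, row + D);
    float sum = 0.f;
    for (int d = 0; d < D; ++d) {
      out[d] = std::exp(row[d] - max_val);
      sum += out[d];
    }
    for (int d = 0; d < D; ++d) {
      out[d] /= sum;
    }
  }
  return y;
}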
Referenced symbols:
cudnnTensorFormat_t GetCudnnTensorFormat(const StorageOrder& order) - a wrapper function that converts the Caffe2 storage order to the corresponding cudnn storage order enum value. Definition: common_cudnn.h:192
const Tensor& Input(int idx, DeviceType type = CUDAContext::GetDeviceType()) - retrieves a non-owning reference to the input at position 'idx' for this operator. Definition: operator.h:702
CuDNNWrapper - a class that wraps the cudnn handles and cudnn workspaces.
cudnnHandle_t inline_cudnn_handle() - returns the inline cudnn handle that executes on the current thread's cuda_stream.
cudnnTypeWrapper - a wrapper class that allows us to refer to the cudnn type in a template function. Definition: common_cudnn.h:120
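
For comparison with the wrapped calls in the listing, here is a standalone sketch of the same forward pass issued through the raw cuDNN API, with plain 1.0f / 0.0f constants playing the role of cudnnTypeWrapper<float>::kOne() and kZero() (cuDNN's alpha/beta blending factors). The function name softmax_forward_raw and the check_cudnn helper are invented for this illustration; the cudnnHandle_t and the device buffers d_x and d_y are assumed to exist already.

#include <cudnn.h>
#include <cstdio>
#include <cstdlib>

// Minimal status check, standing in for the CUDNN_ENFORCE macro above.
static void check_cudnn(cudnnStatus_t status) {
  if (status != CUDNN_STATUS_SUCCESS) {
    std::fprintf(stderr, "cuDNN error: %s\n", cudnnGetErrorString(status));
    std::abort();
  }
}

// d_x and d_y are device buffers holding N * D floats.
void softmax_forward_raw(cudnnHandle_t handle, const float* d_x, float* d_y, int N, int D) {
  cudnnTensorDescriptor_t desc;
  check_cudnn(cudnnCreateTensorDescriptor(&desc));
  // Same layout the operator configures: an N x D tensor with H = W = 1,
  // so CUDNN_SOFTMAX_MODE_INSTANCE normalizes each of the N rows.
  check_cudnn(cudnnSetTensor4dDescriptor(
      desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, N, D, 1, 1));
  const float alpha = 1.0f;
  const float beta = 0.0f;
  check_cudnn(cudnnSoftmaxForward(
      handle,
      CUDNN_SOFTMAX_ACCURATE,
      CUDNN_SOFTMAX_MODE_INSTANCE,
      &alpha,
      desc,
      d_x,
      &beta,
      desc,
      d_y));
  check_cudnn(cudnnDestroyTensorDescriptor(desc));
}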