// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// softmax_op.cc
#include "caffe2/operators/softmax_op.h"
#include "caffe2/operators/softmax_shared.h"
19 
namespace caffe2 {
21 
22 // Implementation for the CPU context.
23 template <>
24 bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
25  auto& X = Input(0);
26  auto* Y = Output(0);
27  const auto canonical_axis = X.canonical_axis_index(axis_);
28  const int N = X.size_to_dim(canonical_axis);
29  const int D = X.size_from_dim(canonical_axis);
30  Y->ResizeLike(X);
31  float* Ydata = Y->mutable_data<float>();
32  // First, get scales
33  if (scale_.size() != N) {
34  scale_.Resize(N);
35  }
36  if (rowmax_.size() != N) {
37  rowmax_.Resize(N);
38  }
39  if (sum_multiplier_.size() != D) {
40  sum_multiplier_.Resize(D);
41  math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
42  &context_);
43  }
44 
45  SoftmaxCPU(
46  context_,
47  N,
48  D,
49  X.data<float>(),
50  Ydata,
51  scale_.mutable_data<float>(),
52  sum_multiplier_.data<float>(),
53  false,
54  rowmax_.mutable_data<float>());
55  return true;
56 }
57 
58 // Implementation for the CPU context.
59 template <>
60 bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
61  auto& Y = Input(0);
62  auto& dY = Input(1);
63  auto* dX = Output(0);
64  const auto canonical_axis = Y.canonical_axis_index(axis_);
65  const int N = Y.size_to_dim(canonical_axis);
66  const int D = Y.size_from_dim(canonical_axis);
67  // First, get scales
68  if (scale_.size() != N) {
69  scale_.Resize(N);
70  }
71  if (sum_multiplier_.size() != D) {
72  sum_multiplier_.Resize(D);
73  math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
74  &context_);
75  }
76  dX->ResizeLike(Y);
77  const float* Ydata = Y.data<float>();
78  const float* dYdata = dY.data<float>();
79  float* dXdata = dX->mutable_data<float>();
80  context_.Copy<float, CPUContext, CPUContext>(Y.size(), dYdata, dXdata);
81  float* scaledata = scale_.mutable_data<float>();
82  for (int i = 0; i < N; ++i) {
83  math::Dot<float, CPUContext>(D, Ydata + i * D, dYdata + i * D,
84  scaledata + i, &context_);
85  }
86  math::Gemm<float, CPUContext>(CblasNoTrans, CblasNoTrans, N, D, 1, -1,
87  scaledata, sum_multiplier_.data<float>(), 1,
88  dXdata, &context_);
89  math::Mul<float, CPUContext>(Y.size(), dXdata, Ydata, dXdata,
90  &context_);
91  return true;
92 }
93 
94 REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
95 REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);
96 
97 OPERATOR_SCHEMA(Softmax)
98  .NumInputs(1)
99  .NumOutputs(1)
100  .IdenticalTypeAndShape()
101  .SetDoc(R"DOC(
102 The operator computes the softmax normalized values for each layer in the batch
103  of the given input. The input is a 2-D tensor (Tensor<float>) of size
104 (batch_size x input_feature_dimensions). The output tensor has the same shape
105 and contains the softmax normalized values of the corresponding input.
106 
107 X does not need to explicitly be a 2D vector; rather, it will be
108 coerced into one. For an arbitrary n-dimensional tensor
109 X \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
110 the axis provided, then X will be coerced into a 2-dimensional tensor with
111 dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default
112 case where axis=1, this means the X tensor will be coerced into a 2D tensor
113 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
114 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
115 Each of these dimensions must be matched correctly, or else the operator
116 will throw errors.
117 )DOC")
118  .Arg("axis",
119  "(int) default to 1; describes the axis of the inputs when coerced "
120  "to 2D; defaults to one because the 0th axis most likely describes "
121  "the batch_size")
122  .Input(0, "input",
123  "The input tensor that's coerced into a 2D matrix of size (NxD) "
124  "as described above.")
125  .Output(0, "output", "The softmax normalized output values with the same "
126  "shape as input tensor.");
127 
128 // Input: Y, dY. Output: dX
129 OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);
130 
132  using GradientMakerBase::GradientMakerBase;
133  vector<OperatorDef> GetGradientDefs() override {
134  return SingleGradientDef(
135  def_.type() + "Gradient", "",
136  vector<string>{O(0), GO(0)},
137  vector<string>{GI(0)});
138  }
139 };
140 REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
141 REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);
142 
} // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.
//
// (doc extract) static vector<OperatorDef> SingleGradientDef(const Args&... args):
// a helper function to allow one to create one single operator def, which is
// usually the case for many simple operators.