#include "caffe2/operators/softmax_op.h"
#include "caffe2/operators/softmax_shared.h"

namespace caffe2 {

template <>
bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  const auto canonical_axis = X.canonical_axis_index(axis_);
  const int N = X.size_to_dim(canonical_axis);
  const int D = X.size_from_dim(canonical_axis);
  auto* Y = Output(0, X.sizes(), at::dtype<float>());
  float* Ydata = Y->template mutable_data<float>();
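  // With the default axis == 1, N is the leading (batch) dimension and D is
  // the product of the remaining dimensions; e.g. for an input of shape
  // [2, 3, 4] and axis == 1, N == 2 and D == 12. The buffers below are lazily
  // allocated scratch space reused across runs: scale_ holds one value per
  // row, rowmax_ the per-row maximum (presumably used inside SoftmaxCPU for
  // the usual max-subtraction numerical-stability trick), and sum_multiplier_
  // a vector of D ones used to broadcast per-row values across columns via
  // GEMM.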
  if (!scale_.defined()) {
    scale_ = caffe2::empty({N}, at::dtype<float>().device(CPU));
  } else if (scale_.numel() != N) {
    scale_.Resize(N);
  }
  if (!rowmax_.defined()) {
    rowmax_ = caffe2::empty({N}, at::dtype<float>().device(CPU));
  } else if (rowmax_.numel() != N) {
    rowmax_.Resize(N);
  }
  if (!sum_multiplier_.defined()) {
    sum_multiplier_ = caffe2::empty({D}, at::dtype<float>().device(CPU));
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  } else if (sum_multiplier_.numel() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  SoftmaxCPU(
      context_,
      N,
      D,
      X.data<float>(),
      Ydata,
      scale_.mutable_data<float>(),
      sum_multiplier_.data<float>(),
      false,
      rowmax_.mutable_data<float>());
  return true;
}

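// SoftmaxGradientOp computes the gradient of softmax along the reduced
// dimension: for each row, dX_j = Y_j * (dY_j - sum_k Y_k * dY_k). The code
// below realizes this by copying dY into dX, accumulating the per-row dot
// products <Y, dY> into scale_, subtracting them from every column with a
// rank-1 GEMM against the all-ones sum_multiplier_, and finally multiplying
// elementwise by Y.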
template <>
bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  const auto canonical_axis = Y.canonical_axis_index(axis_);
  const int64_t N = Y.size_to_dim(canonical_axis);
  const int64_t D = Y.size_from_dim(canonical_axis);
  if (!scale_.defined()) {
    scale_ = caffe2::empty({N}, at::dtype<float>().device(CPU));
  } else if (scale_.numel() != N) {
    scale_.Resize(N);
  }
  if (!sum_multiplier_.defined()) {
    sum_multiplier_ = caffe2::empty({D}, at::dtype<float>().device(CPU));
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  } else if (sum_multiplier_.numel() != D) {
    sum_multiplier_.Resize(D);
    math::Set<float, CPUContext>(
        D, 1.f, sum_multiplier_.mutable_data<float>(), &context_);
  }
  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
  const float* Ydata = Y.data<float>();
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // Start from dX = dY; the correction terms are subtracted below.
  context_.CopySameDevice<float>(Y.numel(), dYdata, dXdata);
  float* scaledata = scale_.mutable_data<float>();
  // scaledata[i] = <Y_i, dY_i>, the per-row dot product.
  for (int i = 0; i < N; ++i) {
    math::Dot<float, CPUContext>(
        D, Ydata + i * D, dYdata + i * D, scaledata + i, &context_);
  }
  // Subtract the per-row <Y, dY> from every column of dX (rank-1 GEMM update).
  math::Gemm<float, CPUContext>(
      CblasNoTrans, CblasNoTrans, N, D, 1, -1, scaledata,
      sum_multiplier_.data<float>(), 1, dXdata, &context_);
  // Elementwise multiply by Y: dX = Y * (dY - <Y, dY>).
  math::Mul<float, CPUContext>(Y.numel(), dXdata, Ydata, dXdata, &context_);
  return true;
}
REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
REGISTER_CPU_GRADIENT_OPERATOR(
    SoftmaxGradient,
    SoftmaxGradientOp<float, CPUContext>);
OPERATOR_SCHEMA(Softmax)
    .NumInputs(1)
    .NumOutputs(1)
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

Applies the Softmax function to an n-dimensional input Tensor, rescaling it so
that the elements of the n-dimensional output Tensor lie in the range (0,1) and
sum to 1. The softmax operator is typically the last layer in a classifier network,
as its output can be interpreted as confidence probabilities of an input belonging
to each class. The input is a 2-D tensor (Tensor) of size (batch_size x
input_feature_dimensions). The output tensor has the same shape and contains the
softmax normalized values of the corresponding input. The softmax function is
defined as follows:

$$softmax(x_i) = \frac{\exp(x_i)}{\sum_{j} \exp(x_j)}$$

The input does not need to explicitly be a 2D matrix; rather, it will be coerced
into one. For an arbitrary n-dimensional tensor `X` with shape
$[a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}]$, where k is the `axis` provided,
`X` will be coerced into a 2-dimensional tensor with dimensions
$[(a_0 * ... * a_{k-1}), (a_k * ... * a_{n-1})]$. For the default case where
`axis`=1, the `X` tensor will be coerced into a 2D tensor of dimensions
$[a_0, (a_1 * ... * a_{n-1})]$, where $a_0$ is often the batch size. In this
situation, we must have $a_0 = N$ and $a_1 * ... * a_{n-1} = D$. Each of these
dimensions must be matched correctly, or else the operator will throw errors.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/softmax_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/softmax_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "Softmax",
    ["X"],
    ["Y"]
)

workspace.FeedBlob("X", np.random.randn(1, 5).astype(np.float32))
print("input:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("softmax:", workspace.FetchBlob("Y"))
```

**Result**

```
input: [[ 0.0417839   0.61960053 -0.23150268 -0.64389366 -3.0000346 ]]
softmax: [[0.24422921 0.43525138 0.18582782 0.12303016 0.01166145]]
```

</details>

)DOC")
    .Arg(
        "axis",
        "*(type: int; default: 1)* Axis of the inputs when coerced to 2D matrix.")
173 "*(type: Tensor`<float>`)* Input tensor that's coerced into a 2D matrix of size (NxD) as described above.")
177 "*(type: Tensor`<float>`)* The softmax normalized output tensor with the same shape as input tensor.")
    .InheritOnnxSchema();
GRADIENT_OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);
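// GetSoftmaxGradient wires the backward op: it emits "<op type>Gradient" with
// the forward output Y (O(0)) and the upstream gradient dY (GO(0)) as inputs,
// producing the gradient with respect to the input (GI(0)). The same maker is
// reused for the SoftmaxFp16 registration below.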
class GetSoftmaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);

} // namespace caffe2