// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// softmax_op.cc
#include "caffe2/operators/softmax_op.h"
#include "caffe2/operators/softmax_shared.h"
19 
namespace caffe2 {
21 
22 // Implementation for the CPU context.
23 template <>
24 bool SoftmaxOp<float, CPUContext>::RunOnDevice() {
25  auto& X = Input(0);
26  auto* Y = Output(0);
27  const auto canonical_axis = X.canonical_axis_index(axis_);
28  const int N = X.size_to_dim(canonical_axis);
29  const int D = X.size_from_dim(canonical_axis);
30  Y->ResizeLike(X);
31  float* Ydata = Y->mutable_data<float>();
32  // First, get scales
33  if (scale_.size() != N) {
34  scale_.Resize(N);
35  }
36  if (rowmax_.size() != N) {
37  rowmax_.Resize(N);
38  }
39  if (sum_multiplier_.size() != D) {
40  sum_multiplier_.Resize(D);
41  math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
42  &context_);
43  }
44 
45  SoftmaxCPU(
46  context_,
47  N,
48  D,
49  X.data<float>(),
50  Ydata,
51  scale_.mutable_data<float>(),
52  sum_multiplier_.data<float>(),
53  false,
54  rowmax_.mutable_data<float>());
55  return true;
56 }
57 
58 // Implementation for the CPU context.
59 template <>
60 bool SoftmaxGradientOp<float, CPUContext>::RunOnDevice() {
61  auto& Y = Input(0);
62  auto& dY = Input(1);
63  auto* dX = Output(0);
64  const auto canonical_axis = Y.canonical_axis_index(axis_);
65  const int N = Y.size_to_dim(canonical_axis);
66  const int D = Y.size_from_dim(canonical_axis);
67  // First, get scales
68  if (scale_.size() != N) {
69  scale_.Resize(N);
70  }
71  if (sum_multiplier_.size() != D) {
72  sum_multiplier_.Resize(D);
73  math::Set<float, CPUContext>(D, 1.f, sum_multiplier_.mutable_data<float>(),
74  &context_);
75  }
76  dX->ResizeLike(Y);
77  const float* Ydata = Y.data<float>();
78  const float* dYdata = dY.data<float>();
79  float* dXdata = dX->mutable_data<float>();
80  context_.Copy<float, CPUContext, CPUContext>(Y.size(), dYdata, dXdata);
81  float* scaledata = scale_.mutable_data<float>();
82  for (int i = 0; i < N; ++i) {
83  math::Dot<float, CPUContext>(D, Ydata + i * D, dYdata + i * D,
84  scaledata + i, &context_);
85  }
86  math::Gemm<float, CPUContext>(CblasNoTrans, CblasNoTrans, N, D, 1, -1,
87  scaledata, sum_multiplier_.data<float>(), 1,
88  dXdata, &context_);
89  math::Mul<float, CPUContext>(Y.size(), dXdata, Ydata, dXdata,
90  &context_);
91  return true;
92 }
93 
94 REGISTER_CPU_OPERATOR(Softmax, SoftmaxOp<float, CPUContext>);
95 REGISTER_CPU_OPERATOR(SoftmaxGradient, SoftmaxGradientOp<float, CPUContext>);
96 
97 OPERATOR_SCHEMA(Softmax)
98  .NumInputs(1)
99  .NumOutputs(1)
100  .IdenticalTypeAndShape()
101  .SetDoc(R"DOC(
102 The operator computes the softmax normalized values for each layer in the batch
103  of the given input. The input is a 2-D tensor (Tensor<float>) of size
104 (batch_size x input_feature_dimensions). The output tensor has the same shape
105 and contains the softmax normalized values of the corresponding input.
106 
107 X does not need to explicitly be a 2D vector; rather, it will be
108 coerced into one. For an arbitrary n-dimensional tensor
109 X \in [a_0, a_1, ..., a_{k-1}, a_k, ..., a_{n-1}] and k is
110 the axis provided, then X will be coerced into a 2-dimensional tensor with
111 dimensions [a_0 * ... * a_{k-1}, a_k * ... * a_{n-1}]. For the default
112 case where axis=1, this means the X tensor will be coerced into a 2D tensor
113 of dimensions [a_0, a_1 * ... * a_{n-1}], where a_0 is often the batch size.
114 In this situation, we must have a_0 = N and a_1 * ... * a_{n-1} = D.
115 Each of these dimensions must be matched correctly, or else the operator
116 will throw errors.
117 )DOC")
118  .Arg("axis",
119  "(int) default to 1; describes the axis of the inputs when coerced "
120  "to 2D; defaults to one because the 0th axis most likely describes "
121  "the batch_size")
122  .Input(0, "input",
123  "The input tensor that's coerced into a 2D matrix of size (NxD) "
124  "as described above.")
125  .Output(0, "output", "The softmax normalized output values with the same "
126  "shape as input tensor.");
127 
128 // Input: Y, dY. Output: dX
129 OPERATOR_SCHEMA(SoftmaxGradient).NumInputs(2).NumOutputs(1);
130 
132  using GradientMakerBase::GradientMakerBase;
133  vector<OperatorDef> GetGradientDefs() override {
134  return SingleGradientDef(
135  def_.type() + "Gradient", "",
136  vector<string>{O(0), GO(0)},
137  vector<string>{GI(0)});
138  }
139 };
140 REGISTER_GRADIENT(Softmax, GetSoftmaxGradient);
141 REGISTER_GRADIENT(SoftmaxFp16, GetSoftmaxGradient);
142 
} // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.
//
// (doc extract) static vector<OperatorDef> SingleGradientDef(const Args&... args):
// a helper function to allow one to create one single operator def, which is
// usually the case for many simple operators.