// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// File: selu_op.cc
1 #include "caffe2/operators/selu_op.h"
2 
3 #include "caffe2/utils/eigen_utils.h"
4 #include "caffe2/utils/math.h"
5 
6 namespace caffe2 {
7 
8 template <>
9 bool SeluOp<float, CPUContext>::RunOnDevice() {
10  auto& X = Input(0);
11 
12  auto* Y = Output(0, X.sizes(), at::dtype<float>());
13 
14  ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel());
15  EigenVectorArrayMap<float> Yvec(
16  Y->template mutable_data<float>(), Y->numel());
17  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
18  return true;
19 }
20 
21 template <>
22 bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
23  auto& Y = Input(0);
24  auto& dY = Input(1);
25 
26  CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());
27  auto* dX = Output(0, Y.sizes(), at::dtype<float>());
28 
29  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.numel());
30  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.numel());
31  EigenVectorArrayMap<float> dXvec(
32  dX->template mutable_data<float>(), dX->numel());
33 
34  const float la = lambda_ * alpha_;
35  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
36  return true;
37 }
38 
// Register the CPU implementations of the forward and backward ops
// defined above.
REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);

// Input: X; output: Y
// Schema for the forward op. One input, one output, may run in place
// (Y aliasing X), and the output always has the input's type and shape.
OPERATOR_SCHEMA(Selu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

The *Selu* op takes one input tensor $X$, an argument $alpha$, an argument $scale$, and produces one output tensor $Y$ of the same shape as $X.$ The op performs the element wise *Selu* operation, defined as

$$y=selu(x) =\begin{cases}scale (\alpha e^{x} - \alpha) & x < 0\\scale * x & otherwise\end{cases}$$

The default value of *alpha* is 1.6732632423543772848170429916717 and the default value of *scale* is 1.0507009873554804934193349852946. See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more information.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.cc


<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Selu",
    ["X"],
    ["Y"],
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[ 1.1613879  -0.27111396 -1.2076733 ]
 [ 1.3442237  -1.0701777   1.2070968 ]
 [ 0.23810555  0.9740916  -1.7872391 ]]

Y:
 [[ 1.2202715  -0.4174965  -1.2326177 ]
 [ 1.4123772  -1.1551634   1.2682979 ]
 [ 0.25017774  1.023479   -1.4637551 ]]

```

</details>

)DOC")
    .Arg(
        "alpha",
        "*(type: float; default: 1.673263~)* Alpha constant in equation.")
    .Arg(
        "scale",
        "*(type: float; default: 1.050700~; must be > 1.0)* Scale constant in equation.")
    .Input(0, "X", "Input tensor of data to be operated on.")
    .Output(0, "Y", "Output tensor with same shape as input.")
    .InheritOnnxSchema();
114 
// Input: Y, dY; output: dX
// Schema for the backward op. Takes the forward output Y and the incoming
// gradient dY; may run in place with dX aliasing dY.
OPERATOR_SCHEMA(SeluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
SeluGradient takes both Y and dY and uses this to update dX according to the
chain rule and derivatives of the selu function.
)DOC")
    .Arg(
        "alpha",
        "(float) default to 1.6732~; affects the activation function itself."
        "This should go with the weight initialization in the paper. "
        " See https://arxiv.org/abs/1706.02515 ")
    .Arg(
        "scale",
        "(float) default to 1.0507~; affects the activation function itself.")
    .Input(0, "Y", "input tensor")
    .Input(1, "dY", "input tensor");
134 
136  using GradientMakerBase::GradientMakerBase;
137  vector<OperatorDef> GetGradientDefs() override {
138  return SingleGradientDef(
139  def_.type() + "Gradient",
140  "",
141  vector<string>{O(0), GO(0)},
142  vector<string>{GI(0)});
143  }
144 };
145 REGISTER_GRADIENT(Selu, GetSeluGradient);
146 
147 } // namespace caffe2
// (Doxygen extraction residue — tooltip fragments, not part of the source:)
// A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current runtime. Definition: blob.h:13
// static vector<OperatorDef> SingleGradientDef(const Args&... args) — a helper
// function to allow one to create one single operator def, which is usually
// the case for many simple operators.