#include "caffe2/operators/selu_op.h"

#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <>
bool SeluOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);

  auto* Y = Output(0, X.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.numel());
  EigenVectorArrayMap<float> Yvec(
      Y->template mutable_data<float>(), Y->numel());
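  // Elementwise SELU: Eigen's select() yields x where x > 0 and
  // alpha * (exp(x) - 1) elsewhere; the whole result is scaled by lambda.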
  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
  return true;
}

template <>
bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);

  CAFFE_ENFORCE_EQ(dY.numel(), Y.numel());
  auto* dX = Output(0, Y.sizes(), at::dtype<float>());

  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.numel());
  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.numel());
  EigenVectorArrayMap<float> dXvec(
      dX->template mutable_data<float>(), dX->numel());

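  // The gradient can be written in terms of Y alone: for x > 0, dy/dx = lambda;
  // for x <= 0, y = lambda * alpha * (exp(x) - 1), so
  // dy/dx = lambda * alpha * exp(x) = y + lambda * alpha. Note that Y > 0
  // exactly when X > 0, since selu is strictly increasing with selu(0) = 0.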
  const float la = lambda_ * alpha_;
  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
  return true;
}

REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);

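// The Selu schema below permits in-place execution: output 0 may reuse
// input 0's buffer, and the output keeps the input's type and shape.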
// Input: X; output: Y
OPERATOR_SCHEMA(Selu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(

The *Selu* op takes one input tensor $X$, an argument $alpha$, an argument $scale$, and produces one output tensor $Y$ of the same shape as $X$. The op performs the element-wise *Selu* operation, defined as

$$y = selu(x) = \begin{cases} scale \cdot (\alpha e^{x} - \alpha) & x < 0 \\ scale \cdot x & otherwise \end{cases}$$

The default value of *alpha* is 1.6732632423543772848170429916717 and the default value of *scale* is 1.0507009873554804934193349852946. See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) for more information.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.h
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/selu_op.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```

from caffe2.python import core, workspace
import numpy as np

workspace.ResetWorkspace()

op = core.CreateOperator(
    "Selu",
    ["X"],
    ["Y"],
)

workspace.FeedBlob("X", np.random.randn(3, 3).astype(np.float32))
print("X:\n", workspace.FetchBlob("X"), "\n")

workspace.RunOperatorOnce(op)
print("Y:\n", workspace.FetchBlob("Y"))

```

**Result**

```

X:
 [[ 1.1613879  -0.27111396 -1.2076733 ]
 [ 1.3442237  -1.0701777   1.2070968 ]
 [ 0.23810555  0.9740916  -1.7872391 ]]

Y:
 [[ 1.2202715  -0.4174965  -1.2326177 ]
 [ 1.4123772  -1.1551634   1.2682979 ]
 [ 0.25017774  1.023479   -1.4637551 ]]

```

</details>

)DOC")
    .Arg(
        "alpha",
        "*(type: float; default: 1.673263~)* Alpha constant in equation.")
110 "*(type: float; default: 1.050700~; must be > 1.0)* Scale constant in equation.")
    .Input(
        0,
        "X",
        "Input tensor of data to be operated on.")
    .Output(
        0,
        "Y",
        "Output tensor with same shape as input.")
    .InheritOnnxSchema();

// Input: Y, dY; output: dX
OPERATOR_SCHEMA(SeluGradient)
    .NumInputs(2)
    .NumOutputs(1)
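    // dY (input 1) may share its buffer with dX (output 0).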
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
SeluGradient takes both Y and dY and uses them to update dX according to the
chain rule and the derivative of the selu function.
)DOC")
    .Arg(
        "alpha",
        "(float) default to 1.6732~; affects the activation function itself. "
        "This should go with the weight initialization in the paper. "
        "See https://arxiv.org/abs/1706.02515 for details.")
131 "(float) default to 1.0507~; affects the activation function itself.")
    .Input(0, "Y", "Output tensor from the forward Selu pass.")
    .Input(1, "dY", "Gradient of the output tensor.");

class GetSeluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
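    // The generated op is "SeluGradient"; it reads the forward output
    // O(0) = Y and the output gradient GO(0) = dY, and writes GI(0) = dX,
    // the gradient with respect to the forward input X.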
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(Selu, GetSeluGradient);

} // namespace caffe2