// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// selu_op.cc
1 #include "caffe2/operators/selu_op.h"
2 
3 #include "caffe2/utils/math.h"
4 
5 namespace caffe2 {
6 
7 template <>
8 bool SeluOp<float, CPUContext>::RunOnDevice() {
9  auto& X = Input(0);
10  auto* Y = Output(0);
11  Y->ResizeLike(X);
12 
13  ConstEigenVectorArrayMap<float> Xvec(X.data<float>(), X.size());
14  EigenVectorArrayMap<float> Yvec(Y->mutable_data<float>(), Y->size());
15  Yvec = lambda_ * (Xvec > 0).select(Xvec, (alpha_ * Xvec.exp() - alpha_));
16  return true;
17 }
18 
19 template <>
20 bool SeluGradientOp<float, CPUContext>::RunOnDevice() {
21  auto& Y = Input(0);
22  auto& dY = Input(1);
23  auto* dX = Output(0);
24  CAFFE_ENFORCE_EQ(dY.size(), Y.size());
25  dX->ResizeLike(Y);
26 
27  ConstEigenVectorArrayMap<float> Yvec(Y.data<float>(), Y.size());
28  ConstEigenVectorArrayMap<float> dYvec(dY.data<float>(), dY.size());
29  EigenVectorArrayMap<float> dXvec(dX->mutable_data<float>(), dX->size());
30 
31  const float la = lambda_ * alpha_;
32  dXvec = (Yvec > 0).select(lambda_ * dYvec, dYvec * (Yvec + la));
33  return true;
34 }
35 
// Register the CPU implementations of the forward and backward operators.
REGISTER_CPU_OPERATOR(Selu, SeluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SeluGradient, SeluGradientOp<float, CPUContext>);
38 
39 // Input: X; output: Y
40 OPERATOR_SCHEMA(Selu)
41  .NumInputs(1)
42  .NumOutputs(1)
43  .AllowInplace({{0, 0}})
44  .IdenticalTypeAndShape()
45  .SetDoc(R"DOC(
46 Selu takes one input data (Tensor<T>) and produces one output data
47 (Tensor<T>) where the function, y = scale*(alpha_*e^x-alpha_ if x < 0 else x),
48 is applied to the tensor elementwise.
49 )DOC")
50  .Arg(
51  "alpha",
52  "(float) default to 1.6732~; affects the activation function itself. "
53  "This should go with the weight initialization in the paper. "
54  " See https://arxiv.org/abs/1706.02515 ")
55  .Arg(
56  "scale",
57  "(float) default to 1.0507~; affects the activation function itself.")
58  .Input(0, "X", "input tensor")
59  .Output(0, "Y", "input tensor");
60 
61 // Input: Y, dY; output: dX
62 OPERATOR_SCHEMA(SeluGradient)
63  .NumInputs(2)
64  .NumOutputs(1)
65  .AllowInplace({{1, 0}})
66  .SetDoc(R"DOC(
67 SeluGradient takes both Y and dY and uses this to update dX according to the
68 chain rule and derivatives of the selu function.
69 )DOC")
70  .Arg(
71  "alpha",
72  "(float) default to 1.6732~; affects the activation function itself."
73  "This should go with the weight initialization in the paper. "
74  " See https://arxiv.org/abs/1706.02515 ")
75  .Arg(
76  "scale",
77  "(float) default to 1.0507~; affects the activation function itself.")
78  .Input(0, "Y", "input tensor")
79  .Input(1, "dY", "input tensor");
80 
81 class GetSeluGradient : public GradientMakerBase {
82  using GradientMakerBase::GradientMakerBase;
83  vector<OperatorDef> GetGradientDefs() override {
84  return SingleGradientDef(
85  def_.type() + "Gradient",
86  "",
87  vector<string>{O(0), GO(0)},
88  vector<string>{GI(0)});
89  }
90 };
91 REGISTER_GRADIENT(Selu, GetSeluGradient);
92 
93 } // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.
//
// static vector<OperatorDef> SingleGradientDef(const Args&... args)
// A helper function to allow one to create one single operator def, which is
// usually the case for many simple operators.