Caffe2 - C++ API
A deep learning, cross-platform ML framework
relu_op.cc
#include "caffe2/operators/relu_op.h"

#include "caffe2/utils/math.h"

namespace caffe2 {

template <>
bool ReluOp<float, CPUContext>::RunOnDevice() {
  auto& X = Input(0);
  auto* Y = Output(0);
  Y->ResizeLike(X);

#ifdef CAFFE2_USE_ACCELERATE
  // vDSP_vthres zero-fills every element below the threshold (here 0.0f),
  // which is exactly ReLU.
  const float zero = 0.0f;
  vDSP_vthres(X.data<float>(), 1, &zero, Y->mutable_data<float>(), 1, X.size());
#else
  EigenVectorMap<float>(Y->mutable_data<float>(), X.size()) =
      ConstEigenVectorMap<float>(X.data<float>(), X.size()).cwiseMax(0.f);
#endif
  /* Naive implementation
  const float* Xdata = X.data<float>();
  float* Ydata = Y->mutable_data<float>();
  for (int i = 0; i < X.size(); ++i) {
    Ydata[i] = std::max(Xdata[i], 0.f);
  }
  */
  return true;
}
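// Illustrative example: for X = {-2.f, -0.5f, 0.f, 3.f}, both code paths
// above produce Y = {0.f, 0.f, 0.f, 3.f}.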

template <>
bool ReluGradientOp<float, CPUContext>::RunOnDevice() {
  auto& Y = Input(0);
  auto& dY = Input(1);
  auto* dX = Output(0);
  CAFFE_ENFORCE_EQ(dY.size(), Y.size());
  dX->ResizeLike(Y);

  const float* Ydata = Y.data<float>();
  const float* dYdata = dY.data<float>();
  float* dXdata = dX->mutable_data<float>();
  // TODO: proper vectorization with Eigen
  EigenVectorArrayMap<float> dXvec(dXdata, dX->size());
  ConstEigenVectorArrayMap<float> Yvec(Ydata, Y.size());
  ConstEigenVectorArrayMap<float> dYvec(dYdata, dY.size());
  // Y = max(0, X) is non-negative, so cwiseSign() yields 0 or 1 and acts as
  // the indicator 1[Y > 0].
  dXvec = dYvec * Yvec.cwiseSign();
  /* Previous implementation
  for (int i = 0; i < Y.size(); ++i) {
    dXdata[i] = Ydata[i] > 0 ? dYdata[i] : 0;
  }
  */
  return true;
}
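// Illustrative example: with Y = {0.f, 0.f, 0.f, 3.f} and
// dY = {1.f, 1.f, 1.f, 1.f}, the expression above yields
// dX = {0.f, 0.f, 0.f, 1.f}, matching the commented-out scalar loop.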

namespace {
OpSchema::Cost CostInferenceForRelu(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  struct OpSchema::Cost cost = PointwiseCostInference<2>(def, in);
  if (def.input(0) == def.output(0)) {
    cost.bytes_moved = 0;
  }
  cost.params_bytes = 0;
  return cost;
}
} // namespace
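// Note: PointwiseCostInference<2> charges a fixed per-element cost (the
// template argument being the assumed ops per element), so a Relu over N
// floats is costed at roughly 2 * N flops. The branch above then zeroes
// bytes_moved when the op runs in-place, since X and Y share storage and no
// extra data has to move.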

REGISTER_CPU_OPERATOR(Relu, ReluOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(ReluGradient, ReluGradientOp<float, CPUContext>);

// Input: X, output: Y
OPERATOR_SCHEMA(Relu)
    .NumInputs(1)
    .NumOutputs(1)
    .AllowInplace({{0, 0}})
    .CostInferenceFunction(CostInferenceForRelu)
    .IdenticalTypeAndShape()
    .SetDoc(R"DOC(
Relu takes one input data (Tensor<T>) and produces one output data
(Tensor<T>) where the rectified linear function, y = max(0, x), is applied to
the tensor elementwise.
)DOC")
    .Input(0, "X", "1D input tensor")
    .Output(0, "Y", "1D output tensor");

// Input: Y, dY, output: dX
OPERATOR_SCHEMA(ReluGradient)
    .NumInputs(2)
    .NumOutputs(1)
    .AllowInplace({{1, 0}})
    .SetDoc(R"DOC(
ReluGradient takes both Y and dY and uses this to update dX according to the
chain rule and derivatives of the rectified linear function.
)DOC");

class GetReluGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        def_.type() + "Gradient",
        "",
        vector<string>{O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
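// For an op "Relu" with output Y, the maker above emits a single
// "ReluGradient" op whose inputs are O(0) = Y and GO(0) = dY and whose
// output is GI(0) = dX, matching the ReluGradient schema declared above.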
REGISTER_GRADIENT(Relu, GetReluGradient);
REGISTER_GRADIENT(ReluFp16, GetReluGradient);

} // namespace caffe2
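For reference, below is a minimal sketch of running the Relu operator registered above through the Caffe2 C++ API. It assumes the classic Workspace/TensorCPU interfaces (as used in this era of the codebase); the blob names "X" and "Y" are arbitrary, and the OperatorDef is built directly through the protobuf API rather than any helper.

#include <algorithm>

#include "caffe2/core/init.h"
#include "caffe2/core/workspace.h"
#include "caffe2/proto/caffe2.pb.h"

int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  caffe2::Workspace ws;

  // Feed a small input blob "X" (hypothetical name for illustration).
  auto* x = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  x->Resize(4);
  const float vals[4] = {-2.f, -0.5f, 0.f, 3.f};
  std::copy(vals, vals + 4, x->mutable_data<float>());

  // Describe and run Relu(X) -> Y.
  caffe2::OperatorDef def;
  def.set_type("Relu");
  def.add_input("X");
  def.add_output("Y");
  ws.RunOperatorOnce(def);

  // Y should now hold {0, 0, 0, 3}.
  const auto& y = ws.GetBlob("Y")->Get<caffe2::TensorCPU>();
  return y.size() == 4 ? 0 : 1;
}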