Caffe2 - C++ API
A deep learning, cross-platform ML framework
reduction_ops.cc
#include "caffe2/operators/reduction_ops.h"

namespace caffe2 {

REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);

REGISTER_CPU_OPERATOR(
    SumElementsGradient,
    SumElementsGradientOp<float, CPUContext>);

REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    RowwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, true>);
REGISTER_CPU_OPERATOR(
    ColwiseMaxGradient,
    MaxReductionGradientOp<float, CPUContext, false>);
REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);

OPERATOR_SCHEMA(SumElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the elements of the input tensor.")
    .Arg("average", "If true, averages the elements (divides the sum by the element count) instead of summing them.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum");

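For orientation, here is a minimal sketch of driving this operator from C++ (illustrative only; the workspace setup, function name, and blob names are assumptions, not part of this file):

#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

// Sketch: run SumElements on a 4-element tensor.
// X = [1, 2, 3, 4] -> sum = 10; with average=true the result would be 2.5.
void RunSumElementsExample() {
  caffe2::Workspace ws;
  auto* X = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  X->Resize(4);
  float* data = X->mutable_data<float>();
  for (int i = 0; i < 4; ++i) {
    data[i] = static_cast<float>(i + 1);
  }

  caffe2::OperatorDef def;
  def.set_type("SumElements");
  def.add_input("X");
  def.add_output("sum");

  auto op = caffe2::CreateOperator(def, &ws);
  op->Run();  // The "sum" blob now holds the scalar 10.
}
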
OPERATOR_SCHEMA(SumSqrElements)
    .NumInputs(1)
    .NumOutputs(1)
    .ScalarType(TensorProto::FLOAT)
    .SetDoc("Sums the squares of the elements of the input tensor.")
    .Arg("average", "If true, averages the squared elements instead of summing them.")
    .Input(0, "X", "Tensor to sum up")
    .Output(0, "sum", "Scalar sum of squares");

OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);

class GetSumElementsGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "SumElementsGradient",
        "",
        vector<string>{I(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(SumElements, GetSumElementsGradient);

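In this gradient maker, I(0) is the forward input X, GO(0) is the gradient flowing into the forward output, and GI(0) is the gradient to be produced for X. Schematically, the generated def wires up as follows (an illustrative reading, not additional code):

// SumElementsGradient(inputs: {X, d_sum}, outputs: {dX})
// X is only needed for its shape (and element count when average is set).
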
OPERATOR_SCHEMA(RowwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute row-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute rowwise-max.")
    .Output(0, "Y", "batch_size x M rowwise-max results matrix.");

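Concretely, RowwiseMax reduces over the last axis of each batch entry (an illustrative example, not from the source):

// X (batch_size=1, M=2, N=3):
//   [[1, 5, 2],
//    [7, 0, 3]]
// RowwiseMax(X) -> Y (1 x 2): [5, 7]
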
OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);

class GetRowwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "RowwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);

OPERATOR_SCHEMA(ColwiseMax)
    .NumInputs(1)
    .NumOutputs(1)
    .SetDoc("Compute column-wise max reduction of the input tensor.")
    .Input(
        0,
        "X",
        "A tensor of dimensions batch_size x M x N to compute colwise-max.")
    .Output(0, "Y", "batch_size x N colwise-max results matrix.");

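By analogy with the row-wise case, ColwiseMax reduces over the middle axis (again an illustrative example):

// X (batch_size=1, M=2, N=3):
//   [[1, 5, 2],
//    [7, 0, 3]]
// ColwiseMax(X) -> Y (1 x 3): [7, 5, 3]
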
OPERATOR_SCHEMA(ColwiseMaxGradient).NumInputs(3).NumOutputs(1);

class GetColwiseMaxGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    return SingleGradientDef(
        "ColwiseMaxGradient",
        "",
        vector<string>{I(0), O(0), GO(0)},
        vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);

template <typename T, class Context>
bool SumElementsGradientOp<T, Context>::RunOnDevice()
// TODO: T21635077 fix float-divide-by-zero undefined behavior
#if defined(__has_feature)
#if __has_feature(__address_sanitizer__)
    __attribute__((__no_sanitize__("float-divide-by-zero")))
#endif
#endif
{
  auto& X = Input(0);
  TensorCPU sum_grad = TensorCPU(Input(1));
  auto* dX = Output(0);
  dX->ResizeLike(X);
  DCHECK_EQ(sum_grad.size(), 1);
  // Broadcast the scalar upstream gradient to every element of dX,
  // dividing by the element count when the forward pass averaged.
  math::Set<T, Context>(
      dX->size(),
      static_cast<T>(sum_grad.data<T>()[0] * (average_ ? 1.0 / X.size() : 1)),
      dX->template mutable_data<T>(),
      &context_);
  return true;
}

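In other words, for an input with n elements the kernel sets every entry of dX to dY, or to dY / n when average is set. A worked instance (illustrative):

// X has 4 elements, upstream gradient d_sum = 1.0:
//   average = false -> dX = [1.0, 1.0, 1.0, 1.0]
//   average = true  -> dX = [0.25, 0.25, 0.25, 0.25]
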
template <typename T, class Context, bool ROWWISE>
bool MaxReductionGradientOp<T, Context, ROWWISE>::RunOnDevice() {
  auto& X = Input(0);
  auto& Y = Input(1);
  auto& dY = Input(2);

  auto* dX = Output(0);
  dX->ResizeLike(X);

  CAFFE_ENFORCE_EQ(X.ndim(), 3);

  const int batch_size = X.dim32(0);
  const int M = X.dim32(1);
  const int N = X.dim32(2);

  const T* Xdata = X.template data<T>();
  const T* Ydata = Y.template data<T>();
  const T* dYdata = dY.template data<T>();
  T* dXdata = dX->template mutable_data<T>();

  const int input_size = M * N;
  for (int i = 0; i < batch_size; ++i) {
    const T* Xdata_i = Xdata + i * input_size;
    T* dXdata_i = dXdata + i * input_size;
    if (ROWWISE) {
      const T* Ydata_i = Ydata + i * M;
      const T* dYdata_i = dYdata + i * M;
      for (int m = 0; m < M; ++m) {
        const T* Xdata_m = Xdata_i + m * N;
        T* dXdata_m = dXdata_i + m * N;
        for (int n = 0; n < N; ++n) {
          // Route the row's gradient to the element(s) equal to the row max.
          if (Xdata_m[n] == Ydata_i[m]) {
            dXdata_m[n] = dYdata_i[m];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    } else {
      const T* Ydata_i = Ydata + i * N;
      const T* dYdata_i = dYdata + i * N;
      for (int n = 0; n < N; ++n) {
        for (int m = 0; m < M; ++m) {
          const T* Xdata_m = Xdata_i + m * N;
          T* dXdata_m = dXdata_i + m * N;
          // Route the column's gradient to the element(s) equal to the column max.
          if (Xdata_m[n] == Ydata_i[n]) {
            dXdata_m[n] = dYdata_i[n];
          } else {
            dXdata_m[n] = static_cast<T>(0);
          }
        }
      }
    }
  }

  return true;
}
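
Note the equality test against Y: if several elements tie for the max, each tied element receives the full upstream gradient rather than a share of it. An illustrative trace:

// Rowwise case, one row X_m = [3, 7, 7] with Y_m = 7 and dY_m = 1.0:
//   dX_m = [0.0, 1.0, 1.0]  // both tied elements get the whole gradient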

} // namespace caffe2