// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// reduction_ops.cc
1 #include "caffe2/operators/reduction_ops.h"
2 
3 namespace caffe2 {
4 
5 REGISTER_CPU_OPERATOR(SumElements, SumElementsOp<float, CPUContext>);
6 REGISTER_CPU_OPERATOR(SumElementsInt, SumElementsIntOp<int, CPUContext>);
7 REGISTER_CPU_OPERATOR(SumSqrElements, SumSqrElementsOp<CPUContext>);
8 
9 REGISTER_CPU_OPERATOR(
10  SumElementsGradient,
11  SumElementsGradientOp<float, CPUContext>);
12 
13 REGISTER_CPU_OPERATOR(RowwiseMax, MaxReductionOp<float, CPUContext, true>);
14 REGISTER_CPU_OPERATOR(
15  RowwiseMaxGradient,
16  MaxReductionGradientOp<float, CPUContext, true>);
17 REGISTER_CPU_OPERATOR(
18  ColwiseMaxGradient,
19  MaxReductionGradientOp<float, CPUContext, false>);
20 REGISTER_CPU_OPERATOR(ColwiseMax, MaxReductionOp<float, CPUContext, false>);
21 
22 OPERATOR_SCHEMA(SumElements)
23  .NumInputs(1)
24  .NumOutputs(1)
25  .ScalarType(TensorProto::FLOAT)
26  .SetDoc(R"DOC(
27 Sums the elements of the input tensor. Tensor type must be float32.
28 
29 Github Links:
30 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc
31 
32 <details>
33 
34 <summary> <b>Example</b> </summary>
35 
36 **Code**
37 
38 ```
39 
40 workspace.ResetWorkspace()
41 
42 sum_op = core.CreateOperator(
43  "SumElements",
44  ["X"],
45  ["Y"]
46 )
47 
48 avg_op = core.CreateOperator(
49  "SumElements",
50  ["X"],
51  ["Y"],
52  average=True
53 )
54 
55 workspace.FeedBlob("X", np.random.randint(10, size=(3,3)).astype(np.float32))
56 print("X:\n", workspace.FetchBlob("X"))
57 workspace.RunOperatorOnce(sum_op)
58 print("Y (sum_op):", workspace.FetchBlob("Y"))
59 workspace.RunOperatorOnce(avg_op)
60 print("Y (avg_op):", workspace.FetchBlob("Y"))
61 
62 ```
63 
64 **Result**
65 
66 ```
67 
68 X:
69  [[7. 2. 5.]
70  [9. 4. 2.]
71  [1. 2. 5.]]
72 Y (sum_op): 37.0
73 Y (avg_op): 4.111111
74 
75 ```
76 
77 </details>
78 
79  )DOC")
80  .Arg("average", "(*bool*): set to True to compute the average of the elements rather than the sum")
81  .Input(0, "X", "(*Tensor`<float>`*): blob pointing to an instance of a counter")
82  .Output(0, "sum", "(*Tensor`<float>`*): Scalar tensor containing the sum (or average)");
83 
84 OPERATOR_SCHEMA(SumElementsInt)
85  .NumInputs(1)
86  .NumOutputs(1)
87  .ScalarType(TensorProto::INT32)
88  .SetDoc("Sums the integer elements of the input tensor.")
89  .Input(0, "X", "Tensor to sum up")
90  .Output(0, "sum", "Scalar sum");
91 SHOULD_NOT_DO_GRADIENT(SumElementsInt);
92 
93 OPERATOR_SCHEMA(SumSqrElements)
94  .NumInputs(1)
95  .NumOutputs(1)
96  .ScalarType(TensorProto::FLOAT)
97  .SetDoc("Sums the squares elements of the input tensor.")
98  .Arg("average", "whether to average or not")
99  .Input(0, "X", "Tensor to sum up")
100  .Output(0, "sum", "Scalar sum of squares");
101 
102 OPERATOR_SCHEMA(SumElementsGradient).NumInputs(2).NumOutputs(1);
103 
105  using GradientMakerBase::GradientMakerBase;
106  vector<OperatorDef> GetGradientDefs() override {
107  return SingleGradientDef(
108  "SumElementsGradient",
109  "",
110  vector<string>{I(0), GO(0)},
111  vector<string>{GI(0)});
112  }
113 };
114 REGISTER_GRADIENT(SumElements, GetSumElementsGradient);
115 
116 OPERATOR_SCHEMA(RowwiseMax)
117  .NumInputs(1)
118  .NumOutputs(1)
119  .SetDoc(R"DOC(
120 Compute row-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is number of rows, and $N$ is number of columns. The output of this op, $Y$, is a matrix of shape $BxM$, with one row for each element of the batch, and the same number of columns as the number of rows of the input tensor.
121 
122 Github Links:
123 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h
124 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc
125 
126 <details>
127 
128 <summary> <b>Example</b> </summary>
129 
130 **Code**
131 
132 ```
133 
134 workspace.ResetWorkspace()
135 
136 op = core.CreateOperator(
137  "RowwiseMax",
138  ["X"],
139  ["Y"]
140 )
141 
142 // Create X, simulating a batch of 2, 4x4 matricies
143 X = np.random.randint(0,high=20,size=(2,4,4))
144 print("X:\n",X)
145 
146 // Feed X into workspace
147 workspace.FeedBlob("X", X.astype(np.float32))
148 
149 // Run op
150 workspace.RunOperatorOnce(op)
151 
152 // Collect Output
153 print("Y:\n", workspace.FetchBlob("Y"))
154 
155 ```
156 
157 **Result**
158 
159 ```
160 
161 X:
162  [[[ 5 12 10 1]
163  [ 4 16 2 15]
164  [ 5 11 12 15]
165  [15 4 17 19]]
166 
167  [[16 5 5 13]
168  [17 2 1 17]
169  [18 3 19 5]
170  [14 16 10 16]]]
171 Y:
172  [[12. 16. 15. 19.]
173  [16. 17. 19. 16.]]
174 
175 ```
176 
177 </details>
178 
179  )DOC")
180  .Input(
181  0,
182  "X",
183  "A tensor of dimensions $B x M x N$ to compute rowwise-max. Here, $B$ is batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.")
184  .Output(
185  0,
186  "Y",
187  "The output tensor of shape $B x M$, where each row represents the row-wise maximums for that element of the input batch.");
188 
189 OPERATOR_SCHEMA(RowwiseMaxGradient).NumInputs(3).NumOutputs(1);
190 class GetRowwiseMaxGradient : public GradientMakerBase {
191  using GradientMakerBase::GradientMakerBase;
192  vector<OperatorDef> GetGradientDefs() override {
193  return SingleGradientDef(
194  "RowwiseMaxGradient",
195  "",
196  vector<string>{I(0), O(0), GO(0)},
197  vector<string>{GI(0)});
198  }
199 };
200 REGISTER_GRADIENT(RowwiseMax, GetRowwiseMaxGradient);
201 
202 OPERATOR_SCHEMA(ColwiseMaxGradient);
203 
204 OPERATOR_SCHEMA(ColwiseMax)
205  .NumInputs(1)
206  .NumOutputs(1)
207  .SetDoc(R"DOC(
208 Compute column-wise max reduction of the input tensor. This op takes one input, $X$, of shape $BxMxN$, where $B$ is the batch size, $M$ is number of rows, and $N$ is number of columns. The output of this op, $Y$, is a matrix of shape $BxN$, with one row for each element of the batch, and the same number of columns as the input tensor.
209 
210 Github Links:
211 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.h
212 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/reduction_ops.cc
213 
214 <details>
215 
216 <summary> <b>Example</b> </summary>
217 
218 **Code**
219 
220 ```
221 workspace.ResetWorkspace()
222 
223 op = core.CreateOperator(
224  "ColwiseMax",
225  ["X"],
226  ["Y"]
227 )
228 
229 // Create X, simulating a batch of 2, 4x4 matricies
230 X = np.random.randint(0,high=20,size=(2,4,4))
231 print("X:\n",X)
232 
233 // Feed X into workspace
234 workspace.FeedBlob("X", X.astype(np.float32))
235 
236 // Run op
237 workspace.RunOperatorOnce(op)
238 
239 // Collect Output
240 print("Y:\n", workspace.FetchBlob("Y"))
241 
242 ```
243 
244 **Result**
245 
246 ```
247 
248 X:
249  [[[17 15 2 6]
250  [ 8 12 6 0]
251  [ 6 9 7 3]
252  [ 4 13 16 13]]
253 
254  [[ 0 3 4 12]
255  [18 1 17 12]
256  [ 7 17 13 14]
257  [12 17 2 1]]]
258 Y:
259  [[17. 15. 16. 13.]
260  [18. 17. 17. 14.]]
261 
262 ```
263 
264 </details>
265 
266  )DOC")
267  .Input(
268  0,
269  "X",
270  "A tensor of dimensions $B x M x N$ to compute columnwise-max. Here, $B$ is batch size, and $M$ and $N$ are the number of rows and columns of each element of the batch, respectively.")
271  .Output(
272  0,
273  "Y",
274  "The output tensor of shape $B x N$, where each row represents the column-wise maximums for that element of the input batch.");
275 
276 OPERATOR_SCHEMA(ColumnMaxGradient).NumInputs(3).NumOutputs(1);
277 class GetColwiseMaxGradient : public GradientMakerBase {
278  using GradientMakerBase::GradientMakerBase;
279  vector<OperatorDef> GetGradientDefs() override {
280  return SingleGradientDef(
281  "ColwiseMaxGradient",
282  "",
283  vector<string>{I(0), O(0), GO(0)},
284  vector<string>{GI(0)});
285  }
286 };
287 REGISTER_GRADIENT(ColwiseMax, GetColwiseMaxGradient);
288 
289 template <typename T, class Context>
291 // TODO: T21635077 fix float-divide-by-zero undefined behavior
292 #if defined(__has_feature)
293 #if __has_feature(__address_sanitizer__)
294  __attribute__((__no_sanitize__("float-divide-by-zero")))
295 #endif
296 #endif
297 {
298  auto& X = Input(0);
299  Tensor sum_grad(Input(1), CPU);
300 
301  auto* dX = Output(0, X.sizes(), at::dtype<T>());
302  DCHECK_EQ(sum_grad.numel(), 1);
303  math::Set<T, Context>(
304  dX->numel(),
305  static_cast<T>(
306  sum_grad.template data<T>()[0] * (average_ ? 1.0 / X.numel() : 1)),
307  dX->template mutable_data<T>(),
308  &context_);
309  return true;
310 }
311 
312 template <typename T, class Context, bool ROWWISE>
314  auto& X = Input(0);
315  auto& Y = Input(1);
316  auto& dY = Input(2);
317 
318  auto* dX = Output(0, X.sizes(), at::dtype<T>());
319 
320  CAFFE_ENFORCE_EQ(X.dim(), 3);
321 
322  const int batch_size = X.dim32(0);
323  const int M = X.dim32(1);
324  const int N = X.dim32(2);
325 
326  const T* Xdata = X.template data<T>();
327  const T* Ydata = Y.template data<T>();
328  const T* dYdata = dY.template data<T>();
329  T* dXdata = dX->template mutable_data<T>();
330 
331  const int input_size = M * N;
332  for (int i = 0; i < batch_size; ++i) {
333  const T* Xdata_i = Xdata + i * input_size;
334  T* dXdata_i = dXdata + i * input_size;
335  if (ROWWISE) {
336  const T* Ydata_i = Ydata + i * M;
337  const T* dYdata_i = dYdata + i * M;
338  for (int m = 0; m < M; ++m) {
339  const T* Xdata_m = Xdata_i + m * N;
340  T* dXdata_m = dXdata_i + m * N;
341  for (int n = 0; n < N; ++n) {
342  if (Xdata_m[n] == Ydata_i[m]) {
343  dXdata_m[n] = dYdata_i[m];
344  } else {
345  dXdata_m[n] = static_cast<T>(0);
346  }
347  }
348  }
349  } else {
350  const T* Ydata_i = Ydata + i * N;
351  const T* dYdata_i = dYdata + i * N;
352  for (int n = 0; n < N; ++n) {
353  for (int m = 0; m < M; ++m) {
354  const T* Xdata_m = Xdata_i + m * N;
355  T* dXdata_m = dXdata_i + m * N;
356  if (Xdata_m[n] == Ydata_i[n]) {
357  dXdata_m[n] = dYdata_i[n];
358  } else {
359  dXdata_m[n] = static_cast<T>(0);
360  }
361  }
362  }
363  }
364  }
365 
366  return true;
367 }
368 
369 } // namespace caffe2
// Doxygen extraction residue (kept as comments so the file stays valid C++):
// Definition: any.cpp:108
// A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
// Definition: blob.h:13
// static vector< OperatorDef > SingleGradientDef(const Args &...args)
// a helper function to allow one to create one single operator def, which is usually the case for many ...