// Caffe2 - C++ API
// A deep-learning, cross-platform ML framework.
// File: utility_ops.cc
1 #include "caffe2/operators/utility_ops.h"
2 #include <cmath>
3 #include "caffe2/utils/eigen_utils.h"
4 
5 namespace caffe2 {
6 
7 template <>
8 bool WeightedSumOp<CPUContext>::RunOnDevice() {
9  return DoRunWithType<float>();
10 }
11 
12 template <>
13 bool WeightedSumGradientOp<CPUContext>::RunOnDevice() {
14  return DoRunWithType<float>();
15 }
16 
17 std::vector<TensorShape> WeightedSumShapeInference(
18  const OperatorDef& /* unused */,
19  const vector<TensorShape>& in) {
20  vector<TensorShape> out(1);
21  out[0] = in[0];
22  return out;
23 }
24 
25 OpSchema::Cost CostInferenceForWeightedSum(
26  const OperatorDef& /* unused */,
27  const vector<TensorShape>& in) {
28  CAFFE_ENFORCE_EQ(
29  in.size() % 2, 0, "WeightedSum requires an even number of inputs");
30  struct OpSchema::Cost c;
31 
32  const auto& X0 = in[0];
33  const auto& nElem = nElemFromDim(X0);
34  const auto& nInputs = in.size();
35  c.flops = (nInputs - 1) * nElem;
36  c.bytes_read = (nInputs / 2) * (nElem + 1) * sizeof(X0.data_type());
37  c.bytes_written = nElem * sizeof(X0.data_type());
38  c.params_bytes = (nInputs / 2) * sizeof(X0.data_type());
39  return c;
40 }
41 
42 REGISTER_CPU_OPERATOR(WallClockTime, WallClockTimeOp<CPUContext>);
43 REGISTER_CPU_OPERATOR(Print, PrintOp<CPUContext>);
44 REGISTER_CPU_OPERATOR(FlattenToVec, FlattenToVecOp<CPUContext>);
45 REGISTER_CPU_OPERATOR(Alias, AliasOp<CPUContext>);
46 REGISTER_CPU_OPERATOR(ResizeLike, ResizeLikeOp<CPUContext>);
47 REGISTER_CPU_OPERATOR(SumInt, SumOp<CPUContext>);
48 REGISTER_CPU_OPERATOR(WeightedSum, WeightedSumOp<CPUContext>);
49 REGISTER_CPU_OPERATOR(WeightedSumGradient, WeightedSumGradientOp<CPUContext>);
50 REGISTER_CPU_OPERATOR(
51  ScatterWeightedSum,
52  ScatterWeightedSumOp<float, CPUContext>);
53 REGISTER_CPU_OPERATOR(ScatterAssign, ScatterAssignOp<CPUContext>);
54 
55 REGISTER_CPU_OPERATOR(LengthsToShape, LengthsToShapeOp<CPUContext>);
56 REGISTER_CPU_OPERATOR(HasElements, HasElementsOp<CPUContext>);
57 REGISTER_CPU_OPERATOR(GatherRanges, GatherRangesOp<CPUContext>);
58 REGISTER_CPU_OPERATOR(LengthsGather, LengthsGatherOp<CPUContext>);
59 REGISTER_CPU_OPERATOR(LengthsToSegmentIds, LengthsToSegmentIdsOp<CPUContext>);
60 REGISTER_CPU_OPERATOR(LengthsToRanges, LengthsToRangesOp<CPUContext>);
61 REGISTER_CPU_OPERATOR(SegmentIdsToLengths, SegmentIdsToLengthsOp<CPUContext>);
62 REGISTER_CPU_OPERATOR(SegmentIdsToRanges, SegmentIdsToRangesOp<CPUContext>);
63 REGISTER_CPU_OPERATOR(LengthsToWeights, LengthsToWeightsOp<CPUContext>);
64 REGISTER_CPU_OPERATOR(EnsureDense, EnsureDenseOp<CPUContext>);
65 REGISTER_CPU_OPERATOR(
66  AccumulateHistogram,
67  AccumulateHistogramOp<float, CPUContext>);
68 
69 OPERATOR_SCHEMA(WallClockTime)
70  .NumInputs(0)
71  .NumOutputs(1)
72  .SetDoc("Time since epoch in nanoseconds.")
73  .Output(0, "time", "The time in nanoseconds.");
74 
75 OPERATOR_SCHEMA(Print)
76  .NumInputs(1)
77  .NumOutputs(0)
78  .SetDoc("Logs shape and contents of input tensor to stderr or to a file.")
79  .Arg(
80  "to_file",
81  "(bool) if 1, saves contents to the root folder of the current "
82  "workspace, appending the tensor contents to a file named after "
83  "the blob name. Otherwise, logs to stderr.")
84  .Arg(
85  "limit",
86  "(int, default 0) If set, prints the first `limit` elements of tensor. "
87  "If 0, prints the first `k_limit_default`(1000) elements of tensor")
88  .Arg(
89  "every_n",
90  "(int, default 1) Print tensor every `every_n` runs")
91  .Input(0, "tensor", "The tensor to print.");
92 
93 OPERATOR_SCHEMA(LengthsToShape)
94  .NumInputs(1)
95  .NumOutputs(1)
96  .SetDoc(R"DOC(
97 This operator takes a list of $N$ equal integers as input which represent the lengths of $N$ vectors. The output is the calculated shape of the matrix if the $N$ integers were combined into a single matrix.
98 
99 Github Links:
100 
101 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.h
102 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.cc
103 
104 
105 <details>
106 
107 <summary> <b>Example</b> </summary>
108 
109 **Code**
110 
111 ```
112 
113 workspace.ResetWorkspace()
114 
115 op = core.CreateOperator(
116  "LengthsToShape",
117  ["X"],
118  ["Y"]
119 )
120 
121 // Create X: Sample softmax output for 5-class model
122 X = np.array([2,2,2,2,2,2,2,2,2,2])
123 print("X:\n",X)
124 
125 // Feed X into workspace
126 workspace.FeedBlob("X", X.astype(np.int32))
127 
128 // Run op
129 workspace.RunOperatorOnce(op)
130 
131 // Collect Output
132 print("Y:\n", workspace.FetchBlob("Y"))
133 
134 ```
135 
136 **Result**
137 
138 ```
139 
140 X:
141  [2 2 2 2 2 2 2 2 2 2]
142 Y:
143  [10 2]
144 
145 ```
146 
147 </details>
148 
149  )DOC")
150  .Input(
151  0,
152  "X",
153  "List, of length $N$, of equal integers representing the lengths of several vectors.")
154  .Output(
155  0,
156  "Y",
157  "Vector of length 2 describing the dimensions of the data if the $N$ vectors from the input were combined to a single matrix.");
158 OPERATOR_SCHEMA(FlattenToVec)
159  .NumInputs(1)
160  .NumOutputs(1)
161  .TensorInferenceFunction([](const OperatorDef& /*def*/,
162  const vector<TensorShape>& in) {
163  vector<TensorShape> out(1);
164  int total = 1;
165  for (auto d : in[0].dims()) {
166  total *= d;
167  }
168  out[0].set_data_type(in[0].data_type());
169  out[0].add_dims(total);
170  return out;
171  })
172  .SetDoc(R"DOC(
173 
174 The *FlattenToVec* op flattens the input tensor into a 1-D vector. The op accepts a single input tensor and returns a single output tensor.
175 
176 Github Links:
177 
178 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.cc
179 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.h
180 
181 
182 <details>
183 
184 <summary> <b>Example</b> </summary>
185 
186 **Code**
187 
188 ```
189 
190 workspace.ResetWorkspace()
191 
192 op = core.CreateOperator(
193  "FlattenToVec",
194  ["input"],
195  ["output"],
196 )
197 
198 workspace.FeedBlob("input", np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]]).astype(np.float32))
199 print("input:\n", workspace.FetchBlob("input"))
200 
201 workspace.RunOperatorOnce(op)
202 print("output: \n", workspace.FetchBlob("output"))
203 
204 ```
205 
206 **Result**
207 
208 ```
209 
210 input:
211  [[ 1. 2. 3.]
212  [ 4. 5. 6.]
213  [ 7. 8. 9.]
214  [10. 11. 12.]]
215 output:
216  [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10. 11. 12.]
217 
218 ```
219 
220 </details>
221 
222 )DOC")
223  .Input(0, "input", "A tensor of rank >= 1.")
224  .Output(0, "output", "A tensor of rank 1 (vector) with the contents of the input tensor.");
225 
226 OPERATOR_SCHEMA(Alias)
227  .NumInputs(1)
228  .NumOutputs(1)
229  .IdenticalTypeAndShape()
230  .SetDoc(R"DOC(
231 Makes the output and the input share the same underlying storage.
232 
233 WARNING: in general, in caffe2's operator interface different tensors should
234 have different underlying storage, which is the assumption made by
235 components such as the dependency engine and memory optimization. Thus, in
236 normal situations you should not use the AliasOp, especially in a normal
237 forward-backward pass.
238 
239 The Alias op is provided so one can achieve true asynchrony, such as
240 Hogwild, in a graph. But make sure you understand all the implications
241 similar to multi-thread computation before you use it explicitly.
242 )DOC")
243  .Input(0, "input", "Input tensor whose storage will be shared.")
244  .Output(0, "output", "Tensor of same shape as input, sharing its storage.");
245 
246 OPERATOR_SCHEMA(ResizeLike)
247  .NumInputs(2)
248  .NumOutputs(1)
249  .TensorInferenceFunction([](const OperatorDef& /*def*/,
250  const vector<TensorShape>& in) {
251  vector<TensorShape> out(1);
252  out.push_back(in[1]);
253  out[0].set_data_type(in[0].data_type());
254  return out;
255  })
256  .SetDoc(R"DOC(
257 Produces tensor containing data of first input and shape of second input.
258 )DOC")
259  .Input(0, "data", "Tensor whose data will be copied into the output.")
260  .Input(1, "shape_tensor", "Tensor whose shape will be applied to output.")
261  .Output(0, "output", "Tensor with data of input 0 and shape of input 1.");
262 
263 OPERATOR_SCHEMA(SumInt)
264  .NumInputs(1, INT_MAX)
265  .NumOutputs(1)
266  .InputsCanCrossDevices()
267  .TensorInferenceFunction([](const OperatorDef& /*def*/,
268  const vector<TensorShape>& in) {
269  vector<TensorShape> out(1);
270  out.push_back(in[0]);
271  out[0].set_data_type(TensorProto::INT32);
272  return out;
273  })
274  .AllowInplace({{0, 0}});
275 
276 OPERATOR_SCHEMA(WeightedSum)
277  .NumInputs([](int n) { return (n > 0 && n % 2 == 0); })
278  .NumOutputs(1)
279  .TensorInferenceFunction(WeightedSumShapeInference)
280  .CostInferenceFunction(CostInferenceForWeightedSum)
281  .AllowInplace({{0, 0}})
282  .IdenticalTypeAndShapeOfInput(0)
283  .SetDoc(R"DOC(
284 Element-wise weighted sum of several data, weight tensor pairs.
285 Input should be in the form X_0, weight_0, X_1, weight_1, ... where X_i all
286 have the same shape, and weight_i are size 1 tensors that specifies the weight
287 of each vector. Note that if one wants to do in-place computation, it could
288 only be done with X_0 also as the output, but not other X_i.
289 )DOC")
290  .Input(0, "data_0", "First of the input tensors.")
291  .Input(0, "weight_0", "Weight of the first input in the sum.")
292  .Output(0, "output", "Result containing weighted elem-wise sum of inputs.");
293 
294 OPERATOR_SCHEMA(WeightedSumGradient)
295  .NumInputs([](int n) { return (n > 0 && n % 2 == 1); })
296  .NumOutputs(1, INT_MAX);
297 
298 OPERATOR_SCHEMA(ScatterWeightedSum)
299  .NumInputs([](int n) { return (n > 3 && (n - 3) % 2 == 0); })
300  .NumOutputs(1)
301  .EnforceInplace({{0, 0}})
302  .SetDoc(R"DOC(
303 Similar to WeightedSum, computes the weighted sum of several tensors, with
304 the difference that inputs are sliced tensors. The first tensor has to be
305 in-place and only slices of it on the first dimension as indexed by INDICES
306 will be updated.
307 
308 Note: The op pretty much ignores the exact shapes of the input arguments and
309 cares only about sizes. It's done for performance consideration to avoid
310 unnecessary reshapes. Only first dimension of X_0 is important, let's call it
311 N. If M is the total size of X_0 and K is the size of INDICES then X_i is
312 assumed to be of shape K x (M / N) regardless of the real shape.
313 
314 Note: Each update in INDICES is applied independently which means that if
315 duplicated elements are present in INDICES the corresponding slice of X_0
316 will be scaled multiple times. Manual collapsing of INDICES is required
317 beforehand if necessary.
318 
319 Note: Updates are applied sequentially by inputs which might have undesired
320 consequences if the input tensor is accessed concurrently by different op
321 (e.g. when doing Hogwild). Other threads might see intermediate results even
322 on individual slice level, e.g. X_0 scaled by weight_0 but without any
323 updates applied.
324 
325 Currently only works on CPU because of access to INDICES.
326 )DOC")
327  .Input(0, "X_0", "Tensor to be updated.")
328  .Input(
329  1,
330  "Weight_0",
331  "Scalar weight for X_0, applied only to slices affected.")
332  .Input(
333  2,
334  "INDICES",
335  "1-D list of indices on the first dimension of X_0 "
336  "that need to be updated")
337  .Input(3, "X_1", "Update slices, with shape len(INDICES) + shape(X_0)[1:]")
338  .Input(4, "Weight_1", "Scalar weight for X_1 update")
339  .Output(0, "X_0", "Has to be exactly the same tensor as the input 0")
340  .EnforceInplace({{0, 0}});
341 
342 OPERATOR_SCHEMA(ScatterAssign)
343  .NumInputs(3)
344  .NumOutputs(1)
345  .EnforceInplace({{0, 0}})
346  .SetDoc(R"DOC(
347 Update slices of the tensor in-place by overriding current value.
348 
349 Note: The op pretty much ignores the exact shapes of the input arguments and
350 cares only about sizes. It's done for performance consideration to avoid
351 unnecessary reshapes. Only first dimension of X_0 is important, let's call it
352 N. If M is the total size of X_0 and K is the size of INDICES then X_i is
353 assumed to be of shape K x (M / N) regardless of the real shape.
354 
355 Note: Each update in INDICES is applied independently which means that if
356 duplicated elements are present in INDICES arbitrary one will win.
357 
358 Currently only works on CPU because of access to INDICES.
359 )DOC")
360  .Input(0, "DATA", "Tensor to be updated.")
361  .Input(
362  1,
363  "INDICES",
364  "1-D list of indices on the first dimension"
365  "of X_0 that need to be updated")
366  .Input(
367  2,
368  "SLICES",
369  "Update slices, with shape len(INDICES) + shape(X_0)[1:]")
370  .Output(0, "DATA", "Has to be exactly the same tensor as the input 0");
371 
372 
373 OPERATOR_SCHEMA(HasElements)
374  .NumInputs(1)
375  .NumOutputs(1)
376  .SetDoc(R"DOC(
377 The *HasElements* op accepts a single input $tensor$, and produces a single boolean output $has\_elements$. The output is *True* if and only if $tensor$ has size > 0. Note, this op is the opposite of the *IsEmpty* op.
378 
379 Github Links:
380 
381 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.cc
382 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.h
383 
384 
385 <details>
386 
387 <summary> <b>Example</b> </summary>
388 
389 **Code**
390 
391 ```
392 
393 workspace.ResetWorkspace()
394 
395 op = core.CreateOperator(
396  "HasElements",
397  ["tensor"],
398  ["has_elements"],
399 )
400 
401 // Use a not-empty tensor
402 workspace.FeedBlob("tensor", np.random.randn(2, 2).astype(np.float32))
403 print("tensor:\n", workspace.FetchBlob("tensor"))
404 
405 workspace.RunOperatorOnce(op)
406 print("has_elements: ", workspace.FetchBlob("has_elements"),"\n")
407 
408 // Use an empty tensor
409 workspace.FeedBlob("tensor", np.empty(0))
410 print("tensor:\n", workspace.FetchBlob("tensor"))
411 
412 workspace.RunOperatorOnce(op)
413 print("has_elements: ", workspace.FetchBlob("has_elements"))
414 
415 ```
416 
417 **Result**
418 
419 ```
420 
421 tensor:
422  [[ 0.6116506 -0.54433197]
423  [ 0.19406661 -0.7338629 ]]
424 has_elements: True
425 
426 tensor:
427  []
428 has_elements: False
429 
430 ```
431 
432 </details>
433 
434 )DOC")
435  .Input(0, "tensor", "Input data tensor to check for elements.")
436  .Output(0, "has_elements", "Output scalar boolean tensor. True if input has size > 0.");
437 
438 OPERATOR_SCHEMA(GatherRanges)
439  .NumInputs(2)
440  .NumOutputs(2)
441  .DisallowInputFillers()
442  .SetDoc(R"DOC(
443 Given DATA tensor of rank 1, and RANGES tensor of rank 3, gather
444 corresponding ranges into a 1-D tensor OUTPUT.
445 
446 RANGES dimentions description:
447 1: represents list of examples within a batch
448 2: represents list features
449 3: two values which are start and length or a range (to be applied on DATA)
450 
451 Another output LENGTHS represents each example length within OUTPUT
452 
453 Example:
454  DATA = [1, 2, 3, 4, 5, 6]
455  RANGES = [
456  [
457  [0, 1],
458  [2, 2],
459  ],
460  [
461  [4, 1],
462  [5, 1],
463  ]
464  ]
465  OUTPUT = [1, 3, 4, 5, 6]
466  LENGTHS = [3, 2]
467 )DOC")
468  .Input(0, "DATA", "Tensor of rank 1.")
469  .Input(
470  1,
471  "RANGES",
472  "Tensor of int32/int64 ranges, of dims (N, M, 2). "
473  "Where N is number of examples and M is a size of each example. "
474  "Last dimension represents a range in the format (start, lengths)")
475  .Output(0, "OUTPUT", "1-D tensor of size sum of range lengths")
476  .Output(
477  1,
478  "LENGTHS",
479  "1-D tensor of size N with lengths over gathered data"
480  " for each row in a batch. sum(LENGTHS) == OUTPUT.size()")
481  .TensorInferenceFunction([](const OperatorDef& /* unused */,
482  const vector<TensorShape>& in) {
483  std::vector<TensorShape> out(2);
484 
485  int total = 1;
486  for (auto d : in[0].dims()) {
487  total *= d;
488  }
489  out[0].add_dims(total);
490  out[0].set_data_type(in[0].data_type());
491  out[1].add_dims(in[1].dims(0));
492  out[1].set_data_type(in[1].data_type());
493  return out;
494  });
495 
496 OPERATOR_SCHEMA(LengthsGather)
497  .NumInputs(3)
498  .NumOutputs(1)
499  .SetDoc(R"DOC(
500 Gather items from sparse tensor. Sparse tensor is described by items and
501 lengths. This operator gathers items corresponding to lengths at the given
502 indices. This deliberately doesn't return lengths of OUTPUTS so that both lists
503 and maps can be supported without special cases. If you need lengths tensor for
504  OUTPUT, use `Gather`.
505 
506 Example:
507  ITEMS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
508  LENGTHS = [0, 2, 3, 1, 4]
509  INDICES = [0, 2, 4]
510 
511  OUTPUT = [2, 3, 4, 6, 7, 8, 9]
512 )DOC")
513  .Input(0, "ITEMS", "items tensor")
514  .Input(1, "LENGTHS", "lengths tensor")
515  .Input(2, "INDICES", "indices into LENGTHS where items should be gathered")
516  .Output(0, "OUTPUT", "1-D tensor containing gathered items");
517 
518 OPERATOR_SCHEMA(LengthsToSegmentIds)
519  .NumInputs(1)
520  .NumOutputs(1)
521  .DisallowInputFillers() // TODO: enable the filler
522  .SetDoc(R"DOC(
523 Given a vector of segment lengths (*lengths*) the *LengthsToSegmentIds* op returns a zero-based, consecutive vector of segment ids (*segment_ids*). For example, *lengths=[1, 3, 0, 2]* will produce *segment_ids=[0, 1, 1, 1, 3, 3]*. In general, the inverse operation is *SegmentIdsToLengths*. Notice though that trailing empty sequence lengths can't be properly recovered from segment ids.
524 
525 Github Links:
526 
527 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.cc
528 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/utility_ops.h
529 
530 
531 <details>
532 
533 <summary> <b>Example</b> </summary>
534 
535 **Code**
536 
537 ```
538 
539 workspace.ResetWorkspace()
540 
541 op = core.CreateOperator(
542  "LengthsToSegmentIds",
543  ["lengths"],
544  ["segment_ids"],
545 )
546 
547 workspace.FeedBlob("lengths", np.array([1, 3, 0, 2]).astype(np.int32))
548 print("lengths:\n", workspace.FetchBlob("lengths"))
549 
550 workspace.RunOperatorOnce(op)
551 print("segment_ids: \n", workspace.FetchBlob("segment_ids"))
552 
553 ```
554 
555 **Result**
556 
557 ```
558 
559 lengths:
560  [1 3 0 2]
561 segment_ids:
562  [0 1 1 1 3 3]
563 
564 ```
565 
566 </details>
567 
568 )DOC")
569  .Input(0, "lengths", "1D tensor of int32 or int64 segment lengths.")
570  .Output(0, "segment_ids", "1D tensor of length *sum(lengths)*");
571 
572 OPERATOR_SCHEMA(LengthsToRanges)
573  .NumInputs(1)
574  .NumOutputs(1)
575  .TensorInferenceFunction([](const OperatorDef& /* unused */,
576  const vector<TensorShape>& in) {
577  vector<int> out_shape(in[0].dims().begin(), in[0].dims().end());
578  out_shape.push_back(2);
579  return vector<TensorShape>{
580  CreateTensorShape(out_shape, in[0].data_type())};
581  })
582  .SetDoc(R"DOC(
583 Given a vector of segment lengths, calculates offsets of each segment and packs
584 them next to the lengths. For the input vector of length N the output is a Nx2
585 matrix with (offset, lengths) packaged for each segment.
586 
587 For example, `[1, 3, 0, 2]` transforms into `[[0, 1], [1, 3], [4, 0], [4, 2]]`.
588 )DOC")
589  .Input(0, "lengths", "1D tensor of int32 segment lengths.")
590  .Output(
591  0,
592  "ranges",
593  "2D tensor of shape len(lengths) X 2 and the same type as `lengths`");
594 
595 OPERATOR_SCHEMA(SegmentIdsToLengths)
596  .NumInputs(1, 2)
597  .NumOutputs(1)
598  .DisallowInputFillers() // TODO: enable the filler
599  .SetDoc(R"DOC(
600 Transfers a vector of segment ids to a vector of segment lengths. This operation
601 supports non-consecutive segment ids. Segments not appearing in the input vector
602 will have length 0. If the second input is provided, the number of segments =
603 the size of its first dimension. Otherwise, the number of segments = the last
604 index in the first input vector + 1.
605 
606 In general, for consecutive, zero-based segment IDs, this is the inverse
607 operation of LengthsToSegmentIds, except that a vector of segment IDs
608 cannot represent empty segments at the end (if the second input is absent).
609 )DOC")
610  .Input(0, "segment_ids", "1-D int32_t or int64_t tensor of segment ids")
611  .Input(
612  1,
613  "data (optional)",
614  "if provided, number of segments = the size of its first dimension")
615  .Output(0, "lengths", "1-D int64_t tensor of segment lengths");
616 
617 OPERATOR_SCHEMA(SegmentIdsToRanges)
618  .NumInputs(1, 2)
619  .NumOutputs(1)
620  .DisallowInputFillers() // TODO: enable the filler
621  .SetDoc(R"DOC(
622 Transfers a vector of segment ids to a vector of segment ranges. This operation
623 supports non-consecutive segment ids. Segments not appearing in the input vector
624 will have length 0. If the second input is provided, the number of segments =
625 the size of its first dimension. Otherwise, the number of segments = the last
626 index in the first input vector + 1.
627 )DOC")
628  .Input(0, "segment_ids", "1-D int32_t or int64_t tensor of segment ids")
629  .Input(
630  1,
631  "data (optional)",
632  "if provided, number of segments = the size of its first dimension")
633  .Output(0, "lengths", "1-D int64_t tensor of segment lengths");
634 
635 OPERATOR_SCHEMA(LengthsToWeights)
636  .NumInputs(1)
637  .NumOutputs(1)
638  .Arg("power", "n of 1/pow(length,n) for normalization")
639  .SetDoc(R"DOC(
640 Similar as LengthsToSegmentIds but output vector of segment
641 weights derived by lengths. i.e 1/pow(length, power)
642 )DOC")
643  .Input(0, "lengths", "1-D int32_t or int64_t tensor of lengths")
644  .Output(0, "a vector of weights", "1-D float tensor of weights by length");
645 
646 
647 
648 SHOULD_NOT_DO_GRADIENT(WallClockTime);
649 
650 OPERATOR_SCHEMA(EnsureDense)
651  .NumInputs(1)
652  .NumOutputs(1)
653  .AllowInplace({{0, 0}})
654  .IdenticalTypeAndShape()
655  .SetDoc(R"DOC(
656 This operator converts dense or sparse gradients to dense ones.
657 Therefore, sparse gradient can be back propagated to Operators that consume
658 dense gradients only (e.g., FCGradient).
659 
660 The operator's behaviors:
661 
662 - In forward, simply pass in place or copy input to the output.
663 - In backward, if the gradient passed-in is sparse gradient, change it to dense gradient in linear time; otherwise, simply pass the dense gradient.
664 )DOC")
665  .Input(0, "input", "Input tensors.")
666  .Output(0, "output", "Output tensor. Same dimension as inputs.");
667 
668 OPERATOR_SCHEMA(AccumulateHistogram)
669  .NumInputs(1)
670  .NumOutputs(2)
671  .SetDoc(R"DOC(
672 This operator calculate thes histogram of values in input tensor.
673 There're 2 outputs, one for histogram of current input tensor, and another
674 for histogram of the all input tensors accumulated through history.
675 The output would contain num_buckets + 2 values. index[1 ... num_buckets]
676 for values in [lower_bound, upper_bound) interval. And the rest 2 for values
677 smaller than lower_bound or greater than upper_bound respectively.
678 )DOC")
679  .Input(0, "X", "Input tensor.")
680  .Output(0, "CurHist", "Output histogram of the current tensor.")
681  .Output(1, "AccHist", "Accumulated histogram of the history tensor.")
682  .Arg("lower_bound", "the lower bound value")
683  .Arg("upper_bound", "the upper bound value")
684  .Arg(
685  "num_buckets",
686  "number of buckets to use in [lower_bound, upper_bound)");
687 
688 class GetEnsureDenseGradient : public GradientMakerBase {
689  using GradientMakerBase::GradientMakerBase;
690  vector<OperatorDef> GetGradientDefs() override {
691  CAFFE_ENFORCE(
692  GradOut(0).IsSparse() || GradOut(0).IsDense(),
693  "Input gradient ",
694  O(0),
695  " should be either sparse or dense.");
696 
697  if (GradOut(0).IsDense()) {
698  SetDense(0, GO(0));
699  return vector<OperatorDef>();
700  } else {
701  return SingleGradientDef(
702  "SparseToDense",
703  "",
704  vector<string>{GO_I(0), GO_V(0), I(0)},
705  vector<string>{GI(0)});
706  }
707  }
708 };
709 REGISTER_GRADIENT(EnsureDense, GetEnsureDenseGradient);
710 
711 SHOULD_NOT_DO_GRADIENT(Print);
712 SHOULD_NOT_DO_GRADIENT(HasElements);
713 SHOULD_NOT_DO_GRADIENT(IsEmpty);
714 SHOULD_NOT_DO_GRADIENT(LengthsToShape);
715 
716 class GetAliasGradient : public GradientMakerBase {
717  using GradientMakerBase::GradientMakerBase;
718  vector<OperatorDef> GetGradientDefs() override {
719  // We will simply pass-along the gradient. Nothing needs to
720  // be calculated.
721  SetDense(0, GO(0));
722  return vector<OperatorDef>();
723  }
724 };
725 REGISTER_GRADIENT(Alias, GetAliasGradient);
726 
727 SHOULD_NOT_DO_GRADIENT(ResizeLike);
728 
729 class GetSumGradient : public GradientMakerBase {
730  using GradientMakerBase::GradientMakerBase;
731  vector<OperatorDef> GetGradientDefs() override {
732  for (auto i = 0; i < def_.input_size(); ++i) {
733  SetDense(i, GO(0));
734  }
735  return vector<OperatorDef>();
736  }
737 };
738 REGISTER_GRADIENT(Sum, GetSumGradient);
739 
740 SHOULD_NOT_DO_GRADIENT(ScatterWeightedSum);
741 SHOULD_NOT_DO_GRADIENT(ScatterAssign);
742 
743 class GetWeightedSumGradient : public GradientMakerBase {
744  using GradientMakerBase::GradientMakerBase;
745  vector<OperatorDef> GetGradientDefs() override {
746  ArgumentHelper argsHelper(def_);
747  const bool grad_on_w = argsHelper.GetSingleArgument<bool>("grad_on_w", 0);
748 
749  auto inputs = vector<string>{GO(0)};
750  auto outputs = vector<string>();
751  for (int i = 0; i < def_.input_size(); i += 2) {
752  inputs.push_back(I(i));
753  inputs.push_back(I(i + 1));
754  outputs.push_back(GI(i));
755  }
756 
757  if (grad_on_w) {
758  for (int i = 0; i < def_.input_size(); i += 2) {
759  outputs.push_back(GI(i + 1));
760  }
761  }
762 
763  return SingleGradientDef("WeightedSumGradient", "", inputs, outputs);
764  }
765 };
766 REGISTER_GRADIENT(WeightedSum, GetWeightedSumGradient);
767 
768 struct GetFlattenToVecGradient : public GradientMakerBase {
769  using GradientMakerBase::GradientMakerBase;
770  vector<OperatorDef> GetGradientDefs() override {
771  return SingleGradientDef(
772  "ResizeLike", "", vector<string>{GO(0), I(0)}, vector<string>{GI(0)});
773  }
774 };
775 REGISTER_GRADIENT(FlattenToVec, GetFlattenToVecGradient);
776 
777 SHOULD_NOT_DO_GRADIENT(LengthsToSegmentIds);
778 SHOULD_NOT_DO_GRADIENT(SegmentIdsToLengths);
779 SHOULD_NOT_DO_GRADIENT(SegmentIdsToRanges);
780 SHOULD_NOT_DO_GRADIENT(SegmentIdsToLengthWeights);
781 SHOULD_NOT_DO_GRADIENT(GatherRangesOp);
782 SHOULD_NOT_DO_GRADIENT(LengthsGather);
783 SHOULD_NOT_DO_GRADIENT(AccumulateHistogram);
784 
785 template <>
786 bool NanCheckOp<CPUContext>::RunOnDevice() {
787  auto& X = Input(0);
788  auto* Y = Output(0);
789  const int D = X.numel();
790  const float* data = X.data<float>();
791  ConstEigenVectorMap<float> input_data(data, D);
792 
793  bool all_finite = input_data.allFinite();
794 
795  if (!all_finite) {
796  std::cerr << "Tensor contained NaN or inf: [" << this->debug_def().input(0)
797  << "]" << std::endl;
798 
799  for (int j = 0; j < InputSize(); j++) {
800  std::cerr << "Tensor name: " << this->debug_def().input(j) << std::endl;
801  std::cerr << "Input tensor:" << std::endl;
802  tensorPrinter_.Print<float>(Input(j));
803  std::cerr << "NaN idxs:" << std::endl;
804  const float* x = Input(j).data<float>();
805  for (size_t i = 0; i < Input(j).numel(); ++i) {
806  if (std::isnan(x[i]) || std::isinf(x[i])) {
807  std::cerr << i << " ";
808  }
809  }
810  std::cerr << std::endl;
811  }
812  return false;
813  }
814 
815  if (&X != Y) {
816  Y->CopyFrom(X);
817  }
818  return true;
819 }
820 REGISTER_CPU_OPERATOR(NanCheck, NanCheckOp<CPUContext>);
821 REGISTER_GRADIENT(NanCheck, GetNanCheckGradient);
822 
823 OPERATOR_SCHEMA(NanCheck)
824  .NumInputs(1, INT_MAX)
825  .NumOutputs(1)
826  .AllowInplace({{0, 0}})
827  .IdenticalTypeAndShapeOfInput(0)
828  .SetDoc("Identity operator, but checks all values for nan or inf")
829  .Input(0, "tensor", "Tensor to check for nan/inf")
830  .Output(
831  0,
832  "output",
833  "Tensor to copy input into if no NaNs or inf."
834  " Can be in-place");
835 
836 REGISTER_CPU_OPERATOR(IsNaN, IsNanOp<CPUContext>);
837 
838 OPERATOR_SCHEMA(IsNaN)
839  .NumInputs(1)
840  .NumOutputs(1)
841  .SetDoc("Returns a new tensor with boolean elements representing if each element is NaN or not.")
842  .Input(0, "tensor", "Tensor to check for nan")
843  .Output(0, "output", "Tensor containing a 1 at each location of NaN elements.");
844 
845 OPERATOR_SCHEMA(Size)
846  .NumInputs(1)
847  .NumOutputs(1)
848  .SetDoc(R"DOC(
849 Return a 1D tensor of type *int64* that contains the number of elements of the input tensor.
850 
851 Github Link:
852 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.cc
853 
854 <details>
855 
856 <summary> <b>Example</b> </summary>
857 
858 **Code**
859 
860 ```
861 
862 workspace.ResetWorkspace()
863 
864 op = core.CreateOperator(
865  "Size",
866  ["X"],
867  ["size"],
868 )
869 
870 workspace.FeedBlob("X", (np.random.randint(10, size=(3,3))))
871 print("X:", workspace.FetchBlob("X"))
872 workspace.RunOperatorOnce(op)
873 print("size:", workspace.FetchBlob("size"))
874 
875 workspace.ResetWorkspace()
876 
877 workspace.FeedBlob("X", (np.random.rand(6,4)))
878 print("X:", workspace.FetchBlob("X"))
879 workspace.RunOperatorOnce(op)
880 print("size:", workspace.FetchBlob("size"))
881 
882 ```
883 
884 **Result**
885 
886 ```
887 
888 X:
889 [[3 7 0]
890  [0 1 6]
891  [5 0 8]]
892 size: 9
893 X:
894 [[0.92017884 0.32115368 0.68692035 0.64135016]
895  [0.8723328 0.77830265 0.80688656 0.25524236]
896  [0.37970216 0.76407047 0.85689564 0.30692883]
897  [0.69352573 0.42531502 0.16415212 0.59209324]
898  [0.52684188 0.37094846 0.60670079 0.6489272 ]
899  [0.94715906 0.34800557 0.61898769 0.28947359]]
900 size: 24
901 
902 ```
903 
904 </details>
905 
906  )DOC")
907  .Input(0, "X", "*(type: Tensor)* Input tensor to calculate number of elements.")
908  .Output(
909  0,
910  "size",
911  "*(type: Tensor)* 1D tensor of type int64 that contains the number of "
912  "elements in the input tensor *X*.");
913 
914 REGISTER_CPU_OPERATOR(Size, SizeOp<CPUContext>);
915 NO_GRADIENT(Size);
916 
917 template <>
918 template <typename T>
919 bool RangeOp<CPUContext>::DoRunOnDevice(
920  const T& start,
921  const T& step,
922  Tensor* output) {
923  auto* output_data = output->template mutable_data<T>();
924  for (int i = 0; i < output->numel(); ++i) {
925  output_data[i] = i * step + start;
926  }
927  return true;
928 }
929 
930 OPERATOR_SCHEMA(Range)
931  .NumInputs(1, 3)
932  .NumOutputs(1)
933  .SetDoc(R"DOC(
934 Generates an output tensor within the half-open interval $[start, stop)$ (the interval including start but excluding stop).
935 - The `start` input is optional, and defaults to 0 when not set.
936 - The `step` input is optional, and defaults to 1 when not set.
937 - The type of the `output` tensor is determined by the types of inputs used.
938 
939 Github Links:
940 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.h
941 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/utility_ops.cc
942 
943 
944 <details>
945 
946 <summary> <b>Example</b> </summary>
947 
948 **Code**
949 
950 ```
951 
952 workspace.ResetWorkspace()
953 
954 op = core.CreateOperator(
955  "Range",
956  ["start", "stop", "step"],
957  ["output"]
958 )
959 
960 workspace.FeedBlob("start", np.array(4, dtype=np.int32))
961 workspace.FeedBlob("stop", np.array(17, dtype=np.int32))
962 workspace.FeedBlob("step", np.array(2, dtype=np.int32))
963 print("start:", workspace.FetchBlob("start"))
964 print("stop:", workspace.FetchBlob("stop"))
965 print("step:", workspace.FetchBlob("step"))
966 workspace.RunOperatorOnce(op)
967 print("output:", workspace.FetchBlob("output"))
968 
969 ```
970 
971 **Result**
972 
973 ```
974 
975 start: 4
976 stop: 17
977 step: 2
978 output: [ 4 6 8 10 12 14 16]
979 
980 ```
981 
982 </details>
983  )DOC")
984  .Input(
985  0,
986  "start",
987  "(*Tensor*): [OPTIONAL] scalar tensor containing the start of the interval (inclusive) (default=0)")
988  .Input(1, "stop", "(*Tensor*): scalar tensor containing the end of the interval (exclusive)")
989  .Input(2, "step", "(*Tensor*): [OPTIONAL] scalar tensor specifying the spacing between values (default=1)")
990  .Output(
991  0,
992  "output",
993  "(*Tensor*): 1D tensor of same type as inputs that contains the sequence");
994 
995 REGISTER_CPU_OPERATOR(Range, RangeOp<CPUContext>);
996 NO_GRADIENT(Range);
997 
998 REGISTER_CPU_OPERATOR(ThrowException, ThrowExceptionOp);
999 OPERATOR_SCHEMA(ThrowException).NumInputs(0).NumOutputs(0);
1000 SHOULD_NOT_DO_GRADIENT(ThrowException);
1001 
1002 REGISTER_CPU_OPERATOR(ThrowChildThreadException, ThrowChildThreadExceptionOp);
1003 OPERATOR_SCHEMA(ThrowChildThreadException).NumInputs(0).NumOutputs(0);
1004 SHOULD_NOT_DO_GRADIENT(ThrowChildThreadException);
1005 
1006 REGISTER_CPU_OPERATOR(LogFatal, LogFatalOp);
1007 OPERATOR_SCHEMA(LogFatal).NumInputs(0).NumOutputs(0);
1008 SHOULD_NOT_DO_GRADIENT(LogFatal);
1009 
1010 REGISTER_CPU_OPERATOR(Fail, FailOp);
1011 OPERATOR_SCHEMA(Fail).NumInputs(0).NumOutputs(0);
1012 SHOULD_NOT_DO_GRADIENT(Fail);
1013 
1014 } // namespace caffe2
// (Doxygen extraction residue — stray cross-reference lines preserved below
// as comments so the file stays compilable:)
// Definition: OpClasses.h:414
// A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current runtime.
// Definition: blob.h:13
// Definition: static.cpp:70