// ---------------------------------------------------------------------------
// NOTE(review): this chunk is a lossy extraction of
// caffe2/operators/segment_reduction_op.cc. The original source line numbers
// are fused into the text and many intermediate lines are missing (the
// remaining function parameters such as `use_weight`, the CAFFE_ENFORCE call
// heads, the `OpSchema::Cost c;` declaration, `return c;`, and the closing
// brace). Fragments are annotated as-is; do not expect this chunk to compile.
//
// CostInferenceForSparseLengths: estimates the cost (flops, bytes read,
// parameter bytes) of a SparseLengths* forward op from its input shapes.
// Inputs are DATA, [WEIGHTS,] INDICES, LENGTHS; `use_weight` (a parameter
// missing from this extraction) shifts the INDICES/LENGTHS slots by one when
// a WEIGHTS input is present.
1 #include "caffe2/operators/segment_reduction_op.h" 5 OpSchema::Cost CostInferenceForSparseLengths(
6 const OperatorDef& def,
7 const vector<TensorShape>& inputs,
// At least DATA + INDICES + LENGTHS (+ optional WEIGHTS) inputs are required.
9 int min_num_of_inputs = 3 + use_weight;
// Presumably the tail of a CAFFE_ENFORCE error message
// ("<op type> requires at least <n>") — the call head is missing here.
13 def.type() +
" requires at least " + c10::to_string(min_num_of_inputs));
15 const TensorShape data = inputs[0];
16 const TensorShape indices = inputs[1 + use_weight];
17 const TensorShape lengths = inputs[2 + use_weight];
20 CAFFE_ENFORCE_GT(data.dims_size(), 0,
"data requires at least 1 dimension");
// N = outer dimension of DATA (number of rows in the table).
21 uint64_t N = data.dims(0);
// D = elements per row: product of DATA dims from axis 1 onward.
25 uint64_t
D = nElemFromDim(data, 1);
// Tail of a CAFFE_ENFORCE_GT on lengths.dims_size() — head missing here.
27 lengths.dims_size(), 0,
"lengths requires at least 1 dimension");
// M = number of segments (outer dimension of LENGTHS).
28 uint64_t
M = lengths.dims(0);
29 uint64_t indices_size = nElemFromDim(indices);
// One accumulate per gathered element.
31 c.flops = indices_size * D;
// Bytes read: gathered rows, the indices themselves, and the lengths vector.
32 c.bytes_read = indices_size *
33 (D *
sizeof(data.data_type()) +
sizeof(indices.data_type())) +
34 M *
sizeof(lengths.data_type());
// Size of the parameter (lookup) table touched by the op.
35 c.params_bytes = N * D *
sizeof(data.data_type());
// Weighted variant (the guarding `if (use_weight)` line is missing from this
// extraction): one extra multiply per element plus reading per-index weights.
37 const TensorShape weights = inputs[1];
38 c.flops += indices_size * D;
39 c.bytes_read += indices_size *
sizeof(weights.data_type());
// ---------------------------------------------------------------------------
// Gradient operators for (Sparse)Lengths reductions. Each group below pairs
// an OPERATOR_SCHEMA declaration with a REGISTER_CPU_OPERATOR registration of
// the corresponding Abstract*GradientOp template. NumInputs/NumOutputs
// chains and some template arguments are missing from this extraction.
//
// WeightedSum gradient that also receives the forward op's main (DATA) input.
47 OPERATOR_SCHEMA(SparseLengthsIndicesInGradientWeightedSumWithMainInputGradient)
50 REGISTER_CPU_OPERATOR(
51 SparseLengthsIndicesInGradientWeightedSumWithMainInputGradient,
52 AbstractLengthsWithMainInputGradientOp<
57 WeightedSumReducerDef::template ReducerGradient<float, CPUContext>,
// Sparse WeightedSum gradient (indices forwarded into the gradient op).
62 OPERATOR_SCHEMA(SparseLengthsIndicesInGradientWeightedSumGradient)
65 REGISTER_CPU_OPERATOR(
66 SparseLengthsIndicesInGradientWeightedSumGradient,
67 AbstractLengthsGradientOp<
71 WeightedSumReducerDef::template ReducerGradient<float, CPUContext>,
// Sparse Sum gradient.
76 OPERATOR_SCHEMA(SparseLengthsIndicesInGradientSumGradient)
79 REGISTER_CPU_OPERATOR(
80 SparseLengthsIndicesInGradientSumGradient,
81 AbstractLengthsGradientOp<
85 SumReducerDef::template ReducerGradient<float, CPUContext>,
// Dense Sum gradient: 3 inputs, 1 output per the schema below.
88 OPERATOR_SCHEMA(LengthsIndicesInGradientSumGradient).NumInputs(3).NumOutputs(1);
89 REGISTER_CPU_OPERATOR(
90 LengthsIndicesInGradientSumGradient,
91 AbstractLengthsGradientOp<
95 SumReducerDef::template ReducerGradient<float, CPUContext>,
// Sparse Mean gradient.
100 OPERATOR_SCHEMA(SparseLengthsIndicesInGradientMeanGradient)
103 REGISTER_CPU_OPERATOR(
104 SparseLengthsIndicesInGradientMeanGradient,
105 AbstractLengthsGradientOp<
109 MeanReducerDef::template ReducerGradient<float, CPUContext>,
// Dense Mean gradient.
112 OPERATOR_SCHEMA(LengthsIndicesInGradientMeanGradient)
115 REGISTER_CPU_OPERATOR(
116 LengthsIndicesInGradientMeanGradient,
117 AbstractLengthsGradientOp<
121 MeanReducerDef::template ReducerGradient<float, CPUContext>,
// ---------------------------------------------------------------------------
// Long-form documentation strings for the Lengths{Max,Mean,Sum,WeightedSum}
// operator schemas. Each was originally a raw string literal
// (R"DOC( ... )DOC") spanning many lines; the extraction has collapsed each
// literal onto one line and split the R"DOC( opener across lines, so the
// constants below are not valid C++ as written. No comments are inserted
// between the lines below to avoid altering the intended literal content.
126 static const char* kLengthsMaxExtra = R
"DOC( 127 The *LengthsMax* op takes two inputs *DATA* and *LENGTHS*, and produces a single output *OUTPUT*. The op finds the maximum value in each of the segments of *DATA*, where segments are defined by their lengths. 128 For example, if $DATA = [2,4,3,1,2,10]$ and $LENGTHS = [2,3,1]$ then $OUTPUT = [max([2,4]), max([3,1,2]), max([10])] = [4,3,10]$. 131 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/segment_reduction_op.cc 135 <summary> <b>Example</b> </summary> 141 workspace.ResetWorkspace() 143 op = core.CreateOperator( 149 workspace.FeedBlob("DATA", np.array([2,4,3,1,2,10]).astype(np.float32)) 150 print("DATA:\n", workspace.FetchBlob("DATA")) 152 workspace.FeedBlob("LENGTHS", np.array([2,3,1]).astype(np.int32)) 153 print("LENGTHS:\n", workspace.FetchBlob("LENGTHS")) 155 workspace.RunOperatorOnce(op) 156 print("OUTPUT: \n", workspace.FetchBlob("OUTPUT")) 165 [ 2. 4. 3. 1. 2. 10.] 177 static const char* kLengthsMeanExtra = R
"DOC( 178 The *LengthsMean* op takes two inputs *DATA* and *LENGTHS*, and produces a single output *OUTPUT*. The op finds the mean value in each of the segments of *DATA*, where segments are defined by their lengths. 179 For example, if $DATA = [2,4,3,1,2,10]$ and $LENGTHS = [2,3,1]$ then $OUTPUT = [mean([2,4]), mean([3,1,2]), mean([10])] = [3,2,10]$. 182 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/segment_reduction_op.cc 186 <summary> <b>Example</b> </summary> 192 workspace.ResetWorkspace() 194 op = core.CreateOperator( 200 workspace.FeedBlob("DATA", np.array([2,4,3,1,2,10]).astype(np.float32)) 201 print("DATA:\n", workspace.FetchBlob("DATA")) 203 workspace.FeedBlob("LENGTHS", np.array([2,3,1]).astype(np.int32)) 204 print("LENGTHS:\n", workspace.FetchBlob("LENGTHS")) 206 workspace.RunOperatorOnce(op) 207 print("OUTPUT: \n", workspace.FetchBlob("OUTPUT")) 216 [ 2. 4. 3. 1. 2. 10.] 228 static const char* kLengthsSumExtra = R
"DOC( 229 The *LengthsSum* op takes two inputs *DATA* and *LENGTHS*, and produces a single output *OUTPUT*. The op finds the sum in each of the segments of *DATA*, where segments are defined by their lengths. 230 For example, if $DATA = [2,4,3,1,2,10]$ and $LENGTHS = [2,3,1]$ then $OUTPUT = [sum([2,4]), sum([3,1,2]), sum([10])] = [6,6,10]$. 233 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/segment_reduction_op.cc 237 <summary> <b>Example</b> </summary> 243 workspace.ResetWorkspace() 245 op = core.CreateOperator( 251 workspace.FeedBlob("DATA", np.array([2,4,3,1,2,10]).astype(np.float32)) 252 print("DATA:\n", workspace.FetchBlob("DATA")) 254 workspace.FeedBlob("LENGTHS", np.array([2,3,1]).astype(np.int32)) 255 print("LENGTHS:\n", workspace.FetchBlob("LENGTHS")) 257 workspace.RunOperatorOnce(op) 258 print("OUTPUT: \n", workspace.FetchBlob("OUTPUT")) 267 [ 2. 4. 3. 1. 2. 10.] 279 static const char* kLengthsWeightedSumExtra = R
"DOC( 280 The *LengthsWeightedSum* op takes three inputs *DATA*, *LENGTHS*, and *SCALARS*, and produces a single output *OUTPUT*. The op finds the weighted sum in each of the segments of *DATA*, where segments are defined by their lengths. Before calculating the sums, the input *DATA* is weighted by the contents of *SCALARS*. 281 For example, if $DATA = [2,4,3,1,2,10]$, $SCALARS = [8, 2, 1, 4, 1, 0.6]$, and $LENGTHS = [2,3,1]$, then $OUTPUT = [sum([8*2,2*4]), sum([1*3,4*1,1*2]), sum([0.6*10])] = [24,9,6]$. 284 - https://github.com/caffe2/caffe2/blob/master/caffe2/operators/segment_reduction_op.cc 288 <summary> <b>Example</b> </summary> 294 workspace.ResetWorkspace() 296 op = core.CreateOperator( 297 "LengthsWeightedSum", 298 ["DATA", "SCALARS","LENGTHS"], 302 workspace.FeedBlob("DATA", np.array([2,4,3,1,2,10]).astype(np.float32)) 303 print("DATA:\n", workspace.FetchBlob("DATA")) 305 workspace.FeedBlob("SCALARS", np.array([8, 2, 1, 4, 1, 0.6]).astype(np.float32)) 306 print("SCALARS:\n", workspace.FetchBlob("SCALARS")) 308 workspace.FeedBlob("LENGTHS", np.array([2,3,1]).astype(np.int32)) 309 print("LENGTHS:\n", workspace.FetchBlob("LENGTHS")) 311 workspace.RunOperatorOnce(op) 312 print("OUTPUT: \n", workspace.FetchBlob("OUTPUT")) 321 [ 2. 4. 3. 1. 2. 10.] 335 template <
// FormatDoc<Def>(): builds an operator schema's doc string by substituting
// {op}, {op_doc}, and a reducer-specific {extra} example section into
// Def::doc. (The `template <` opener is fused into the end of the previous
// literal above; the function signature line is missing from this extraction.)
typename Def>
337 string doc = Def::doc;
338 c10::ReplaceAll(doc,
"{op}", Def::OpDef::name);
339 c10::ReplaceAll(doc,
"{op_doc}", Def::OpDef::doc);
// Pick the per-op example text by the reducer's name.
340 if (strcmp(Def::OpDef::name,
"Max") == 0) {
341 c10::ReplaceAll(doc,
"{extra}", kLengthsMaxExtra);
342 }
else if (strcmp(Def::OpDef::name,
"Mean") == 0) {
343 c10::ReplaceAll(doc,
"{extra}", kLengthsMeanExtra);
344 }
else if (strcmp(Def::OpDef::name,
"Sum") == 0) {
345 c10::ReplaceAll(doc,
"{extra}", kLengthsSumExtra);
346 }
else if (strcmp(Def::OpDef::name,
"WeightedSum") == 0) {
347 c10::ReplaceAll(doc,
"{extra}", kLengthsWeightedSumExtra);
// Fallback for other reducers (the intervening `} else {` line is missing
// from this extraction): strip the {extra} placeholder.
349 c10::ReplaceAll(doc,
"{extra}",
" ");
// constexpr equal(): compile-time test that string `lhs` equals the
// concatenation rhs1 + rhs2 + rhs3 (rhs3 defaults to ""). Used by the
// registration macros below to check, e.g., that the registered op name
// matches basename + OpDef::name. Written as a single recursive boolean
// expression so it is valid in a C++11-style single-return constexpr
// function. (The lhs/rhs1/rhs2 parameter declarations are missing from this
// extraction.)
355 constexpr
bool equal(
359 char const* rhs3 =
"") {
// Case 1: all four strings exhausted simultaneously -> match.
360 return (*lhs == 0 && *rhs1 == 0 && *rhs2 == 0 && *rhs3 == 0) ||
// Case 2: still consuming rhs1; current chars match, recurse on tails.
361 (*rhs1 != 0 && *lhs == *rhs1 && equal(lhs + 1, rhs1 + 1, rhs2, rhs3)) ||
// Case 3: rhs1 done, consuming rhs2.
362 (*rhs1 == 0 && *rhs2 != 0 && *lhs == *rhs2 &&
363 equal(lhs + 1, rhs1, rhs2 + 1, rhs3)) ||
// Case 4: rhs1 and rhs2 done, consuming rhs3.
364 (*rhs1 == 0 && *rhs2 == 0 && *rhs3 != 0 && *lhs == *rhs3 &&
365 equal(lhs + 1, rhs1, rhs2, rhs3 + 1));
// ---------------------------------------------------------------------------
// Registration macros. REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY declares the
// forward schema (doc via FormatDoc, shape/fill via PopulateSchema), registers
// the CPU BackwardOp and its schema, and wires the gradient maker.
// REGISTER_SEGMENT_DEF additionally registers the CPU ForwardOp. The
// static_assert wrappers around the `equal(...)` name checks are missing from
// this extraction, as are several macro continuation lines.
371 #define REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY( \ 372 segment_name, gradient_name, ...) \ 374 equal(#segment_name, __VA_ARGS__::basename, __VA_ARGS__::OpDef::name), \ 379 __VA_ARGS__::basename, \ 380 __VA_ARGS__::OpDef::name, \ 383 OPERATOR_SCHEMA(segment_name) \ 384 .NumInputs(__VA_ARGS__::ForwardOp::kNumInputs) \ 386 .DisallowInputFillers() \ 387 .SetDoc(FormatDoc<__VA_ARGS__>()) \ 388 .Output(0, "OUTPUT", "Aggregated tensor") \ 389 .FillUsing(__VA_ARGS__::PopulateSchema); \ 390 REGISTER_CPU_OPERATOR_STR(string(#gradient_name), __VA_ARGS__::BackwardOp); \ 391 OPERATOR_SCHEMA(gradient_name) \ 392 .NumInputs(__VA_ARGS__::BackwardOp::kNumInputs) \ 394 .DisallowInputFillers(); \ 395 REGISTER_GRADIENT_STR(string(#segment_name), __VA_ARGS__::GetGradient) 397 #define REGISTER_SEGMENT_DEF(segment_name, gradient_name, ...) \ 399 equal(#segment_name, __VA_ARGS__::basename, __VA_ARGS__::OpDef::name), \ 401 REGISTER_CPU_OPERATOR_STR(string(#segment_name), __VA_ARGS__::ForwardOp); \ 402 REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY( \ 403 segment_name, gradient_name, __VA_ARGS__) 405 REGISTER_SEGMENT_DEF(
406 SortedSegmentRangeSum,
407 SortedSegmentRangeSumGradient,
408 AbstractSortedSegmentRangeDef<float, int, CPUContext, SumRangeReducerDef>);
// SortedSegmentRange* instantiations (range reducers over float/CPUContext).
409 REGISTER_SEGMENT_DEF(
410 SortedSegmentRangeLogSumExp,
411 SortedSegmentRangeLogSumExpGradient,
412 AbstractSortedSegmentRangeDef<
416 LogSumExpRangeReducerDef>);
417 REGISTER_SEGMENT_DEF(
418 SortedSegmentRangeLogMeanExp,
419 SortedSegmentRangeLogMeanExpGradient,
420 AbstractSortedSegmentRangeDef<
424 LogMeanExpRangeReducerDef>);
425 REGISTER_SEGMENT_DEF(
426 SortedSegmentRangeMean,
427 SortedSegmentRangeMeanGradient,
428 AbstractSortedSegmentRangeDef<float, int, CPUContext, MeanRangeReducerDef>);
429 REGISTER_SEGMENT_DEF(
430 SortedSegmentRangeMax,
431 SortedSegmentRangeMaxGradient,
432 AbstractSortedSegmentRangeDef<float, int, CPUContext, MaxRangeReducerDef>);
// Sum reducer family: sorted/unsorted, dense/sparse (some forward-op name
// lines are missing from this extraction).
434 REGISTER_SEGMENT_DEF(
436 SortedSegmentSumGradient,
437 AbstractSortedSegmentDef<float, int, CPUContext, SumReducerDef>);
438 REGISTER_SEGMENT_DEF(
439 SparseSortedSegmentSum,
440 SparseSortedSegmentSumGradient,
441 AbstractSparseSortedSegmentDef<float, int, CPUContext, SumReducerDef>);
442 REGISTER_SEGMENT_DEF(
444 UnsortedSegmentSumGradient,
445 AbstractUnsortedSegmentDef<float, int, CPUContext, SumReducerDef>);
446 REGISTER_SEGMENT_DEF(
447 SparseUnsortedSegmentSum,
448 SparseUnsortedSegmentSumGradient,
449 AbstractSparseUnsortedSegmentDef<float, int, CPUContext, SumReducerDef>);
// Lengths-based Sum (name lines missing from this extraction).
451 REGISTER_SEGMENT_DEF(
454 AbstractLengthsDef<float, int, CPUContext, SumReducerDef, true>);
// Mean reducer family.
456 REGISTER_SEGMENT_DEF(
458 SortedSegmentMeanGradient,
459 AbstractSortedSegmentDef<float, int, CPUContext, MeanReducerDef>);
460 REGISTER_SEGMENT_DEF(
461 SparseSortedSegmentMean,
462 SparseSortedSegmentMeanGradient,
463 AbstractSparseSortedSegmentDef<float, int, CPUContext, MeanReducerDef>);
464 REGISTER_SEGMENT_DEF(
466 UnsortedSegmentMeanGradient,
467 AbstractUnsortedSegmentDef<float, int, CPUContext, MeanReducerDef>);
468 REGISTER_SEGMENT_DEF(
469 SparseUnsortedSegmentMean,
470 SparseUnsortedSegmentMeanGradient,
471 AbstractSparseUnsortedSegmentDef<float, int, CPUContext, MeanReducerDef>);
473 REGISTER_SEGMENT_DEF(
476 AbstractLengthsDef<float, int, CPUContext, MeanReducerDef, true>);
// WeightedSum reducer family.
478 REGISTER_SEGMENT_DEF(
479 ReduceFrontWeightedSum,
480 ReduceFrontWeightedSumGradient,
481 AbstractReduceFrontDef<float, CPUContext, WeightedSumReducerDef>);
482 REGISTER_SEGMENT_DEF(
483 SortedSegmentWeightedSum,
484 SortedSegmentWeightedSumGradient,
485 AbstractSortedSegmentDef<float, int, CPUContext, WeightedSumReducerDef>);
486 REGISTER_SEGMENT_DEF(
487 SparseSortedSegmentWeightedSum,
488 SparseSortedSegmentWeightedSumGradient,
489 AbstractSparseSortedSegmentDef<
493 WeightedSumReducerDef>);
494 REGISTER_SEGMENT_DEF(
495 UnsortedSegmentWeightedSum,
496 UnsortedSegmentWeightedSumGradient,
497 AbstractUnsortedSegmentDef<float, int, CPUContext, WeightedSumReducerDef>);
498 REGISTER_SEGMENT_DEF(
499 SparseUnsortedSegmentWeightedSum,
500 SparseUnsortedSegmentWeightedSumGradient,
501 AbstractSparseUnsortedSegmentDef<
505 WeightedSumReducerDef>);
506 REGISTER_SEGMENT_DEF(
508 LengthsWeightedSumGradient,
509 AbstractLengthsDef<float, int, CPUContext, WeightedSumReducerDef, false>);
// Macro for gradients that additionally take the forward op's main (DATA)
// input; registers the WithMainInputBackwardOp and its schema.
512 #define REGISTER_GRADIENT_WITH_MAIN_INPUT(gradient_name, ...) \ 516 __VA_ARGS__::basename, \ 517 __VA_ARGS__::OpDef::name, \ 518 "WithMainInputGradient"), \ 520 REGISTER_CPU_OPERATOR_STR( \ 521 string(#gradient_name), __VA_ARGS__::WithMainInputBackwardOp); \ 522 OPERATOR_SCHEMA(gradient_name) \ 523 .NumInputs(__VA_ARGS__::WithMainInputBackwardOp::kNumInputs) \ 524 .NumOutputs(1, INT_MAX) 526 REGISTER_GRADIENT_WITH_MAIN_INPUT(
527 LengthsWeightedSumWithMainInputGradient,
528 AbstractLengthsDef<float, int, CPUContext, WeightedSumReducerDef>);
529 REGISTER_GRADIENT_WITH_MAIN_INPUT(
530 SparseLengthsWeightedSumWithMainInputGradient,
531 AbstractSparseLengthsDef<float, int, CPUContext, WeightedSumReducerDef>);
// Macros for gradients that take both the main input and the forward output
// (used by LengthsMax below, whose backward needs the forward argmax result).
534 #define REGISTER_GRADIENT_WITH_MAIN_INPUT_AND_FORWARD_OUTPUT( \ 535 gradient_name, ...) \ 539 __VA_ARGS__::basename, \ 540 __VA_ARGS__::OpDef::name, \ 541 "WithMainInputAndForwardOutputGradient"), \ 543 REGISTER_CPU_OPERATOR_STR( \ 544 string(#gradient_name), \ 545 __VA_ARGS__::WithMainInputAndForwardOutputBackwardOp); \ 546 OPERATOR_SCHEMA(gradient_name) \ 548 __VA_ARGS__::WithMainInputAndForwardOutputBackwardOp::kNumInputs) \ 549 .NumOutputs(1, INT_MAX) 551 #define REGISTER_SEGMENT_DEF_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT( \ 552 segment_name, gradient_name, ...) \ 554 equal(#segment_name, __VA_ARGS__::basename, __VA_ARGS__::OpDef::name), \ 556 OPERATOR_SCHEMA(segment_name) \ 557 .NumInputs(__VA_ARGS__::ForwardOp::kNumInputs) \ 559 .SetDoc(FormatDoc<__VA_ARGS__>()) \ 560 .Output(0, "OUTPUT", "Aggregated tensor") \ 561 .FillUsing(__VA_ARGS__::PopulateSchema); \ 562 REGISTER_GRADIENT_WITH_MAIN_INPUT_AND_FORWARD_OUTPUT( \ 563 gradient_name, __VA_ARGS__); \ 564 REGISTER_GRADIENT_STR(string(#segment_name), __VA_ARGS__::GetGradient) 568 #define REGISTER_LENGTHS_OPS_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT( \ 569 segment_name, gradient_name, ...) \ 571 equal(#segment_name, __VA_ARGS__::basename, __VA_ARGS__::OpDef::name), \ 573 REGISTER_CPU_OPERATOR_STR(string(#segment_name), __VA_ARGS__::ForwardOp); \ 574 REGISTER_SEGMENT_DEF_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT( \ 575 segment_name, gradient_name, __VA_ARGS__) 577 REGISTER_LENGTHS_OPS_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT(
579 LengthsMaxWithMainInputAndForwardOutputGradient,
580 AbstractLengthsDef<float, int, CPUContext, MaxReducerDef>);
// NOTE(review): stray, truncated doc-comment fragment from a different part
// of the codebase (the module-registry documentation); preserved verbatim:
// "A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current ..."