#ifndef CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_
#define CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"

namespace caffe2 {

// Default accessor over the raw DATA input: records the raw pointer and hands
// out typed, block-aligned pointers into it.
template <typename TData>
class BaseInputAccessor {
 public:
  BaseInputAccessor() {}

  bool observeInput(const Tensor& dataInput) {
    data_ = dataInput.raw_data();
    return dataInput.template IsType<TData>();
  }

  inline const TData*
  getBlockPtr(int64_t in_block_size, int64_t idx, int64_t /* blocks */ = 1) {
    return static_cast<const TData*>(data_) + in_block_size * idx;
  }

 protected:
  const void* data_ = nullptr;
};
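// Usage sketch (not part of the original header): after observeInput() has
// recorded a float DATA tensor, getBlockPtr(/*in_block_size=*/4, /*idx=*/3)
// returns a pointer to element 12 of the flattened data, i.e. the start of
// row 3 when each row holds 4 contiguous values.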
/**
 * Base implementation for segment reduction op that leverages continuity of
 * the data. Assumes that segments are sorted and there are no gaps.
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class RangeReducer,
    class InputAccessor = BaseInputAccessor<T>>
class AbstractSortedSegmentRangeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    auto& dataInput = Input(DATA);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    auto N = segment_ids.size(0);
    CAFFE_ENFORCE_EQ(
        N,
        dataInput.size(0),
        "SEGMENT_IDS must have the same length as outer dimension of DATA");

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.dtype().name(),
        ".");

    const SIndex* s_ids = segment_ids.template data<SIndex>();

    const SIndex K = N > 0 ? s_ids[N - 1] + 1 : 0;
    auto shape = dataInput.sizes().vec();
    shape[0] = K;
    auto* output = Output(0, shape, at::dtype<T>());

    T* out = output->template mutable_data<T>();

    int64_t block_size = dataInput.numel() / N;

    // Assume the segments are sorted and there are no gaps
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    for (int64_t i = 0; i < N;) {
      int64_t start = i;
      for (++i; i < N && s_ids[start] == s_ids[i]; ++i)
        ;

      RangeReducer()(
          block_size,
          i - start,
          inputAccessor_.getBlockPtr(block_size, start, i - start),
          out + block_size * s_ids[start],
          &context_);

      // check correctness of the next segment
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  static constexpr int kNumInputs = 2;
  INPUT_TAGS(DATA, SEGMENT_IDS);

 private:
  InputAccessor inputAccessor_;
};
template <
    typename T,
    typename SIndex,
    class Context,
    class RangeReducerGradient>
class AbstractSortedSegmentRangeGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    auto& data_in = Input(DATA_IN);
    auto& data_out = Input(DATA_OUT);
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    int64_t N = segment_ids.size(0);

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();
    const T* d_in = data_in.template data<T>();
    const T* d_out = data_out.template data<T>();

    auto shape = segment_grads.sizes().vec();
    shape[0] = N;
    auto* data_grads = Output(0, shape, at::dtype<T>());

    const SIndex K = segment_grads.size(0);
    T* out = data_grads->template mutable_data<T>();

    int64_t block_size = segment_grads.size_from_dim(1);

    // Assume the segments are sorted and there are no gaps
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    CAFFE_ENFORCE_EQ(
        K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps");
    for (int64_t i = 0; i < N;) {
      int64_t start = i;
      for (++i; i < N && s_ids[start] == s_ids[i]; ++i)
        ;

      auto expanded_idx = block_size * start;
      auto reduced_idx = block_size * s_ids[start];
      RangeReducerGradient()(
          block_size,
          i - start,
          s_grads + reduced_idx,
          out + expanded_idx,
          d_in + expanded_idx, // corresponding input value
          d_out + reduced_idx, // corresponding output value
          &context_);

      // check correctness of the next segment
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  static constexpr int kNumInputs = 4;
  INPUT_TAGS(DATA_IN, DATA_OUT, SEGMENT_GRADS, SEGMENT_IDS);
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef>
struct AbstractSortedSegmentRangeDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SortedSegmentRange";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. In order to allow for more
efficient implementation of '{op}', the input segments have to be contiguous
and non-empty.

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor to be aggregated");
    schema.Input(
        1,
        "SEGMENT_IDS",
        "Vector with the same length as the first dimension of DATA "
        "and values in the range 0..K-1 and in increasing order that "
        "maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated tensor with the first dimension of K and the "
        "other dimensions inherited from DATA");
    ReducerDef::PopulateSchema(schema);
  }
  using ForwardOp = AbstractSortedSegmentRangeOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template Reducer<T, Context>>;
  using BackwardOp = AbstractSortedSegmentRangeGradientOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template ReducerGradient<T, Context>>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      return SingleGradientDef(
          string(basename) + ReducerDef::name + "Gradient",
          "",
          vector<string>{I(0), O(0), GO(0), I(1)},
          // no gradient on segment_ids!
          vector<string>{GI(0)});
    }
  };
};
/**
 * @brief Simple non-segmented reduction over the first few dimensions of the
 * tensor.
 */
template <
    typename T,
    class Context,
    class Reducer,
    bool FirstDim,
    class InputAccessor = BaseInputAccessor<T>>
class AbstractReduceFrontOrBackOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit AbstractReduceFrontOrBackOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(int, "num_reduce_dim", num_reduce_dims_, 1) {}

  bool RunOnDevice() override {
    auto& data = Input(0);
    int64_t in_block_size = FirstDim
        ? data.size_from_dim(num_reduce_dims_)
        : data.size_to_dim(data.dim() - num_reduce_dims_);
    return DispatchHelper<typename Reducer::FixedDispatch>::call(
        this, in_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& data = Input(0);

    CAFFE_ENFORCE_LE(num_reduce_dims_, data.dim());

    typename Reducer::Meta ctx(FirstDim);
    ctx.observeInput(0, data, num_reduce_dims_);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      ctx.observeInput(i, aux_in, num_reduce_dims_);
    }

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(data),
        "Unsupported input type: ",
        data.dtype().name(),
        ".");

    vector<int64_t> shape;
    ctx.appendOutputShape(&shape);
    auto* output = Output(0, shape, at::dtype<T>());

    T* out = output->template mutable_data<T>();

    const int block_size = FirstDim
        ? data.size_from_dim(num_reduce_dims_)
        : data.size_from_dim(data.dim() - num_reduce_dims_);

    const int num_blocks = block_size > 0 ? data.numel() / block_size : 0;

    Reducer r(ctx, out, &context_);
    for (int64_t i = 0; i < num_blocks; ++i) {
      r.template process<FixedSize>(
          ctx, inputAccessor_.getBlockPtr(block_size, i), i, &context_);
    }
    r.template finish<FixedSize>(ctx, &context_);
    return true;
  }

  static constexpr int kNumInputs = Reducer::kInputCount;

 private:
  int num_reduce_dims_;
  InputAccessor inputAccessor_;
};
template <
    typename T,
    class Context,
    class ReducerGradient,
    bool FirstDim = true>
class AbstractReduceFrontOrBackGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit AbstractReduceFrontOrBackGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(int, "num_reduce_dim", num_reduce_dims_, 1) {}

  bool RunOnDevice() override {
    int64_t grad_block_size = Input(REDUCTION_GRAD).numel();
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& reduction_grad = Input(REDUCTION_GRAD);
    auto& source_shape = this->template Input<Tensor>(SOURCE_SHAPE, CPU);

    typename ReducerGradient::Meta ctx(reduction_grad, 0, FirstDim);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i],
          aux_in,
          nullptr, /* no aux grad */
          num_reduce_dims_);
    }

    const T* r_grad = reduction_grad.template data<T>();

    CAFFE_ENFORCE_LE(num_reduce_dims_, source_shape.numel());

    vector<int64_t> shape(
        source_shape.template data<int64_t>(),
        source_shape.template data<int64_t>() + source_shape.numel());

    auto* data_grads = Output(0, shape, at::dtype<T>());

    int64_t block_size = FirstDim
        ? data_grads->size_from_dim(num_reduce_dims_)
        : data_grads->size_from_dim(data_grads->dim() - num_reduce_dims_);
    int64_t block_num = block_size > 0 ? data_grads->numel() / block_size : 0;

    T* out = data_grads->template mutable_data<T>();

    ReducerGradient r(ctx, r_grad, &context_);
    for (int64_t i = 0; i < block_num; ++i) {
      r.template fillGrad<FixedSize>(
          ctx,
          out + block_size * i,
          i,
          &context_,
          FirstDim ? block_num : block_size);
    }
    return true;
  }

  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    REDUCTION_GRAD = ReducerGradient::originalInputs().size(),
    SOURCE_SHAPE
  };

 private:
  int num_reduce_dims_;
};
template <typename T, typename Context, typename ReducerDef>
struct AbstractReduceFrontDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "ReduceFront";
  static constexpr const char* doc = R"DOC(
Reduces the input tensor along the first dimension by applying '{op}'. This op
acts in a similar way to SortedSegment{op} and UnsortedSegment{op}, but as if
all input slices belong to a single segment.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(
        0, "DATA", "Input tensor to be reduced on the first dimension");
    schema.TensorInferenceFunction([](const OperatorDef& def,
                                      const vector<TensorShape>& in) {
      CAFFE_ENFORCE_EQ(1, in.size());
      ArgumentHelper helper(def);
      int num_reduce_dims = helper.GetSingleArgument<int>("num_reduce_dim", 1);
      typename ReducerDef::template Reducer<T, Context>::Meta ctx(true);
      vector<int64_t> out_dims = ctx.getOutputShape(in[0], num_reduce_dims);
      return vector<TensorShape>{
          CreateTensorShape(out_dims, in[0].data_type())};
    });
    ReducerDef::PopulateSchema(schema);
  }
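  // Illustrative example (not part of the original header): with the Sum
  // reducer and num_reduce_dim=1, DATA of shape (3, 2) such as
  // [[1, 2], [3, 4], [5, 6]] reduces over the first dimension to
  // OUTPUT = [1 + 3 + 5, 2 + 4 + 6] = [9, 12].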
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractReduceFrontOrBackOp<
      T,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      true>;
  using BackwardOp =
      AbstractReduceFrontOrBackGradientOp<T, Context, ReducerGradient, true>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      string tmp_dims = "_" + O(0) + "_dims";

      vector<string> grad_ins;
      for (const int i : ReducerGradient::originalInputs()) {
        grad_ins.push_back(I(i));
      }
      grad_ins.push_back(GO(0));
      grad_ins.push_back(tmp_dims);

      vector<Argument> args;
      if (ArgumentHelper::HasArgument(def_, "num_reduce_dim")) {
        args.push_back(GetArgument(def_, "num_reduce_dim"));
      }
      return vector<OperatorDef>{
          CreateOperatorDef(
              "Shape", "", vector<string>{I(0)}, vector<string>{tmp_dims}),
          CreateOperatorDef(
              string(basename) + ReducerDef::name + "Gradient",
              "",
              grad_ins,
              // no gradient on auxiliary inputs for now
              vector<string>{GI(0)}),
      };
    }
  };
};
template <typename T, typename Context, typename ReducerDef>
struct AbstractReduceBackDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "ReduceBack";
  static constexpr const char* doc = R"DOC(
Reduces the input tensor along the last dimension by applying '{op}'. This op
acts in a similar way to SortedSegment{op} and UnsortedSegment{op}, but as if
all input slices belong to a single segment.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(
        0, "DATA", "Input tensor to be reduced on the last dimension");
    schema.TensorInferenceFunction([](const OperatorDef& def,
                                      const vector<TensorShape>& in) {
      CAFFE_ENFORCE_EQ(1, in.size());
      ArgumentHelper helper(def);
      int num_reduce_dims = helper.GetSingleArgument<int>("num_reduce_dim", 1);
      typename ReducerDef::template Reducer<T, Context>::Meta ctx(false);
      vector<int64_t> out_dims = ctx.getOutputShape(in[0], num_reduce_dims);
      return vector<TensorShape>{
          CreateTensorShape(out_dims, in[0].data_type())};
    });
    ReducerDef::PopulateSchema(schema);
  }
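  // Illustrative example (not part of the original header): with the Sum
  // reducer and num_reduce_dim=1, DATA of shape (2, 3) such as
  // [[1, 2, 3], [4, 5, 6]] reduces over the last dimension to
  // OUTPUT = [1 + 2 + 3, 4 + 5 + 6] = [6, 15].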
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractReduceFrontOrBackOp<
      T,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      false>;
  using BackwardOp =
      AbstractReduceFrontOrBackGradientOp<T, Context, ReducerGradient, false>;
  struct GetGradient : public GradientMakerBase {
    using GradientMakerBase::GradientMakerBase;
    vector<OperatorDef> GetGradientDefs() override {
      string tmp_dims = "_" + O(0) + "_dims";

      vector<string> grad_ins;
      for (const int i : ReducerGradient::originalInputs()) {
        grad_ins.push_back(I(i));
      }
      grad_ins.push_back(GO(0));
      grad_ins.push_back(tmp_dims);

      vector<Argument> args;
      if (ArgumentHelper::HasArgument(def_, "num_reduce_dim")) {
        args.push_back(GetArgument(def_, "num_reduce_dim"));
      }
      return vector<OperatorDef>{
          CreateOperatorDef(
              "Shape", "", vector<string>{I(0)}, vector<string>{tmp_dims}),
          CreateOperatorDef(
              string(basename) + ReducerDef::name + "Gradient",
              "",
              grad_ins,
              // no gradient on auxiliary inputs for now
              vector<string>{GI(0)}),
      };
    }
  };
};
/**
 * @brief Segment reduction op with optional fused embedding lookup
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class Reducer,
    bool SparseFused = true,
    class InputAccessor = BaseInputAccessor<T>>
class AbstractSortedSegmentOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<int64_t>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    int64_t in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(0);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    int64_t N = segment_ids.size(0);
    const int64_t M = dataInput.size(0);

    const IndexType* idxs;
    if (SparseFused) { // static if
      auto& indices = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indices.dim(), "INDICES must be a vector");
      CAFFE_ENFORCE_EQ(
          N,
          indices.size(0),
          "SEGMENT_IDS must have the same length as INDICES");
      idxs = indices.template data<IndexType>();
    } else {
      CAFFE_ENFORCE_EQ(
          N, M, "DATA must have the same first dimension as SEGMENT_IDS");
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, dataInput, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeInput(i, aux_in, 1);
    }

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.dtype().name(),
        ".");

    const SIndex* s_ids = segment_ids.template data<SIndex>();

    const SIndex K = N > 0 ? s_ids[N - 1] + 1 : 0;
    vector<int64_t> shape;
    shape.push_back(K);
    ctx.appendOutputShape(&shape);
    auto* output = Output(0, shape, at::dtype<T>());

    T* out = output->template mutable_data<T>();

    int64_t in_block_size = dataInput.size_from_dim(1);
    int64_t out_block_size = output->size_from_dim(1);

    // Assume the segments are sorted and there are no gaps
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    for (int64_t i = 0; i < N;) {
      int64_t start = i;

      Reducer r(ctx, out + out_block_size * s_ids[start], &context_);
      for (; i < N && s_ids[start] == s_ids[i]; ++i) {
        IndexType idx;
        if (SparseFused) { // static if
          CAFFE_ENFORCE(
              0 <= idxs[i] && idxs[i] < M,
              "Index out of bounds: ",
              idxs[i],
              ", range 0 to ",
              M);
          idx = idxs[i];
        } else {
          idx = i;
        }
        r.template process<FixedSize>(
            ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
      }
      r.template finish<FixedSize>(ctx, &context_);

      // check correctness of the next segment
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    SEGMENT_IDS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  InputAccessor inputAccessor_;
};
// Gradient actually doesn't depend on whether sparse lookup is fused or not
template <typename T, typename SIndex, class Context, class ReducerGradient>
class AbstractSortedSegmentGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    int64_t grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    int64_t N = segment_ids.size(0);

    typename ReducerGradient::Meta ctx(segment_grads, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();

    vector<int64_t> shape;
    shape.push_back(N);
    ctx.appendGradShape(&shape);
    auto* data_grads = Output(0, shape, at::dtype<T>());

    int64_t d_block_size = data_grads->size_from_dim(1);
    const SIndex K = segment_grads.size(0);
    int64_t s_block_size = segment_grads.size_from_dim(1);
    T* out = data_grads->template mutable_data<T>();

    // Assume the segments are sorted and there are no gaps
    CAFFE_ENFORCE_EQ(0, s_ids[0], "Indices must be sorted and not have gaps");
    CAFFE_ENFORCE_EQ(
        K - 1, s_ids[N - 1], "Indices must be sorted and not have gaps");
    for (int64_t i = 0; i < N;) {
      int64_t start = i;
      int64_t end = start;

      if (ReducerGradient::computeLength()) {
        for (; end < N && s_ids[start] == s_ids[end]; ++end) {
        }
      }

      ReducerGradient r(ctx, s_grads + s_block_size * s_ids[start], &context_);
      for (; i < N && s_ids[start] == s_ids[i]; ++i) {
        r.template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, end - start);
      }

      // check correctness of the next segment
      if (i < N) {
        CAFFE_ENFORCE_EQ(
            s_ids[start] + 1,
            s_ids[i],
            "Indices must be sorted and not have gaps");
      }
    }
    return true;
  }

  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    SEGMENT_IDS
  };
};
// base implementation of sorted/unsorted sparse/non-sparse gradient computation
template <
    typename ForwardOp,
    typename ReducerDef,
    typename ReducerGradient,
    bool SparseFused,
    bool Sorted = true>
struct SegmentOpGetGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    CAFFE_ENFORCE(
        !ReducerGradient::requiresDataInput(Def()),
        "grads on aux inputs are not yet implemented for Segment operators.");
    vector<string> grad_ins;
    for (const int i : ReducerGradient::originalInputs()) {
      grad_ins.push_back(I(i));
    }
    grad_ins.push_back(GO(0));
    grad_ins.push_back(I(ForwardOp::SEGMENT_IDS));
    vector<OperatorDef> r{CreateOperatorDef(
        string(Sorted ? "SortedSegment" : "UnsortedSegment") +
            ReducerDef::name + "Gradient",
        "",
        grad_ins,
        // no gradient on segment_ids or auxiliary inputs for now
        vector<string>{SparseFused ? GI_V(0) : GI(0)})};
    if (SparseFused) {
      SetSparse(0, I(ForwardOp::INDICES), GI_V(0));
    }
    return r;
  }
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef>
struct AbstractSortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SortedSegment";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. Segments need to be sorted and
contiguous. See also UnsortedSegment{op} that doesn't have this requirement.

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "SEGMENT_IDS",
        "Vector with the same length as the first dimension of DATA "
        "and values in the range 0..K-1 and in increasing order that "
        "maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    ReducerDef::PopulateSchema(schema);
  }
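  // Illustrative example (not part of the original header): with the Mean
  // reducer, DATA = [[1, 2], [3, 4], [5, 6]] and SEGMENT_IDS = [0, 0, 1]
  // give OUTPUT = [[2, 3], [5, 6]] (K = SEGMENT_IDS[-1] + 1 = 2 segments).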
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef>
struct AbstractSparseSortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseSortedSegment";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segments need to be sorted and contiguous. See also
SparseUnsortedSegment{op} that doesn't have this requirement.

This op is basically Gather and SortedSegment{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

SEGMENT_IDS is a vector that maps each referenced slice of the DATA to a
particular group (segment). Values belonging to the same segment are aggregated
together. SEGMENT_IDS should have the same dimension as INDICES.

The first dimension of the output is equal to the number of input segments,
i.e. `SEGMENT_IDS[-1]+1`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "SEGMENT_IDS",
        "Vector with the same length as INDICES and values in the range "
        "0..K-1 and in increasing order that maps each slice of DATA "
        "referenced by INDICES to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    ReducerDef::PopulateSchema(schema);
  }
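  // Illustrative example (not part of the original header): with the Sum
  // reducer, DATA = [[1, 1], [2, 2], [3, 3]], INDICES = [2, 0, 0] and
  // SEGMENT_IDS = [0, 0, 1] gather rows 2, 0, 0 and reduce them into
  // OUTPUT = [[3 + 1, 3 + 1], [1, 1]] = [[4, 4], [1, 1]].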
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
};
/**
 * @brief Unsorted segment reduction op with optional fused embedding lookup
 */
template <
    typename T,
    typename SIndex,
    class Context,
    class Reducer,
    bool SparseFused = true,
    class InputAccessor = BaseInputAccessor<T>>
class AbstractUnsortedSegmentOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit AbstractUnsortedSegmentOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(int, "num_segments", num_segments_, -1) {}

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<int64_t>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    int64_t in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& data = Input(0);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    int64_t N = segment_ids.size(0);
    const int64_t M = data.size(0);

    const IndexType* idxs;
    if (SparseFused) { // static if
      auto& indices = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indices.dim(), "INDICES must be a vector");
      CAFFE_ENFORCE_EQ(
          N,
          indices.size(0),
          "SEGMENT_IDS must have the same length as INDICES");
      idxs = indices.template data<IndexType>();
    } else {
      CAFFE_ENFORCE_EQ(
          N, M, "DATA must have the same first dimension as SEGMENT_IDS");
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, data, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeInput(i, aux_in, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(data),
        "Unsupported input type: ",
        data.dtype().name(),
        ".");

    // determine the number of segments
    SIndex K;
    if (num_segments_ != -1) {
      K = num_segments_;
    } else {
      K = 0;
      for (int64_t i = 0; i < N; ++i) {
        K = std::max(K, s_ids[i] + 1);
      }
    }

    vector<int64_t> shape;
    shape.push_back(K);
    ctx.appendOutputShape(&shape);
    auto* output = Output(0, shape, at::dtype<T>());

    int64_t in_block_size = data.size_from_dim(1);
    int64_t out_block_size = output->size_from_dim(1);
    T* out = output->template mutable_data<T>();

    reducers_.clear();
    reducers_.reserve(K);
    for (int64_t i = 0; i < K; ++i) {
      reducers_.emplace_back(ctx, out + out_block_size * i, &context_);
    }

    for (int64_t i = 0; i < N; ++i) {
      auto s_id = s_ids[i];
      CAFFE_ENFORCE(
          0 <= s_id && s_id < K,
          "Segment id out of range: ",
          s_id,
          ", range 0 to ",
          K);
      IndexType idx;
      if (SparseFused) { // static if
        CAFFE_ENFORCE(
            0 <= idxs[i] && idxs[i] < M,
            "Index out of bounds: ",
            idxs[i],
            ", range 0 to ",
            M);
        idx = idxs[i];
      } else {
        idx = i;
      }
      reducers_[s_id].template process<FixedSize>(
          ctx, inputAccessor_.getBlockPtr(in_block_size, idx), i, &context_);
    }

    for (int64_t i = 0; i < K; ++i) {
      reducers_[i].template finish<FixedSize>(ctx, &context_);
    }
    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    SEGMENT_IDS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  int64_t num_segments_;
  // member field to reuse memory
  vector<Reducer> reducers_;
  InputAccessor inputAccessor_;
};
// Gradient actually doesn't depend on whether sparse lookup is fused or not
template <typename T, typename SIndex, class Context, class ReducerGradient>
class AbstractUnsortedSegmentGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    int64_t grad_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, grad_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segment_grads = Input(SEGMENT_GRADS);
    auto& segment_ids = Input(SEGMENT_IDS);

    CAFFE_ENFORCE_EQ(1, segment_ids.dim(), "SEGMENT_IDS must be a vector");
    int64_t N = segment_ids.size(0);

    typename ReducerGradient::Meta ctx(segment_grads, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          N,
          aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const SIndex* s_ids = segment_ids.template data<SIndex>();
    const T* s_grads = segment_grads.template data<T>();

    vector<int64_t> shape;
    shape.push_back(N);
    ctx.appendGradShape(&shape);
    auto* data_grads = Output(0, shape, at::dtype<T>());

    int64_t d_block_size = data_grads->size_from_dim(1);
    const SIndex K = segment_grads.size(0);
    int64_t s_block_size = segment_grads.size_from_dim(1);
    T* out = data_grads->template mutable_data<T>();

    if (ReducerGradient::computeLength()) {
      segment_length_.resize(K, 0);
      for (int i = 0; i < N; ++i) {
        auto s_id = s_ids[i];
        CAFFE_ENFORCE(
            0 <= s_id && s_id < K,
            "Segment id out of range: ",
            s_id,
            ", range 0 to ",
            K);
        segment_length_[s_ids[i]]++;
      }
    }

    reducers_.clear();
    reducers_.reserve(K);
    for (SIndex i = 0; i < K; ++i) {
      reducers_.emplace_back(ctx, s_grads + s_block_size * i, &context_);
    }

    for (int64_t i = 0; i < N; ++i) {
      auto s_id = s_ids[i];
      if (ReducerGradient::computeLength()) {
        reducers_[s_id].template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, segment_length_[s_id]);
      } else {
        reducers_[s_id].template fillGrad<FixedSize>(
            ctx, out + d_block_size * i, i, &context_, 0);
      }
    }
    return true;
  }

  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 2;
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    SEGMENT_IDS
  };

 private:
  // member fields to reuse memory
  vector<ReducerGradient> reducers_;
  vector<int> segment_length_;
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef>
struct AbstractUnsortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "UnsortedSegment";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of input tensor. Segment ids can appear in
arbitrary order (unlike in SortedSegment{op}).

SEGMENT_IDS is a vector that maps each of the first dimension slices of the
DATA to a particular group (segment). Values belonging to the same segment are
aggregated together.

If the `num_segments` argument is passed, it is used as the first dimension of
the output. Otherwise, it is dynamically calculated as the max value of
SEGMENT_IDS plus one. Other output dimensions are inherited from the input
tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Arg(
        "num_segments",
        "Optional int argument specifying the number of output segments and "
        "thus the first dimension of the output");
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "SEGMENT_IDS",
        "Integer vector with the same length as the first dimension of DATA "
        "that maps each slice of DATA to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension equal to the "
        "number of segments.");
    ReducerDef::PopulateSchema(schema);
  }
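  // Illustrative example (not part of the original header): with the Sum
  // reducer, DATA = [[1, 2], [3, 4], [5, 6]] and SEGMENT_IDS = [1, 0, 1]
  // give OUTPUT = [[3, 4], [1 + 5, 2 + 6]] = [[3, 4], [6, 8]].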
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractUnsortedSegmentOp<
      T,
      SIndex,
      Context,
      typename ReducerDef::template Reducer<T, Context>,
      false>;
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef>
struct AbstractSparseUnsortedSegmentDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseUnsortedSegment";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segment ids can appear in arbitrary order (unlike in
SparseSortedSegment{op}).

This op is basically Gather and UnsortedSegment{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

SEGMENT_IDS is a vector that maps each referenced slice of the DATA to a
particular group (segment). Values belonging to the same segment are aggregated
together. SEGMENT_IDS should have the same dimension as INDICES.

If the `num_segments` argument is passed, it is used as the first dimension of
the output. Otherwise, it is dynamically calculated as the max value of
SEGMENT_IDS plus one. Other output dimensions are inherited from the input
tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "SEGMENT_IDS",
        "Integer vector with the same length as INDICES that maps each slice "
        "of DATA referenced by INDICES to one of the segments");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension equal to the "
        "number of segments.");
    ReducerDef::PopulateSchema(schema);
  }
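  // Illustrative example (not part of the original header): with the Sum
  // reducer, DATA = [[1, 1], [2, 2], [3, 3]], INDICES = [1, 2, 1] and
  // SEGMENT_IDS = [1, 0, 0] give
  // OUTPUT = [[3 + 2, 3 + 2], [2, 2]] = [[5, 5], [2, 2]].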
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
};
/**
 * @brief Segment reduction op with optional fused embedding lookup, where
 * segments are defined by their LENGTHS.
 */
template <
    typename TData,
    typename TLengths,
    class Context,
    class Reducer,
    bool SparseFused = true,
    class InputAccessor = BaseInputAccessor<TData>>
class AbstractLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<int64_t>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    int64_t in_block_size = Input(0).size_from_dim(1);
    return DispatchHelper<typename Reducer::FixedDispatch, IndexType>::call(
        this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(0);
    auto& lengthsInput = Input(LENGTHS);

    CAFFE_ENFORCE_EQ(1, lengthsInput.dim(), "LENGTHS must be a vector");
    const int64_t dataSize = dataInput.size(0);
    // Either the first dim of DATA or how many slices we pull in via INDICES
    int64_t dataToReduceSize;
    const int64_t outputSize = lengthsInput.size(0);

    const IndexType* indices;
    if (SparseFused) { // static if
      auto& indicesInput = Input(INDICES);
      CAFFE_ENFORCE_EQ(1, indicesInput.dim(), "INDICES must be a vector");
      indices = indicesInput.template data<IndexType>();
      dataToReduceSize = indicesInput.size(0);
    } else {
      dataToReduceSize = dataSize;
    }

    typename Reducer::Meta ctx;
    ctx.observeInput(0, dataInput, 1);
    for (int i = 1; i < Reducer::kInputCount; ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE(
          dataToReduceSize == aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeInput(i, aux_in, 1);
    }

    const TLengths* lengths = lengthsInput.template data<TLengths>();

    OPERATOR_NEEDS_FEATURE(
        inputAccessor_.observeInput(dataInput),
        "Unsupported input type: ",
        dataInput.dtype().name(),
        ".");

    vector<int64_t> shape{outputSize};
    ctx.appendOutputShape(&shape);
    auto* output = Output(0, shape, at::dtype<TData>());

    int64_t in_block_size = dataInput.size_from_dim(1);
    int64_t out_block_size = output->size_from_dim(1);
    TData* out = output->template mutable_data<TData>();

    int64_t dataIndex = 0;
    for (int64_t rangeIndex = 0; rangeIndex < outputSize; ++rangeIndex) {
      Reducer reducer(ctx, out + out_block_size * rangeIndex, &context_);
      for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        IndexType idx;
        if (SparseFused) { // static if
          idx = indices[dataIndex];
          CAFFE_ENFORCE(
              0 <= idx && idx < dataSize,
              "The ",
              dataIndex,
              "th index from the input indices is out of bounds: ",
              idx,
              " vs. valid range 0 to ",
              dataSize);
        } else {
          idx = dataIndex;
          CAFFE_ENFORCE(
              0 <= idx && idx < dataSize,
              "When calculating the ",
              rangeIndex,
              "th output with length=",
              lengths[rangeIndex],
              ", the index is out of bounds: ",
              idx,
              " vs. valid range 0 to ",
              dataSize);
        }

        const TData* input = inputAccessor_.getBlockPtr(in_block_size, idx);
        reducer.template process<FixedSize>(ctx, input, dataIndex, &context_);
      }
      reducer.template finish<FixedSize>(ctx, &context_);
    }
    CAFFE_ENFORCE(
        dataIndex == dataToReduceSize, dataIndex, " != ", dataToReduceSize);

    return true;
  }

  enum {
    INDICES = Reducer::kInputCount,
    LENGTHS = Reducer::kInputCount + (SparseFused ? 1 : 0)
  };
  static constexpr int kSelfInputs = SparseFused ? 2 : 1;
  static constexpr int kNumInputs = Reducer::kInputCount + kSelfInputs;

 private:
  InputAccessor inputAccessor_;
};
template <
    typename T,
    typename TLengths,
    class Context,
    class ReducerGradient,
    bool GradientNeedIndices = false>
class AbstractLengthsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    int64_t gradBlockSize = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, gradBlockSize);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);

    CAFFE_ENFORCE(lengthsInput.dim() == 1, "LENGTHS must be a vector");
    int64_t reducedDataSize = 0;
    int64_t numSegments = lengthsInput.size(0);
    CAFFE_ENFORCE(segmentGradsInput.dim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.size(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();
    for (int64_t i = 0; i < numSegments; ++i) {
      reducedDataSize += lengths[i];
    }

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      auto& aux_in = Input(i);
      CAFFE_ENFORCE_EQ(
          reducedDataSize,
          aux_in.size(0),
          "Input ",
          i,
          " must have the same first dim as SEGMENT_IDS");
      ctx.observeOriginalInput(
          ReducerGradient::originalInputs()[i], aux_in, nullptr /*no grad*/, 1);
    }

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<int64_t> shape;
    shape.push_back(reducedDataSize);
    ctx.appendGradShape(&shape);
    auto* dataGradsOutput = Output(0, shape, at::dtype<T>());

    int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    int64_t dataIndex = 0;
    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        reducer.template fillGrad<FixedSize>(
            ctx,
            dataGrads + dataGradsBlockSize * dataIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    CAFFE_ENFORCE(
        dataIndex == reducedDataSize, dataIndex, " != ", reducedDataSize);
    return true;
  }

  static constexpr int kNumInputs = ReducerGradient::originalInputs().size() +
      2 + (GradientNeedIndices ? 1 : 0);
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    LENGTHS,
    INDICES
  };
};
// Version of the gradient op that also requires the forward op's main input
template <
    typename Tembedding,
    typename T,
    typename TLengths,
    class Context,
    class ReducerGradient,
    bool SparseFused = true,
    bool GradientNeedIndices = false>
class AbstractLengthsWithMainInputGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    if (SparseFused) {
      return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
          this, Input(INDICES));
    } else {
      // type doesn't matter
      return DoRunWithType<int64_t>();
    }
  }

  template <typename IndexType>
  bool DoRunWithType() {
    int64_t in_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch, IndexType>::
        call(this, in_block_size);
  }

  template <typename IndexType, int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(DATA_INPUT);
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);

    CAFFE_ENFORCE(lengthsInput.dim() == 1, "LENGTHS must be a vector");
    int64_t numSegments = lengthsInput.size(0);
    CAFFE_ENFORCE(segmentGradsInput.dim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.size(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      int aux_num = ReducerGradient::originalInputs()[i];
      auto& aux_in = Input(i);
      auto* aux_grad = aux_num < OutputSize() ? Output(aux_num) : nullptr;
      ctx.observeOriginalInput(aux_num, aux_in, aux_grad, 1);
    }

    // Either the first dim of DATA or how many slices we pull in via INDICES
    int64_t dataToReduceSize;
    const IndexType* indices = nullptr;
    if (SparseFused) { // static if
      auto& indicesInput = Input(INDICES);
      indices = indicesInput.template data<IndexType>();
      dataToReduceSize = indicesInput.size(0);
    } else {
      dataToReduceSize = dataInput.size(0);
    }

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<int64_t> shape;
    shape.push_back(dataToReduceSize);
    ctx.appendGradShape(&shape);
    auto* dataGradsOutput = Output(0, shape, at::dtype<T>());

    int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    const Tembedding* data = dataInput.template data<Tembedding>();
    int64_t dataIndex = 0;
    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        IndexType data_pos;
        // No range checking, should have been verified in the forward pass
        if (SparseFused) { // static if
          data_pos = indices[dataIndex];
        } else {
          data_pos = dataIndex;
        }
        reducer.template fillGradWithMainInput<FixedSize>(
            ctx,
            data + dataGradsBlockSize * data_pos,
            dataGrads + dataGradsBlockSize * dataIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    return true;
  }

  static constexpr int kNumInputs = ReducerGradient::originalInputs().size() +
      3 + (SparseFused ? 1 : 0) + (GradientNeedIndices ? 1 : 0);
  enum _InputTags {
    SEGMENT_GRADS = ReducerGradient::originalInputs().size(),
    LENGTHS,
    DATA_INPUT,
    INDICES,
  };
};
// Version of the gradient op that requires the main input and the forward
// pass output
template <typename T, typename TLengths, class Context, class ReducerGradient>
class AbstractLengthsWithMainInputAndForwardOutputGradientOp
    : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    int64_t in_block_size = Input(SEGMENT_GRADS).size_from_dim(1);
    return DispatchHelper<typename ReducerGradient::FixedDispatch>::call(
        this, in_block_size);
  }

  template <int FixedSize>
  bool DoRunWithValue() {
    auto& dataInput = Input(DATA_INPUT);
    auto& segmentGradsInput = Input(SEGMENT_GRADS);
    auto& lengthsInput = Input(LENGTHS);
    auto& forwardOutputInput = Input(FORWARD_OUTPUT);

    CAFFE_ENFORCE(lengthsInput.dim() == 1, "LENGTHS must be a vector");
    int64_t numSegments = lengthsInput.size(0);
    CAFFE_ENFORCE(segmentGradsInput.dim() > 0);
    CAFFE_ENFORCE(numSegments == segmentGradsInput.size(0));
    const TLengths* lengths = lengthsInput.template data<TLengths>();

    typename ReducerGradient::Meta ctx(segmentGradsInput, 1);
    for (int i = 0; i < ReducerGradient::originalInputs().size(); ++i) {
      int aux_num = ReducerGradient::originalInputs()[i];
      auto& aux_in = Input(i);
      auto* aux_grad = aux_num < OutputSize() ? Output(aux_num) : nullptr;
      ctx.observeOriginalInput(aux_num, aux_in, aux_grad, 1);
    }

    CAFFE_ENFORCE(forwardOutputInput.dim() > 0);
    CAFFE_ENFORCE(numSegments == forwardOutputInput.size(0));
    const T* forwardOutput = forwardOutputInput.template data<T>();

    int64_t dataToReduceSize = dataInput.size(0);

    const T* segmentGrads = segmentGradsInput.template data<T>();

    vector<int64_t> shape;
    shape.push_back(dataToReduceSize);
    ctx.appendGradShape(&shape);
    auto* dataGradsOutput = Output(0, shape, at::dtype<T>());

    int64_t dataGradsBlockSize = dataGradsOutput->size_from_dim(1);
    int64_t segmentBlockSize = segmentGradsInput.size_from_dim(1);
    T* dataGrads = dataGradsOutput->template mutable_data<T>();

    const T* data = dataInput.template data<T>();

    int64_t dataIndex = 0;
    for (int64_t rangeIndex = 0; rangeIndex < numSegments; ++rangeIndex) {
      ReducerGradient reducer(
          ctx, segmentGrads + segmentBlockSize * rangeIndex, &context_);
      for (int64_t start = dataIndex; dataIndex < start + lengths[rangeIndex];
           ++dataIndex) {
        reducer.template fillGradWithMainInputAndForwardOutput<FixedSize>(
            ctx,
            data + dataGradsBlockSize * dataIndex,
            dataGrads + dataGradsBlockSize * dataIndex,
            forwardOutput + segmentBlockSize * rangeIndex,
            dataIndex,
            &context_,
            lengths[rangeIndex]);
      }
    }
    return true;
  }

  static constexpr int kNumInputs =
      ReducerGradient::originalInputs().size() + 4;
  enum _InputTags {
    FORWARD_OUTPUT = ReducerGradient::originalInputs().size(),
    SEGMENT_GRADS,
    LENGTHS,
    DATA_INPUT,
  };
};
// base implementation of sparse/non-sparse gradient computation
template <
    typename ForwardOp,
    typename ReducerDef,
    typename ReducerGradient,
    bool SparseFused,
    bool GradientNeedIndices = false>
struct LengthsOpGetGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<string> grad_ins;
    string suffix = "Gradient";
    for (const int i : ReducerGradient::originalInputs()) {
      grad_ins.push_back(I(i));
    }
    if (ReducerGradient::requiresForwardOutput()) {
      grad_ins.push_back(O(0));
      CAFFE_ENFORCE(
          !SparseFused,
          "Forward pass output not yet supported as input for backward pass "
          "for SparseLengthsXXX operators");
      suffix = "AndForwardOutput" + suffix;
    }
    grad_ins.push_back(GO(0));
    grad_ins.push_back(I(ForwardOp::LENGTHS));
    bool indices_pushed = false;
    if (ReducerGradient::requiresDataInput(Def())) {
      grad_ins.push_back(I(0));
      if (SparseFused) {
        grad_ins.push_back(I(ForwardOp::INDICES));
        indices_pushed = true;
      }
      suffix = "WithMainInput" + suffix;
    }
    if (GradientNeedIndices && !indices_pushed) {
      if (SparseFused) {
        grad_ins.push_back(I(ForwardOp::INDICES));
      } else {
        grad_ins.push_back(I(0));
      }
    }
    vector<string> grad_outs;
    grad_outs.push_back({SparseFused ? GI_V(0) : GI(0)});
    int aux_grads = ReducerGradient::numAuxInputsWithGrads(Def());
    for (int i = 1; i <= aux_grads; ++i) {
      grad_outs.push_back(GI(i));
    }
    vector<OperatorDef> r{CreateOperatorDef(
        string(SparseFused ? "SparseLengths" : "Lengths") +
            string(GradientNeedIndices ? "IndicesInGradient" : "") +
            ReducerDef::name + suffix,
        "",
        grad_ins,
        grad_outs)};
    if (SparseFused) {
      SetSparse(0, I(ForwardOp::INDICES), GI_V(0));
    }
    return r;
  }
};
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef,
    bool GradientNeedIndices = false>
struct AbstractLengthsDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "Lengths";
  static constexpr const char* doc = R"DOC(
Applies '{op}' to each segment of the input tensor. Segments are defined
by their *LENGTHS*. *LENGTHS* is a vector that maps each of the slices of
*DATA* to a particular segment. Values belonging to the same segment are
aggregated together and considered for the '{op}' operation.

For example *LENGTHS = [2, 1]* stands for segments *DATA[0..1]* and *DATA[2]*.

The sum of elements in *LENGTHS* must equal the number of elements in the first
dimension of *DATA*. The length of *OUTPUT* is equal to the number of input
segments, i.e. len(*LENGTHS*).
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "LENGTHS",
        "Vector whose elements sum to the first dimension of DATA");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of len(LENGTHS).");
    schema.TensorInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& in) {
          vector<TensorShape> out(0);
          TensorShape output;
          for (int d : in[Reducer::kInputCount].dims()) {
            output.add_dims(d);
          }
          for (int j = 1; j < in[0].dims_size(); j++) {
            output.add_dims(in[0].dims(j));
          }
          output.set_data_type(in[0].data_type());
          out.push_back(output);
          return out;
        });
    ReducerDef::PopulateSchema(schema);
  }
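  // Illustrative example (not part of the original header): with the Sum
  // reducer, DATA = [[1, 2], [3, 4], [5, 6]] and LENGTHS = [2, 1] give
  // OUTPUT = [[1 + 3, 2 + 4], [5, 6]] = [[4, 6], [5, 6]].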
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractLengthsOp<T, SIndex, Context, Reducer, false>;
  using GetGradient = LengthsOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      false /*SparseFused*/,
      GradientNeedIndices>;
};

OpSchema::Cost CostInferenceForSparseLengths(
    const OperatorDef& def,
    const vector<TensorShape>& inputs,
    bool use_weight);
template <
    typename T,
    typename SIndex,
    typename Context,
    typename ReducerDef,
    bool GradientNeedIndices = false>
struct AbstractSparseLengthsDef {
  using OpDef = ReducerDef;
  static constexpr const char* basename = "SparseLengths";
  static constexpr const char* doc = R"DOC(
Pulls in slices of the input tensor, groups them into segments and applies
'{op}' to each segment. Segments are defined by their LENGTHS.

This op is basically Gather and Lengths{op} fused together.

INDICES should contain integers in range 0..N-1 where N is the first dimension
of DATA. INDICES represent which slices of DATA need to be pulled in.

LENGTHS is a vector that defines slice sizes by the first dimension of DATA.
Values belonging to the same segment are aggregated together. sum(LENGTHS) has
to match INDICES size.

The first dimension of the output is equal to the number of input segments,
i.e. `len(LENGTHS)`. Other dimensions are inherited from the input tensor.
)DOC";
  static void PopulateSchema(OpSchema& schema) {
    schema.Input(0, "DATA", "Input tensor, slices of which are aggregated.");
    schema.Input(
        Reducer::kInputCount,
        "INDICES",
        "Integer vector containing indices of the first dimension of DATA for "
        "the slices that are being aggregated");
    schema.Input(
        Reducer::kInputCount + 1,
        "LENGTHS",
        "Non negative vector with sum of elements equal to INDICES length");
    schema.Output(
        0,
        "OUTPUT",
        "Aggregated output tensor. Has the first dimension of K "
        "(the number of segments).");
    schema.TensorInferenceFunction(
        [](const OperatorDef&, const std::vector<TensorShape>& input_types) {
          std::vector<TensorShape> out(1);
          out[0] = input_types[0];
          out[0].set_dims(0, input_types[Reducer::kInputCount + 1].dims(0));
          return out;
        });
    ReducerDef::PopulateSchema(schema);
    schema.CostInferenceFunction(
        [](const OperatorDef& def, const vector<TensorShape>& inputs) {
          return CostInferenceForSparseLengths(
              def, inputs, strcmp(OpDef::name, "WeightedSum") == 0);
        });
  }
  using Reducer = typename ReducerDef::template Reducer<T, Context>;
  using ReducerGradient =
      typename ReducerDef::template ReducerGradient<T, Context>;
  using ForwardOp = AbstractLengthsOp<T, SIndex, Context, Reducer>;
  using GetGradient = LengthsOpGetGradient<
      ForwardOp,
      ReducerDef,
      ReducerGradient,
      true /*SparseFused*/,
      GradientNeedIndices>;
};
} // namespace caffe2

#endif // CAFFE2_OPERATORS_SEGMENT_REDUCTION_OP_H_