// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// segment_reduction_op.cc
17 #include "caffe2/operators/segment_reduction_op.h"
18 
19 namespace caffe2 {
20 
// registering 4 input gradient with main output
// Weighted-sum gradient that additionally receives the forward op's main
// (data) input; 5 inputs, 2 outputs. SparseFused and GradientNeedIndices
// are both enabled via the template flags below.
OPERATOR_SCHEMA(SparseLengthsIndicesInGradientWeightedSumWithMainInputGradient)
    .NumInputs(5)
    .NumOutputs(2);
REGISTER_CPU_OPERATOR(
    SparseLengthsIndicesInGradientWeightedSumWithMainInputGradient,
    AbstractLengthsWithMainInputGradientOp<
        float,
        int,
        CPUContext,
        WeightedSumReducerDef::template ReducerGradient<float, CPUContext>,
        true /*SparseFused*/,
        true /*GradientNeedIndices*/>);
34 
// registering 4 input version
// Weighted-sum gradient without the main input: 4 inputs, 1 output.
OPERATOR_SCHEMA(SparseLengthsIndicesInGradientWeightedSumGradient)
    .NumInputs(4)
    .NumOutputs(1);
REGISTER_CPU_OPERATOR(
    SparseLengthsIndicesInGradientWeightedSumGradient,
    AbstractLengthsGradientOp<
        float,
        int,
        CPUContext,
        WeightedSumReducerDef::template ReducerGradient<float, CPUContext>,
        true /*GradientNeedIndices*/>);
47 
// registering 3 input version
// Plain-sum gradient: 3 inputs, 1 output; indices are forwarded to the
// gradient op (GradientNeedIndices = true).
OPERATOR_SCHEMA(SparseLengthsIndicesInGradientSumGradient)
    .NumInputs(3)
    .NumOutputs(1);
REGISTER_CPU_OPERATOR(
    SparseLengthsIndicesInGradientSumGradient,
    AbstractLengthsGradientOp<
        float,
        int,
        CPUContext,
        SumReducerDef::template ReducerGradient<float, CPUContext>,
        true /*GradientNeedIndices*/>);
// Non-sparse counterpart of the schema/operator above; the operator
// instantiation is identical.
OPERATOR_SCHEMA(LengthsIndicesInGradientSumGradient).NumInputs(3).NumOutputs(1);
REGISTER_CPU_OPERATOR(
    LengthsIndicesInGradientSumGradient,
    AbstractLengthsGradientOp<
        float,
        int,
        CPUContext,
        SumReducerDef::template ReducerGradient<float, CPUContext>,
        true /*GradientNeedIndices*/>);
69 
70 namespace {
71 
72 template <typename Def>
73 string FormatDoc() {
74  string doc = Def::doc;
75  ReplaceAll(doc, "{op}", Def::OpDef::name);
76  ReplaceAll(doc, "{op_doc}", Def::OpDef::doc);
77  return doc;
78 }
79 
// Helper macro when the main op is defined elsewhere, and we only need to
// define the schema, and the gradient op.
//
// Expands to three registrations, all keyed by string names built from
// Def::basename + Def::OpDef::name:
//   1. the forward op's schema (doc produced via FormatDoc, schema filled
//      by Def::PopulateSchema);
//   2. the "<name>Gradient" CPU operator (Def::BackwardOp) and its schema;
//   3. the gradient mapping via Def::GetGradient.
// Note: comments must stay outside the #define — a '//' inside a
// continuation line would comment out the trailing backslash.
#define REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY(...)                         \
  OPERATOR_SCHEMA_STR(                                                         \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name))              \
      .NumInputs(__VA_ARGS__::ForwardOp::kNumInputs)                           \
      .NumOutputs(1)                                                           \
      .SetDoc(FormatDoc<__VA_ARGS__>())                                        \
      .Output(0, "OUTPUT", "Aggregated tensor")                                \
      .FillUsing(__VA_ARGS__::PopulateSchema);                                 \
  REGISTER_CPU_OPERATOR_STR(                                                   \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) + "Gradient", \
      __VA_ARGS__::BackwardOp);                                                \
  OPERATOR_SCHEMA_STR(                                                         \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) + "Gradient") \
      .NumInputs(__VA_ARGS__::BackwardOp::kNumInputs)                          \
      .NumOutputs(1);                                                          \
  REGISTER_GRADIENT_STR(                                                       \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name),              \
      __VA_ARGS__::GetGradient)
100 
// Full registration: registers the forward CPU operator itself, then
// delegates schema + gradient registration to
// REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY.
#define REGISTER_SEGMENT_DEF(...)                                 \
  REGISTER_CPU_OPERATOR_STR(                                      \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name), \
      __VA_ARGS__::ForwardOp);                                    \
  REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY(__VA_ARGS__)
106 
// SortedSegmentRange ops: one registration per range reducer
// (Sum, LogSumExp, LogMeanExp, Mean, Max), each with float data and int
// segment ids on CPU.
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext, SumRangeReducerDef>);
REGISTER_SEGMENT_DEF(AbstractSortedSegmentRangeDef<
    float,
    int,
    CPUContext,
    LogSumExpRangeReducerDef>);
REGISTER_SEGMENT_DEF(AbstractSortedSegmentRangeDef<
    float,
    int,
    CPUContext,
    LogMeanExpRangeReducerDef>);
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext, MeanRangeReducerDef>);
REGISTER_SEGMENT_DEF(
    AbstractSortedSegmentRangeDef<float, int, CPUContext, MaxRangeReducerDef>);
123 
// Registers the four segment-op variants of a pointwise reducer —
// sorted, sparse-sorted, unsorted, and sparse-unsorted — each with its
// schema and gradient (via REGISTER_SEGMENT_DEF).
#define REGISTER_REDUCER_WITH_OPS(reducer_def)                              \
  REGISTER_SEGMENT_DEF(                                                     \
      AbstractSortedSegmentDef<float, int, CPUContext, reducer_def>);       \
  REGISTER_SEGMENT_DEF(                                                     \
      AbstractSparseSortedSegmentDef<float, int, CPUContext, reducer_def>); \
  REGISTER_SEGMENT_DEF(                                                     \
      AbstractUnsortedSegmentDef<float, int, CPUContext, reducer_def>);     \
  REGISTER_SEGMENT_DEF(                                                     \
      AbstractSparseUnsortedSegmentDef<float, int, CPUContext, reducer_def>)
133 
// Registers the Lengths-based variant of a reducer. The GradientNeedIndices
// argument is forwarded verbatim as a template parameter of
// AbstractLengthsDef.
#define REGISTER_REDUCER_WITH_LENGTH_OPS(reducer_def, GradientNeedIndices) \
  REGISTER_SEGMENT_DEF(AbstractLengthsDef<                                 \
      float,                                                               \
      int,                                                                 \
      CPUContext,                                                          \
      reducer_def,                                                         \
      GradientNeedIndices>)
141 
// Full registration set for a reducer: the ReduceFront variant, the four
// segment variants, and the Lengths variant (with GradientNeedIndices
// fixed to false).
#define REGISTER_REDUCER_WITH_ALL_OPS(reducer_def)               \
  REGISTER_SEGMENT_DEF(                                          \
      AbstractReduceFrontDef<float, CPUContext, reducer_def>);   \
  REGISTER_REDUCER_WITH_OPS(reducer_def)                         \
  REGISTER_REDUCER_WITH_LENGTH_OPS(reducer_def, false)
147 
// Sum: segment variants plus the Lengths variant; its gradient is
// registered with GradientNeedIndices = true.
REGISTER_REDUCER_WITH_OPS(SumReducerDef);
REGISTER_REDUCER_WITH_LENGTH_OPS(SumReducerDef, true);

// Mean: same family; the Lengths gradient does not take indices.
REGISTER_REDUCER_WITH_OPS(MeanReducerDef);
REGISTER_REDUCER_WITH_LENGTH_OPS(MeanReducerDef, false);

// WeightedSum: the "all ops" set, which also includes ReduceFront.
REGISTER_REDUCER_WITH_ALL_OPS(WeightedSumReducerDef);
155 
// SparseLengths[Sum,WeightedSum,Mean] are now implemented separately,
// so we only rely on the historical implementation for the backward + schema.
// (No forward CPU operator is registered here — only schema and gradient.)
REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY(AbstractSparseLengthsDef<
    float,
    int,
    CPUContext,
    SumReducerDef,
    true /*GradientNeedIndices*/>)
REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY(AbstractSparseLengthsDef<
    float,
    int,
    CPUContext,
    WeightedSumReducerDef,
    true /*GradientNeedIndices*/>)

// Mean keeps the default GradientNeedIndices template argument.
REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY(
    AbstractSparseLengthsDef<float, int, CPUContext, MeanReducerDef>)
173 
// Auxiliary output gradients are currently implemented only for Lengths version
// Registers the "<basename><op>WithMainInputGradient" CPU operator
// (Def::WithMainInputBackwardOp) and its schema. NumOutputs is open-ended
// (1..INT_MAX) since additional gradient outputs may be produced.
#define REGISTER_GRADIENT_WITH_MAIN_INPUT(...)                     \
  REGISTER_CPU_OPERATOR_STR(                                       \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) + \
          "WithMainInputGradient",                                 \
      __VA_ARGS__::WithMainInputBackwardOp);                       \
  OPERATOR_SCHEMA_STR(                                             \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) + \
          "WithMainInputGradient")                                 \
      .NumInputs(__VA_ARGS__::WithMainInputBackwardOp::kNumInputs) \
      .NumOutputs(1, INT_MAX)
// Only the WeightedSum Lengths/SparseLengths ops get this variant here.
REGISTER_GRADIENT_WITH_MAIN_INPUT(
    AbstractLengthsDef<float, int, CPUContext, WeightedSumReducerDef>);
REGISTER_GRADIENT_WITH_MAIN_INPUT(
    AbstractSparseLengthsDef<float, int, CPUContext, WeightedSumReducerDef>);
189 
// Registers the "<basename><op>WithMainInputAndForwardOutputGradient" CPU
// operator (Def::WithMainInputAndForwardOutputBackwardOp — per its name, a
// backward op that also consumes the forward op's main input and output)
// together with its schema (1..INT_MAX outputs).
#define REGISTER_GRADIENT_WITH_MAIN_INPUT_AND_FORWARD_OUTPUT(...)           \
  REGISTER_CPU_OPERATOR_STR(                                                \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) +          \
          "WithMainInputAndForwardOutputGradient",                          \
      __VA_ARGS__::WithMainInputAndForwardOutputBackwardOp);                \
  OPERATOR_SCHEMA_STR(                                                      \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name) +          \
          "WithMainInputAndForwardOutputGradient")                          \
      .NumInputs(                                                           \
          __VA_ARGS__::WithMainInputAndForwardOutputBackwardOp::kNumInputs) \
      .NumOutputs(1, INT_MAX)
201 
// Like REGISTER_SEGMENT_DEF_SCHEMA_GRADIENT_ONLY (forward schema + gradient
// mapping), except the backward op registered is the
// WithMainInputAndForwardOutput variant.
#define REGISTER_SEGMENT_DEF_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT(...) \
  OPERATOR_SCHEMA_STR(                                                   \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name))        \
      .NumInputs(__VA_ARGS__::ForwardOp::kNumInputs)                     \
      .NumOutputs(1)                                                     \
      .SetDoc(FormatDoc<__VA_ARGS__>())                                  \
      .Output(0, "OUTPUT", "Aggregated tensor")                          \
      .FillUsing(__VA_ARGS__::PopulateSchema);                           \
  REGISTER_GRADIENT_WITH_MAIN_INPUT_AND_FORWARD_OUTPUT(__VA_ARGS__);     \
  REGISTER_GRADIENT_STR(                                                 \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name),        \
      __VA_ARGS__::GetGradient)
214 
// This implements and registers a length op with a gradient which requires
// the main input as well as the output of the forward output.
#define REGISTER_LENGTHS_OPS_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT(...) \
  REGISTER_CPU_OPERATOR_STR(                                             \
      string(__VA_ARGS__::basename) + (__VA_ARGS__::OpDef::name),        \
      __VA_ARGS__::ForwardOp);                                           \
  REGISTER_SEGMENT_DEF_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT(__VA_ARGS__)

// Max is the only reducer registered with this variant in this file.
REGISTER_LENGTHS_OPS_MAIN_INPUT_AND_FORWARD_OUTPUT_GRADIENT(
    AbstractLengthsDef<float, int, CPUContext, MaxReducerDef>);
} // namespace
} // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.