// Caffe2 - C++ API
// A deep learning, cross-platform ML framework
// feature_maps_ops.h
1 #ifndef CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_
2 #define CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_
3 
4 #include "caffe2/core/context.h"
5 #include "caffe2/core/operator.h"
6 
7 namespace caffe2 {
8 
9 template <class Context>
11  public:
12  USE_OPERATOR_CONTEXT_FUNCTIONS;
13 
14  template <class... Args>
15  explicit MergeSingleScalarFeatureTensorsOp(Args&&... args)
16  : Operator<Context>(std::forward<Args>(args)...) {
17  numInputs_ = InputSize() / kNumTensorsPerInput;
18  featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
19  }
20  virtual ~MergeSingleScalarFeatureTensorsOp() noexcept {}
21 
22  bool RunOnDevice() override {
23  return DispatchHelper<
25  call(this, Input(0));
26  }
27 
28  template <typename T>
29  bool DoRunWithType() {
30  int numExamples = Input(0).numel();
31  int totalNumFeatures = 0;
32  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
33  const bool* inPresenceData =
34  Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
35  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
36  if (inPresenceData[exampleIndex]) {
37  ++totalNumFeatures;
38  }
39  }
40  }
41 
42  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
43  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
44  auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());
45 
46  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
47  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
48  T* outValuesData = outValues->template mutable_data<T>();
49 
50  int keysOffset = 0;
51  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
52  outLengthsData[exampleIndex] = 0;
53  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
54  const T* inData =
55  Input(kNumTensorsPerInput * inputIndex).template data<T>();
56  const bool* inPresenceData =
57  Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
58  if (inPresenceData[exampleIndex]) {
59  ++outLengthsData[exampleIndex];
60  outKeysData[keysOffset] = featureIDs_[inputIndex];
61  outValuesData[keysOffset] = inData[exampleIndex];
62  ++keysOffset;
63  }
64  }
65  }
66  return true;
67  }
68 
69  private:
70  const int kNumTensorsPerInput = 2;
71  int numInputs_;
72  std::vector<int64_t> featureIDs_;
73 };
74 
75 template <class Context>
77  public:
78  USE_OPERATOR_CONTEXT_FUNCTIONS;
79 
80  template <class... Args>
81  explicit MergeSingleScalarFeatureTensorsGradientOp(Args&&... args)
82  : Operator<Context>(std::forward<Args>(args)...) {
83  numFeatureInputs_ = InputSize() - 1; // Everything other than values_grad
84  }
85  virtual ~MergeSingleScalarFeatureTensorsGradientOp() noexcept {}
86 
87  bool RunOnDevice() override {
88  return DispatchHelper<
90  call(this, Input(InputSize() - 1));
91  }
92 
93  template <typename T>
94  bool DoRunWithType() {
95  int numExamples = Input(0).numel();
96  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
97  Output(inputIndex)->ResizeLike(Input(inputIndex));
98  }
99 
100  const T* inValuesGradData = Input(InputSize() - 1).template data<T>();
101 
102  T default_value = T();
103  int valuesOffset = 0;
104  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
105  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
106  const bool* inPresenceData = Input(inputIndex).template data<bool>();
107  T* outFeatureData = Output(inputIndex)->template mutable_data<T>();
108  if (inPresenceData[exampleIndex]) {
109  outFeatureData[exampleIndex] = inValuesGradData[valuesOffset];
110  ++valuesOffset;
111  } else {
112  outFeatureData[exampleIndex] = default_value;
113  }
114  }
115  }
116  return true;
117  }
118 
119  private:
120  int numFeatureInputs_;
121 };
122 
123 template <class Context>
124 class MergeSingleListFeatureTensorsOp : public Operator<Context> {
125  public:
126  USE_OPERATOR_CONTEXT_FUNCTIONS;
127 
128  template <class... Args>
129  explicit MergeSingleListFeatureTensorsOp(Args&&... args)
130  : Operator<Context>(std::forward<Args>(args)...) {
131  numInputs_ = InputSize() / kNumTensorsPerInput;
132  inValuesOffset_.resize(numInputs_);
133  featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
134  }
135  virtual ~MergeSingleListFeatureTensorsOp() noexcept {}
136 
137  bool RunOnDevice() override {
138  return DispatchHelper<
140  call(this, Input(1));
141  }
142 
143  template <typename T>
144  bool DoRunWithType() {
145  int numExamples = Input(0).numel();
146  int totalNumFeatures = 0;
147  int totalNumValues = 0;
148  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
149  const int32_t* inLengthsData =
150  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
151  const bool* inPresenceData =
152  Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
153  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
154  if (inPresenceData[exampleIndex]) {
155  ++totalNumFeatures;
156  totalNumValues += inLengthsData[exampleIndex];
157  }
158  }
159  }
160 
161  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
162  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
163  auto* outValuesLengths =
164  Output(2, {totalNumFeatures}, at::dtype<int32_t>());
165  auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());
166 
167  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
168  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
169  int32_t* outValuesLengthsData =
170  outValuesLengths->template mutable_data<int32_t>();
171  T* outValuesValuesData = outValuesValues->template mutable_data<T>();
172 
173  int keysOffset = 0;
174  int valuesOffset = 0;
175  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
176  inValuesOffset_[inputIndex] = 0;
177  }
178  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
179  outLengthsData[exampleIndex] = 0;
180  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
181  const int32_t* inLengthsData =
182  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
183  const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 1);
184  const bool* inPresenceData =
185  Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
186  if (inPresenceData[exampleIndex]) {
187  ++outLengthsData[exampleIndex];
188  outKeysData[keysOffset] = featureIDs_[inputIndex];
189  outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
190  context_.CopyItemsSameDevice(
191  inValues.dtype(),
192  inLengthsData[exampleIndex],
193  &inValues.template data<T>()[inValuesOffset_[inputIndex]],
194  &outValuesValuesData[valuesOffset]);
195  valuesOffset += inLengthsData[exampleIndex];
196  inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
197  ++keysOffset;
198  }
199  }
200  }
201  return true;
202  }
203 
204  private:
205  const int kNumTensorsPerInput = 3;
206  int numInputs_;
207  std::vector<int> inValuesOffset_;
208  std::vector<int64_t> featureIDs_;
209 };
210 
211 template <class Context>
213  public:
214  USE_OPERATOR_CONTEXT_FUNCTIONS;
215 
216  template <class... Args>
217  explicit MergeSingleListOrMapFeatureTensorsGradientOp(Args&&... args)
218  : Operator<Context>(std::forward<Args>(args)...) {
219  numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
220  }
221  virtual ~MergeSingleListOrMapFeatureTensorsGradientOp() noexcept {}
222 
223  bool RunOnDevice() override {
224  return DispatchHelper<
226  call(this, Input(InputSize() - 1));
227  }
228 
229  template <typename T>
230  bool DoRunWithType() {
231  int numExamples = Input(0).numel();
232  std::vector<int> outValuesOffset(numFeatureInputs_);
233  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
234  int inputNumValues = 0;
235  const int32_t* inLengthsData =
236  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
237  const bool* inPresenceData =
238  Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
239  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
240  if (inPresenceData[exampleIndex]) {
241  inputNumValues += inLengthsData[exampleIndex];
242  }
243  }
244  Output(inputIndex)->Resize(inputNumValues);
245  }
246 
247  const auto& inValuesValuesGrad = Input(InputSize() - 1);
248  const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();
249 
250  int inValuesValuesOffset = 0;
251  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
252  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
253  const int32_t* inLengthsData =
254  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
255  const bool* inPresenceData =
256  Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
257  if (inPresenceData[exampleIndex]) {
258  T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
259  context_.CopyItemsSameDevice(
260  inValuesValuesGrad.dtype(),
261  inLengthsData[exampleIndex],
262  &inValuesValuesGradData[inValuesValuesOffset],
263  &outFeatureValues[outValuesOffset[inputIndex]]);
264  outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
265  inValuesValuesOffset += inLengthsData[exampleIndex];
266  }
267  }
268  }
269  return true;
270  }
271 
272  private:
273  const int kNumTensorsPerInput = 2;
274  int numFeatureInputs_;
275 };
276 
277 template <class Context>
278 class MergeSingleMapFeatureTensorsOp : public Operator<Context> {
279  public:
280  USE_OPERATOR_CONTEXT_FUNCTIONS;
281 
282  template <class... Args>
283  explicit MergeSingleMapFeatureTensorsOp(Args&&... args)
284  : Operator<Context>(std::forward<Args>(args)...) {
285  numInputs_ = InputSize() / kNumTensorsPerInput;
286  inValuesOffset_.resize(numInputs_);
287  featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
288  }
289  virtual ~MergeSingleMapFeatureTensorsOp() noexcept {}
290 
291  bool RunOnDevice() override {
292  return DispatchHelper<
294  call(this, Input(1));
295  }
296 
297  template <typename K>
298  bool DoRunWithType() {
299  return DispatchHelper<
301  K>::call(this, Input(2));
302  }
303 
304  template <typename K, typename V>
305  bool DoRunWithType2() {
306  int numExamples = Input(0).numel();
307  int totalNumFeatures = 0;
308  int totalNumValues = 0;
309  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
310  const int32_t* inLengthsData =
311  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
312  const bool* inPresenceData =
313  Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
314  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
315  if (inPresenceData[exampleIndex]) {
316  ++totalNumFeatures;
317  totalNumValues += inLengthsData[exampleIndex];
318  }
319  }
320  }
321 
322  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
323  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
324  auto* outValuesLengths =
325  Output(2, {totalNumFeatures}, at::dtype<int32_t>());
326  auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
327  auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());
328 
329  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
330  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
331  int32_t* outValuesLengthsData =
332  outValuesLengths->template mutable_data<int32_t>();
333  K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
334  V* outValuesValuesData = outValuesValues->template mutable_data<V>();
335 
336  int keysOffset = 0;
337  int valuesOffset = 0;
338  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
339  inValuesOffset_[inputIndex] = 0;
340  }
341  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
342  outLengthsData[exampleIndex] = 0;
343  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
344  const int32_t* inLengthsData =
345  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
346  const auto& inKeys = Input(kNumTensorsPerInput * inputIndex + 1);
347  const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 2);
348  const bool* inPresenceData =
349  Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
350  if (inPresenceData[exampleIndex]) {
351  ++outLengthsData[exampleIndex];
352  outKeysData[keysOffset] = featureIDs_[inputIndex];
353  outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
354  context_.CopyItemsSameDevice(
355  inKeys.dtype(),
356  inLengthsData[exampleIndex],
357  &inKeys.template data<K>()[inValuesOffset_[inputIndex]],
358  &outValuesKeysData[valuesOffset]);
359  context_.CopyItemsSameDevice(
360  inValues.dtype(),
361  inLengthsData[exampleIndex],
362  &inValues.template data<V>()[inValuesOffset_[inputIndex]],
363  &outValuesValuesData[valuesOffset]);
364  valuesOffset += inLengthsData[exampleIndex];
365  inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
366  ++keysOffset;
367  }
368  }
369  }
370  return true;
371  }
372 
373  private:
374  const int kNumTensorsPerInput = 4;
375  int numInputs_;
376  std::vector<int> inValuesOffset_;
377  std::vector<int64_t> featureIDs_;
378 };
379 
380 template <class Context>
382  public:
383  USE_OPERATOR_CONTEXT_FUNCTIONS;
384 
385  template <class... Args>
386  explicit MergeMultiScalarFeatureTensorsOp(Args&&... args)
387  : Operator<Context>(std::forward<Args>(args)...) {
388  numInputs_ = InputSize() / kNumTensorsPerInput;
389  inKeysOffset_.resize(numInputs_);
390  }
391  virtual ~MergeMultiScalarFeatureTensorsOp() noexcept {}
392 
393  bool RunOnDevice() override {
394  return DispatchHelper<
396  call(this, Input(2));
397  }
398 
399  template <typename T>
400  bool DoRunWithType() {
401  int numExamples = Input(0).numel();
402  int totalNumFeatures = 0;
403  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
404  totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
405  }
406 
407  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
408  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
409  auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());
410 
411  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
412  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
413  T* outValuesData = outValues->template mutable_data<T>();
414 
415  int outKeysOffset = 0;
416  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
417  inKeysOffset_[inputIndex] = 0;
418  }
419  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
420  outLengthsData[exampleIndex] = 0;
421  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
422  const int32_t* inLengthsData =
423  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
424  const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
425  .template data<int64_t>();
426  const T* inValuesData =
427  Input(kNumTensorsPerInput * inputIndex + 2).template data<T>();
428  outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
429  for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
430  ++featureIndex) {
431  outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
432  outValuesData[outKeysOffset] =
433  inValuesData[inKeysOffset_[inputIndex]];
434  ++outKeysOffset;
435  ++inKeysOffset_[inputIndex];
436  }
437  }
438  }
439 
440  return true;
441  }
442 
443  private:
444  const int kNumTensorsPerInput = 3;
445  int numInputs_;
446  std::vector<int> inKeysOffset_;
447 };
448 
449 template <class Context>
451  public:
452  USE_OPERATOR_CONTEXT_FUNCTIONS;
453 
454  template <class... Args>
455  explicit MergeMultiScalarFeatureTensorsGradientOp(Args&&... args)
456  : Operator<Context>(std::forward<Args>(args)...) {
457  numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
458  }
459  virtual ~MergeMultiScalarFeatureTensorsGradientOp() noexcept {}
460 
461  bool RunOnDevice() override {
462  return DispatchHelper<
464  call(this, Input(InputSize() - 1));
465  }
466 
467  template <typename T>
468  bool DoRunWithType() {
469  int numExamples = Input(0).numel();
470  std::vector<int> outValuesOffset(numFeatureInputs_);
471  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
472  int inputNumValues = 0;
473  const int32_t* inLengthsData =
474  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
475  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
476  inputNumValues += inLengthsData[exampleIndex];
477  }
478  Output(inputIndex)->Resize(inputNumValues);
479  }
480 
481  const auto& inValuesGrad = Input(InputSize() - 1);
482  const T* inValuesGradData = inValuesGrad.template data<T>();
483 
484  int inValuesOffset = 0;
485  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
486  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
487  const int32_t* inLengthsData =
488  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
489  if (inLengthsData[exampleIndex] > 0) {
490  T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
491  context_.CopyItemsSameDevice(
492  inValuesGrad.dtype(),
493  inLengthsData[exampleIndex],
494  &inValuesGradData[inValuesOffset],
495  &outFeatureValues[outValuesOffset[inputIndex]]);
496  outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
497  inValuesOffset += inLengthsData[exampleIndex];
498  }
499  }
500  }
501  return true;
502  }
503 
504  private:
505  int kNumTensorsPerInput = 1;
506  int numFeatureInputs_;
507 };
508 
509 template <class Context>
510 class MergeMultiListFeatureTensorsOp : public Operator<Context> {
511  public:
512  USE_OPERATOR_CONTEXT_FUNCTIONS;
513 
514  template <class... Args>
515  explicit MergeMultiListFeatureTensorsOp(Args&&... args)
516  : Operator<Context>(std::forward<Args>(args)...) {
517  numInputs_ = InputSize() / kNumTensorsPerInput;
518  inKeysOffset_.resize(numInputs_);
519  inValuesValuesOffset_.resize(numInputs_);
520  }
521  virtual ~MergeMultiListFeatureTensorsOp() noexcept {}
522 
523  bool RunOnDevice() override {
524  return DispatchHelper<
526  call(this, Input(3));
527  }
528 
529  template <typename T>
530  bool DoRunWithType() {
531  int numExamples = Input(0).numel();
532  int totalNumFeatures = 0;
533  int totalNumValues = 0;
534  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
535  totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
536  totalNumValues += Input(kNumTensorsPerInput * inputIndex + 3).numel();
537  }
538 
539  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
540  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
541  auto* outValuesLengths =
542  Output(2, {totalNumFeatures}, at::dtype<int32_t>());
543  auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());
544 
545  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
546  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
547  int32_t* outValuesLengthsData =
548  outValuesLengths->template mutable_data<int32_t>();
549  T* outValuesValuesData = outValuesValues->template mutable_data<T>();
550 
551  int outKeysOffset = 0;
552  int outValuesValuesOffset = 0;
553  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
554  inKeysOffset_[inputIndex] = 0;
555  inValuesValuesOffset_[inputIndex] = 0;
556  }
557  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
558  outLengthsData[exampleIndex] = 0;
559  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
560  const int32_t* inLengthsData =
561  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
562  const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
563  .template data<int64_t>();
564  const int32_t* inValuesLengthsData =
565  Input(kNumTensorsPerInput * inputIndex + 2)
566  .template data<int32_t>();
567  const auto& inValuesValues =
568  Input(kNumTensorsPerInput * inputIndex + 3);
569  outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
570  for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
571  ++featureIndex) {
572  outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
573  outValuesLengthsData[outKeysOffset] =
574  inValuesLengthsData[inKeysOffset_[inputIndex]];
575  context_.CopyItemsSameDevice(
576  inValuesValues.dtype(),
577  inValuesLengthsData[inKeysOffset_[inputIndex]],
578  &inValuesValues
579  .template data<T>()[inValuesValuesOffset_[inputIndex]],
580  &outValuesValuesData[outValuesValuesOffset]);
581  outValuesValuesOffset +=
582  inValuesLengthsData[inKeysOffset_[inputIndex]];
583  inValuesValuesOffset_[inputIndex] +=
584  inValuesLengthsData[inKeysOffset_[inputIndex]];
585  ++outKeysOffset;
586  ++inKeysOffset_[inputIndex];
587  }
588  }
589  }
590 
591  return true;
592  }
593 
594  private:
595  const int kNumTensorsPerInput = 4;
596  int numInputs_;
597  std::vector<int> inKeysOffset_;
598  std::vector<int> inValuesValuesOffset_;
599 };
600 
601 template <class Context>
602 class MergeMultiMapFeatureTensorsOp : public Operator<Context> {
603  public:
604  USE_OPERATOR_CONTEXT_FUNCTIONS;
605 
606  template <class... Args>
607  explicit MergeMultiMapFeatureTensorsOp(Args&&... args)
608  : Operator<Context>(std::forward<Args>(args)...) {
609  numInputs_ = InputSize() / kNumTensorsPerInput;
610  inKeysOffset_.resize(numInputs_);
611  inValuesValuesOffset_.resize(numInputs_);
612  }
613  virtual ~MergeMultiMapFeatureTensorsOp() noexcept {}
614 
615  bool RunOnDevice() override {
616  return DispatchHelper<
618  call(this, Input(3));
619  }
620 
621  template <typename K>
622  bool DoRunWithType() {
623  return DispatchHelper<
625  K>::call(this, Input(4));
626  }
627 
628  template <typename K, typename V>
629  bool DoRunWithType2() {
630  int numExamples = Input(0).numel();
631  int totalNumFeatures = 0;
632  int totalNumValues = 0;
633  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
634  totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
635  totalNumValues += Input(kNumTensorsPerInput * inputIndex + 4).numel();
636  }
637 
638  auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
639  auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
640  auto* outValuesLengths =
641  Output(2, {totalNumFeatures}, at::dtype<int32_t>());
642  auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
643  auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());
644 
645  int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
646  int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
647  int32_t* outValuesLengthsData =
648  outValuesLengths->template mutable_data<int32_t>();
649  K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
650  V* outValuesValuesData = outValuesValues->template mutable_data<V>();
651 
652  int outKeysOffset = 0;
653  int outValuesValuesOffset = 0;
654  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
655  inKeysOffset_[inputIndex] = 0;
656  inValuesValuesOffset_[inputIndex] = 0;
657  }
658  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
659  outLengthsData[exampleIndex] = 0;
660  for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
661  const int32_t* inLengthsData =
662  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
663  const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
664  .template data<int64_t>();
665  const int32_t* inValuesLengthsData =
666  Input(kNumTensorsPerInput * inputIndex + 2)
667  .template data<int32_t>();
668  const auto& inValuesKeys = Input(kNumTensorsPerInput * inputIndex + 3);
669  const auto& inValuesValues =
670  Input(kNumTensorsPerInput * inputIndex + 4);
671  outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
672  for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
673  ++featureIndex) {
674  outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
675  outValuesLengthsData[outKeysOffset] =
676  inValuesLengthsData[inKeysOffset_[inputIndex]];
677  context_.CopyItemsSameDevice(
678  inValuesKeys.dtype(),
679  inValuesLengthsData[inKeysOffset_[inputIndex]],
680  &inValuesKeys
681  .template data<K>()[inValuesValuesOffset_[inputIndex]],
682  &outValuesKeysData[outValuesValuesOffset]);
683  context_.CopyItemsSameDevice(
684  inValuesValues.dtype(),
685  inValuesLengthsData[inKeysOffset_[inputIndex]],
686  &inValuesValues
687  .template data<V>()[inValuesValuesOffset_[inputIndex]],
688  &outValuesValuesData[outValuesValuesOffset]);
689  outValuesValuesOffset +=
690  inValuesLengthsData[inKeysOffset_[inputIndex]];
691  inValuesValuesOffset_[inputIndex] +=
692  inValuesLengthsData[inKeysOffset_[inputIndex]];
693  ++outKeysOffset;
694  ++inKeysOffset_[inputIndex];
695  }
696  }
697  }
698 
699  return true;
700  }
701 
702  private:
703  const int kNumTensorsPerInput = 5;
704  int numInputs_;
705  std::vector<int> inKeysOffset_;
706  std::vector<int> inValuesValuesOffset_;
707 };
708 
709 template <class Context>
711  public:
712  USE_OPERATOR_CONTEXT_FUNCTIONS;
713 
714  template <class... Args>
715  explicit MergeMultiListOrMapFeatureTensorsGradientOp(Args&&... args)
716  : Operator<Context>(std::forward<Args>(args)...) {
717  numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
718  }
719  virtual ~MergeMultiListOrMapFeatureTensorsGradientOp() noexcept {}
720 
721  bool RunOnDevice() override {
722  return DispatchHelper<
724  call(this, Input(InputSize() - 1));
725  }
726 
727  template <typename T>
728  bool DoRunWithType() {
729  int numExamples = Input(0).numel();
730  std::vector<int> outValuesLengthOffset(numFeatureInputs_);
731  std::vector<int> outValuesValuesOffset(numFeatureInputs_);
732  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
733  int inputNumValues = 0;
734  auto& inValuesLength = Input(kNumTensorsPerInput * inputIndex + 1);
735  const int32_t* inValuesLengthsData =
736  inValuesLength.template data<int32_t>();
737  for (int valuesIndex = 0; valuesIndex < inValuesLength.numel();
738  ++valuesIndex) {
739  inputNumValues += inValuesLengthsData[valuesIndex];
740  }
741  Output(inputIndex)->Resize(inputNumValues);
742  }
743 
744  const auto& inValuesValuesGrad = Input(InputSize() - 1);
745  const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();
746 
747  int inValuesValuesOffset = 0;
748  for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
749  for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
750  const int32_t* inLengthsData =
751  Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
752  const int32_t* inValuesLengthsData =
753  Input(kNumTensorsPerInput * inputIndex + 1)
754  .template data<int32_t>();
755  int valuesLengthCopy = 0;
756  for (int valuesLengthIndex = 0;
757  valuesLengthIndex < inLengthsData[exampleIndex];
758  ++valuesLengthIndex) {
759  valuesLengthCopy += inValuesLengthsData
760  [outValuesLengthOffset[inputIndex] + valuesLengthIndex];
761  }
762  if (valuesLengthCopy > 0) {
763  T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
764  context_.CopyItemsSameDevice(
765  inValuesValuesGrad.dtype(),
766  valuesLengthCopy,
767  &inValuesValuesGradData[inValuesValuesOffset],
768  &outFeatureValues[outValuesValuesOffset[inputIndex]]);
769  }
770  outValuesLengthOffset[inputIndex] += inLengthsData[exampleIndex];
771  outValuesValuesOffset[inputIndex] += valuesLengthCopy;
772  inValuesValuesOffset += valuesLengthCopy;
773  }
774  }
775  return true;
776  }
777 
778  private:
779  int kNumTensorsPerInput = 2;
780  int numFeatureInputs_;
781 };
782 
783 } // namespace caffe2
784 
785 #endif // CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_
// Doxygen cross-reference residue (kept as comments, not part of this header):
//   const Tensor& Input(int idx, DeviceType type = Context::GetDeviceType())
//     Retrieve a non-owning reference to the input at position 'idx' for this
//     operator. Definition: operator.h:702
//   A global dictionary that holds information about what Caffe2 modules have
//   been loaded in the current runtime. Definition: blob.h:13