#ifndef CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_
#define CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

namespace caffe2 {

// Merges single scalar features, each given as a (value, presence) pair of
// inputs, into one multi-feature representation: per-example feature counts,
// feature IDs (from the "feature_ids" argument), and the present values.
template <class Context>
class MergeSingleScalarFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleScalarFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  virtual ~MergeSingleScalarFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
      for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    T* outValuesData = outValues->template mutable_data<T>();

    int keysOffset = 0;
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const T* inData =
            Input(kNumTensorsPerInput * inputIndex).template data<T>();
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesData[keysOffset] = inData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 2;
  int numInputs_;
  std::vector<int64_t> featureIDs_;
};

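// Gradient counterpart of MergeSingleScalarFeatureTensorsOp: scatters the
// merged values gradient (last input) back into one dense tensor per
// feature, guided by each feature's presence input; absent slots get T().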
template <class Context>
class MergeSingleScalarFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleScalarFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    // All inputs except the last one (the merged values gradient).
    numFeatureInputs_ = InputSize() - 1;
  }
  virtual ~MergeSingleScalarFeatureTensorsGradientOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
      Output(inputIndex)->ResizeLike(Input(inputIndex));
    }

    const T* inValuesGradData = Input(InputSize() - 1).template data<T>();

    T default_value = T();
    int valuesOffset = 0;
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
        const bool* inPresenceData = Input(inputIndex).template data<bool>();
        T* outFeatureData = Output(inputIndex)->template mutable_data<T>();
        if (inPresenceData[exampleIndex]) {
          outFeatureData[exampleIndex] = inValuesGradData[valuesOffset];
          ++valuesOffset;
        } else {
          outFeatureData[exampleIndex] = default_value;
        }
      }
    }
    return true;
  }

 private:
  int numFeatureInputs_;
};

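// Merges single list features, each given as a (lengths, values, presence)
// triple of inputs, into one multi-feature representation: per-example
// feature counts, feature IDs, per-feature value lengths, and the
// concatenated values.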
template <class Context>
class MergeSingleListFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleListFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inValuesOffset_.resize(numInputs_);
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  virtual ~MergeSingleListFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
      for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
          totalNumValues += inLengthsData[exampleIndex];
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    T* outValuesValuesData = outValuesValues->template mutable_data<T>();

    int keysOffset = 0;
    int valuesOffset = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      inValuesOffset_[inputIndex] = 0;
    }
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 1);
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 2).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
          context_.CopyItemsSameDevice(
              inValues.dtype(),
              inLengthsData[exampleIndex],
              &inValues.template data<T>()[inValuesOffset_[inputIndex]],
              &outValuesValuesData[valuesOffset]);
          valuesOffset += inLengthsData[exampleIndex];
          inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 3;
  int numInputs_;
  std::vector<int> inValuesOffset_;
  std::vector<int64_t> featureIDs_;
};

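// Gradient shared by the single list/map merge ops: splits the merged
// values gradient (last input) back into one values tensor per feature,
// using each feature's (lengths, presence) input pair to size and address
// the per-example segments.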
template <class Context>
class MergeSingleListOrMapFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleListOrMapFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  virtual ~MergeSingleListOrMapFeatureTensorsGradientOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesOffset(numFeatureInputs_);
    for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
      int inputNumValues = 0;
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
      for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
        if (inPresenceData[exampleIndex]) {
          inputNumValues += inLengthsData[exampleIndex];
        }
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesValuesGrad = Input(InputSize() - 1);
    const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();

    int inValuesValuesOffset = 0;
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 1).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesValuesGrad.dtype(),
              inLengthsData[exampleIndex],
              &inValuesValuesGradData[inValuesValuesOffset],
              &outFeatureValues[outValuesOffset[inputIndex]]);
          outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
          inValuesValuesOffset += inLengthsData[exampleIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 2;
  int numFeatureInputs_;
};

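// Merges single map features, each given as a (lengths, keys, values,
// presence) quadruple of inputs, into one multi-feature representation;
// map keys and map values are dispatched independently as types K and V.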
template <class Context>
class MergeSingleMapFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeSingleMapFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inValuesOffset_.resize(numInputs_);
    featureIDs_ = this->template GetRepeatedArgument<int64_t>("feature_ids");
  }
  virtual ~MergeSingleMapFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(1));
  }

  template <typename K>
  bool DoRunWithType() {
    return DispatchHelper<
        TensorTypes2<bool, int32_t, int64_t, float, double, std::string>,
        K>::call(this, Input(2));
  }

  template <typename K, typename V>
  bool DoRunWithType2() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      const bool* inPresenceData =
          Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
      for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
        if (inPresenceData[exampleIndex]) {
          ++totalNumFeatures;
          totalNumValues += inLengthsData[exampleIndex];
        }
      }
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
    auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
    V* outValuesValuesData = outValuesValues->template mutable_data<V>();

    int keysOffset = 0;
    int valuesOffset = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      inValuesOffset_[inputIndex] = 0;
    }
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const auto& inKeys = Input(kNumTensorsPerInput * inputIndex + 1);
        const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 2);
        const bool* inPresenceData =
            Input(kNumTensorsPerInput * inputIndex + 3).template data<bool>();
        if (inPresenceData[exampleIndex]) {
          ++outLengthsData[exampleIndex];
          outKeysData[keysOffset] = featureIDs_[inputIndex];
          outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex];
          context_.CopyItemsSameDevice(
              inKeys.dtype(),
              inLengthsData[exampleIndex],
              &inKeys.template data<K>()[inValuesOffset_[inputIndex]],
              &outValuesKeysData[valuesOffset]);
          context_.CopyItemsSameDevice(
              inValues.dtype(),
              inLengthsData[exampleIndex],
              &inValues.template data<V>()[inValuesOffset_[inputIndex]],
              &outValuesValuesData[valuesOffset]);
          valuesOffset += inLengthsData[exampleIndex];
          inValuesOffset_[inputIndex] += inLengthsData[exampleIndex];
          ++keysOffset;
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 4;
  int numInputs_;
  std::vector<int> inValuesOffset_;
  std::vector<int64_t> featureIDs_;
};

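// Merges multi-feature scalar inputs, each given as a (lengths, keys,
// values) triple, by concatenating the per-example segments of all inputs.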
template <class Context>
class MergeMultiScalarFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiScalarFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
  }
  virtual ~MergeMultiScalarFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(2));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValues = Output(2, {totalNumFeatures}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    T* outValuesData = outValues->template mutable_data<T>();

    int outKeysOffset = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      inKeysOffset_[inputIndex] = 0;
    }
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
                                        .template data<int64_t>();
        const T* inValuesData =
            Input(kNumTensorsPerInput * inputIndex + 2).template data<T>();
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesData[outKeysOffset] =
              inValuesData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 3;
  int numInputs_;
  std::vector<int> inKeysOffset_;
};

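// Gradient counterpart of MergeMultiScalarFeatureTensorsOp: routes the
// merged values gradient (last input) back into one values tensor per
// input, using each input's lengths tensor to size and locate the segments.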
template <class Context>
class MergeMultiScalarFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiScalarFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  virtual ~MergeMultiScalarFeatureTensorsGradientOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesOffset(numFeatureInputs_);
    for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
      int inputNumValues = 0;
      const int32_t* inLengthsData =
          Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
      for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
        inputNumValues += inLengthsData[exampleIndex];
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesGrad = Input(InputSize() - 1);
    const T* inValuesGradData = inValuesGrad.template data<T>();

    int inValuesOffset = 0;
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        if (inLengthsData[exampleIndex] > 0) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesGrad.dtype(),
              inLengthsData[exampleIndex],
              &inValuesGradData[inValuesOffset],
              &outFeatureValues[outValuesOffset[inputIndex]]);
          outValuesOffset[inputIndex] += inLengthsData[exampleIndex];
          inValuesOffset += inLengthsData[exampleIndex];
        }
      }
    }
    return true;
  }

 private:
  int kNumTensorsPerInput = 1;
  int numFeatureInputs_;
};

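// Merges multi-feature list inputs, each given as (lengths, keys,
// value-lengths, values), concatenating both the key segments and their
// variable-length value lists.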
template <class Context>
class MergeMultiListFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiListFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
    inValuesValuesOffset_.resize(numInputs_);
  }
  virtual ~MergeMultiListFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(3));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
      totalNumValues += Input(kNumTensorsPerInput * inputIndex + 3).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesValues = Output(3, {totalNumValues}, at::dtype<T>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    T* outValuesValuesData = outValuesValues->template mutable_data<T>();

    int outKeysOffset = 0;
    int outValuesValuesOffset = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      inKeysOffset_[inputIndex] = 0;
      inValuesValuesOffset_[inputIndex] = 0;
    }
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
                                        .template data<int64_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 2)
                .template data<int32_t>();
        const auto& inValuesValues =
            Input(kNumTensorsPerInput * inputIndex + 3);
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesLengthsData[outKeysOffset] =
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          context_.CopyItemsSameDevice(
              inValuesValues.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesValues
                   .template data<T>()[inValuesValuesOffset_[inputIndex]],
              &outValuesValuesData[outValuesValuesOffset]);
          outValuesValuesOffset +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          inValuesValuesOffset_[inputIndex] +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 4;
  int numInputs_;
  std::vector<int> inKeysOffset_;
  std::vector<int> inValuesValuesOffset_;
};

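// Merges multi-feature map inputs, each given as (lengths, keys,
// value-lengths, value-keys, value-values); map keys and map values are
// dispatched independently as types K and V.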
template <class Context>
class MergeMultiMapFeatureTensorsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiMapFeatureTensorsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numInputs_ = InputSize() / kNumTensorsPerInput;
    inKeysOffset_.resize(numInputs_);
    inValuesValuesOffset_.resize(numInputs_);
  }
  virtual ~MergeMultiMapFeatureTensorsOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(3));
  }

  template <typename K>
  bool DoRunWithType() {
    return DispatchHelper<
        TensorTypes2<bool, int32_t, int64_t, float, double, std::string>,
        K>::call(this, Input(4));
  }

  template <typename K, typename V>
  bool DoRunWithType2() {
    int numExamples = Input(0).numel();
    int totalNumFeatures = 0;
    int totalNumValues = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel();
      totalNumValues += Input(kNumTensorsPerInput * inputIndex + 4).numel();
    }

    auto* outLengths = Output(0, {numExamples}, at::dtype<int32_t>());
    auto* outKeys = Output(1, {totalNumFeatures}, at::dtype<int64_t>());
    auto* outValuesLengths =
        Output(2, {totalNumFeatures}, at::dtype<int32_t>());
    auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype<K>());
    auto* outValuesValues = Output(4, {totalNumValues}, at::dtype<V>());

    int32_t* outLengthsData = outLengths->template mutable_data<int32_t>();
    int64_t* outKeysData = outKeys->template mutable_data<int64_t>();
    int32_t* outValuesLengthsData =
        outValuesLengths->template mutable_data<int32_t>();
    K* outValuesKeysData = outValuesKeys->template mutable_data<K>();
    V* outValuesValuesData = outValuesValues->template mutable_data<V>();

    int outKeysOffset = 0;
    int outValuesValuesOffset = 0;
    for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
      inKeysOffset_[inputIndex] = 0;
      inValuesValuesOffset_[inputIndex] = 0;
    }
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      outLengthsData[exampleIndex] = 0;
      for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1)
                                        .template data<int64_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 2)
                .template data<int32_t>();
        const auto& inValuesKeys = Input(kNumTensorsPerInput * inputIndex + 3);
        const auto& inValuesValues =
            Input(kNumTensorsPerInput * inputIndex + 4);
        outLengthsData[exampleIndex] += inLengthsData[exampleIndex];
        for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex];
             ++featureIndex) {
          outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]];
          outValuesLengthsData[outKeysOffset] =
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          context_.CopyItemsSameDevice(
              inValuesKeys.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesKeys
                   .template data<K>()[inValuesValuesOffset_[inputIndex]],
              &outValuesKeysData[outValuesValuesOffset]);
          context_.CopyItemsSameDevice(
              inValuesValues.dtype(),
              inValuesLengthsData[inKeysOffset_[inputIndex]],
              &inValuesValues
                   .template data<V>()[inValuesValuesOffset_[inputIndex]],
              &outValuesValuesData[outValuesValuesOffset]);
          outValuesValuesOffset +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          inValuesValuesOffset_[inputIndex] +=
              inValuesLengthsData[inKeysOffset_[inputIndex]];
          ++outKeysOffset;
          ++inKeysOffset_[inputIndex];
        }
      }
    }
    return true;
  }

 private:
  const int kNumTensorsPerInput = 5;
  int numInputs_;
  std::vector<int> inKeysOffset_;
  std::vector<int> inValuesValuesOffset_;
};

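// Gradient shared by the multi list/map merge ops: splits the merged values
// gradient (last input) back per input, summing the value-lengths belonging
// to each example's keys to determine how many values to copy.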
template <class Context>
class MergeMultiListOrMapFeatureTensorsGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit MergeMultiListOrMapFeatureTensorsGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {
    numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput;
  }
  virtual ~MergeMultiListOrMapFeatureTensorsGradientOp() noexcept {}

  bool RunOnDevice() override {
    return DispatchHelper<
        TensorTypes<bool, int32_t, int64_t, float, double, std::string>>::
        call(this, Input(InputSize() - 1));
  }

  template <typename T>
  bool DoRunWithType() {
    int numExamples = Input(0).numel();
    std::vector<int> outValuesLengthOffset(numFeatureInputs_);
    std::vector<int> outValuesValuesOffset(numFeatureInputs_);
    for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
      int inputNumValues = 0;
      auto& inValuesLength = Input(kNumTensorsPerInput * inputIndex + 1);
      const int32_t* inValuesLengthsData =
          inValuesLength.template data<int32_t>();
      for (int valuesIndex = 0; valuesIndex < inValuesLength.numel();
           ++valuesIndex) {
        inputNumValues += inValuesLengthsData[valuesIndex];
      }
      Output(inputIndex)->Resize(inputNumValues);
    }

    const auto& inValuesValuesGrad = Input(InputSize() - 1);
    const T* inValuesValuesGradData = inValuesValuesGrad.template data<T>();

    int inValuesValuesOffset = 0;
    for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) {
      for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) {
        const int32_t* inLengthsData =
            Input(kNumTensorsPerInput * inputIndex).template data<int32_t>();
        const int32_t* inValuesLengthsData =
            Input(kNumTensorsPerInput * inputIndex + 1)
                .template data<int32_t>();
        int valuesLengthCopy = 0;
        for (int valuesLengthIndex = 0;
             valuesLengthIndex < inLengthsData[exampleIndex];
             ++valuesLengthIndex) {
          valuesLengthCopy += inValuesLengthsData
              [outValuesLengthOffset[inputIndex] + valuesLengthIndex];
        }
        if (valuesLengthCopy > 0) {
          T* outFeatureValues = Output(inputIndex)->template mutable_data<T>();
          context_.CopyItemsSameDevice(
              inValuesValuesGrad.dtype(),
              valuesLengthCopy,
              &inValuesValuesGradData[inValuesValuesOffset],
              &outFeatureValues[outValuesValuesOffset[inputIndex]]);
        }
        outValuesLengthOffset[inputIndex] += inLengthsData[exampleIndex];
        outValuesValuesOffset[inputIndex] += valuesLengthCopy;
        inValuesValuesOffset += valuesLengthCopy;
      }
    }
    return true;
  }

 private:
  int kNumTensorsPerInput = 2;
  int numFeatureInputs_;
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_