#ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_
#define CAFFE2_OPERATORS_UTILITY_OPS_H_

#include <algorithm>
#include <chrono>
#include <cmath>
#include <functional>
#include <map>
#include <numeric>
#include <thread>
#include <utility>
#include <vector>

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/types.h"
#include "caffe2/operators/gather_op.h"
#include "caffe2/utils/conversions.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <class Context>
class NanCheckOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit NanCheckOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override;
};
struct GetNanCheckGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  std::vector<OperatorDef> GetGradientDefs() override {
    return {CreateOperatorDef(
        "NanCheck",
        "",
        std::vector<string>{GO(0)},
        std::vector<string>{GI(0)})};
  }
};
template <class Context>
class IsNanOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  IsNanOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& X = Input(0);
    auto* Y = Output(0, X.sizes(), at::dtype<uint8_t>());
    const auto* X_data = X.template data<T>();
    uint8_t* Y_data = Y->template mutable_data<uint8_t>();
    for (size_t i = 0; i < X.numel(); i++) {
      Y_data[i] = (uint8_t)(std::isnan(X_data[i]));
    }
    return true;
  }
};
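// Behavior sketch: for a float input X = {1.0f, NAN, 3.0f}, the loop above
// emits the uint8 tensor Y = {0, 1, 0} with the same shape as X.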
template <class Context>
class WallClockTimeOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit WallClockTimeOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override {
    int64_t nanoseconds = static_cast<long int>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::high_resolution_clock::now().time_since_epoch())
            .count());

    auto* output = Output(0);
    output->Resize();
    *output->template mutable_data<int64_t>() = nanoseconds;

    return true;
  }
};
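// The timestamp above is ordinary std::chrono; the standalone equivalent
// (outside any operator) is:
//
//   auto now = std::chrono::high_resolution_clock::now();
//   int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
//                    now.time_since_epoch())
//                    .count();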
const char kPrintFileExtension[] = ".log";
template <class Context>
class PrintOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_DISPATCH_HELPER;
  PrintOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        tensor_printer_(
            operator_def.input(0),
            this->template GetSingleArgument<int>("to_file", 0)
                ? ws->RootFolder() + "/" + operator_def.input(0) +
                    kPrintFileExtension
                : "",
            this->template GetSingleArgument<int>("limit", 0)),
        every_n_(this->template GetSingleArgument<int>("every_n", 1)) {
    CAFFE_ENFORCE_GE(every_n_, 1);
  }
  bool RunOnDevice() override {
    if (++occurrences_mod_n_ > every_n_) {
      occurrences_mod_n_ -= every_n_;
    }
    if (occurrences_mod_n_ != 1) {
      return true;
    }

    if (!this->InputIsTensorType(0, Context::GetDeviceType()) &&
        !this->InputIsTensorType(0, CPU)) {
      LOG(INFO) << "Blob of type: "
                << OperatorBase::Inputs().at(0)->meta().name();
      return true;
    }
    // Special-case empty tensors since they may have no meta().
    if (Input(0).numel() == 0) {
      tensor_printer_.PrintMeta(Input(0));
      return true;
    }

    using Types = TensorTypes<
        float,
        double,
        int,
        long,
        bool,
        char,
        unsigned char,
        std::string>;

    if (this->InputIsTensorType(0, CPU)) {
      return DispatchHelper<Types>::call(
          this, this->template Input<Tensor>(0, CPU));
    } else {
      return DispatchHelper<Types>::call(this, Input(0));
    }
  }
 private:
  template <typename T>
  bool DoRunWithType() {
    // If the input is not already on CPU, copy it so it can be printed;
    // either way `tensor` ends up pointing at a CPU tensor.
    const TensorCPU* tensor;
    Tensor tensor_copy_if_needed(CPU);
    if (this->InputIsTensorType(0, CPU)) {
      tensor = &this->template Input<Tensor>(0, CPU);
    } else {
      // sync copy
      tensor_copy_if_needed.CopyFrom(Input(0));
      tensor = &tensor_copy_if_needed;
    }
    tensor_printer_.Print<T>(*tensor);
    return true;
  }

  TensorPrinter tensor_printer_;
  int every_n_;
  int occurrences_mod_n_{0};
};
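// Usage sketch (illustrative; the registered op name "Print" and the blob
// name "W" are assumptions): printing a blob every 100 executions to a file
// under the workspace root instead of to the log:
//
//   OperatorDef def = CreateOperatorDef(
//       "Print",
//       "",
//       std::vector<string>{"W"},
//       std::vector<string>{},
//       {MakeArgument<int>("to_file", 1), MakeArgument<int>("every_n", 100)});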
/**
 * @brief Alias op makes the output and the input share the same underlying
 * storage.
 */
template <class Context>
class AliasOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AliasOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized");
    OutputTensorAlias(0, input);
    return true;
  }
};
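// Note: because Alias shares storage instead of copying, a later in-place
// write to the output blob is visible through the input blob as well; use a
// copying op when separate storage is required.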
template <class Context>
class EnsureDenseOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(EnsureDenseOp)

  bool RunOnDevice() override {
    const auto& input = Input(0);
    auto* output = Output(0);
    CAFFE_ENFORCE_GT(input.dim(), 0, "Input has to be at least a vector.");
    // The output may be written in place; otherwise copy the input over.
    if (&input != output) {
      output->ResizeLike(input);
      output->CopyFrom(input, true /*async*/);
    }
    return true;
  }
};
template <class Context>
class FlattenToVecOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(FlattenToVecOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    CAFFE_ENFORCE_GE(
        input.dim(), 1, "The rank of the tensor must be >= 1.");
    output->Resize(input.numel());

    context_.CopyItemsSameDevice(
        input.dtype(),
        input.numel(),
        input.raw_data(),
        output->raw_mutable_data(input.dtype()));
    return true;
  }
};
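// For example, a 2x3 input {{1, 2, 3}, {4, 5, 6}} comes out as the
// 6-element vector {1, 2, 3, 4, 5, 6}; only the shape changes, the bytes
// are copied as-is.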
// Output gets the data of input(0), but reshapes it like input(1).
template <class Context>
class ResizeLikeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ResizeLikeOp);

  bool RunOnDevice() override {
    auto& input0 = Input(0);
    auto& input1 = Input(1);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(input0.numel(), input1.numel());
    output->ResizeLike(Input(1));
    context_.CopyItemsSameDevice(
        input0.dtype(),
        input0.numel(),
        input0.raw_data(),
        output->raw_mutable_data(input0.dtype()));
    return true;
  }
};
template <class Context>
class SumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SumOp);

  template <typename T, typename M>
  bool DoRunWithType() {
    auto& input0 = Input(0);

    if (InputSize() == 1) {
      // A single input is simply forwarded to the output via an async copy.
      OutputTensorCopyFrom(
          0,
          at::dtype(input0.dtype()),
          input0,
          true /*async*/);
      return true;
    }
    auto* output = Output(0, input0.sizes(), at::dtype<T>());
    T* output_data = output->template mutable_data<T>();
    // Dimension checking
    for (int i = 1; i < InputSize(); ++i) {
      if (output->sizes() != Input(i).sizes()) {
        CAFFE_THROW(
            "Check failed: output->sizes() == Input(i).sizes().",
            "Description: Input #",
            i,
            ", input dimension:",
            Input(i).sizes(),
            " should match output dimension: ",
            output->sizes());
      }
    }

    // Add the first two inputs - this works whether in-place or not.
    math::Add(
        output->numel(),
        input0.template data<T>(),
        Input(1).template data<T>(),
        output_data,
        &context_);
    // Fold in the remaining inputs.
    for (int i = 2; i < InputSize(); ++i) {
      math::Add(
          output->numel(),
          output_data,
          Input(i).template data<T>(),
          output_data,
          &context_);
    }
    return true;
  }
  bool RunOnDevice() override {
    if (Input(0).template IsType<float>()) {
      return DoRunWithType<float, float>();
    } else if (Input(0).template IsType<int>()) {
      return DoRunWithType<int, int>();
    } else {
      CAFFE_THROW(
          "Sum operator only supports 32-bit float and ints, but",
          " input was of type ",
          Input(0).dtype().name());
    }
  }
  static struct OpSchema::Cost CostInferenceForSum(
      const OperatorDef& def,
      const std::vector<TensorShape>& in) {
    struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
    cost.flops *= (in.size() - 1);
    cost.params_bytes = 0;
    return cost;
  }
};
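// Cost intuition: summing k same-shaped inputs of n elements each performs
// (k - 1) * n additions, which is what the inference above reports by
// scaling the single-pass pointwise flop count by (in.size() - 1).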
// WeightedSumOp computes the weighted sum of several tensors. The input
// should be in the form X_0, weight_0, X_1, weight_1, ..., where the X_i all
// have the same shape and each weight_i is a size-1 tensor. In-place
// computation is only allowed with X_0 as the output, not any other X_i.
template <class Context>
class WeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(WeightedSumOp);

  bool RunOnDevice() override;
  template <typename T>
  bool DoRunWithType() {
    const int input_size = this->InputSize();
    CAFFE_ENFORCE_EQ(input_size % 2, 0);
    const auto& X0 = Input(0);
    const auto& weight0 = Input(1);
    CAFFE_ENFORCE_GT(X0.numel(), 0);
    CAFFE_ENFORCE_EQ(weight0.numel(), 1);
    const int size = X0.numel();
    auto* Y = Output(0, X0.sizes(), at::dtype<T>());
    T* Y_data = Y->template mutable_data<T>();
    if (input_size == 2) {
      math::Scale<float, T>(
          size,
          weight0.template data<float>(),
          X0.template data<T>(),
          Y_data,
          &context_);
      return true;
    }
    const auto& X1 = Input(2);
    CAFFE_ENFORCE(
        !IsInputOutputAlias(2, 0),
        "Input #2 is the same as output. If you want to do in-place updates, "
        "put the output as input #0.");
    const auto& weight1 = Input(3);
    CAFFE_ENFORCE_EQ(X1.numel(), size);
    CAFFE_ENFORCE_EQ(weight1.numel(), 1);
    if (!IsInputOutputAlias(0, 0)) {
      context_.template CopySameDevice<T>(size, X0.template data<T>(), Y_data);
    }
    math::Axpby<float, T, Context>(
        size,
        weight1.template data<float>(),
        X1.template data<T>(),
        weight0.template data<float>(),
        Y_data,
        &context_);
    for (int i = 4; i < input_size; i += 2) {
      const auto& Xi = Input(i);
      // In-place updates may only alias input #0; any other aliasing input
      // would be read after Y has already been overwritten.
      const std::string err_msg = "Input #" + to_string(i) +
          " is the same as output. If you want to do in-place updates, "
          "put the output as input #0.";
      CAFFE_ENFORCE(!IsInputOutputAlias(i, 0), err_msg);
      const auto& weighti = Input(i + 1);
      CAFFE_ENFORCE_EQ(Xi.numel(), size);
      CAFFE_ENFORCE_EQ(weighti.numel(), 1);
      math::Axpy<T, Context>(
          size,
          weighti.template data<float>(),
          Xi.template data<T>(),
          Y_data,
          &context_);
    }
    return true;
  }
};
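// In formula form the kernel above computes
//   Y = w_0 * X_0 + w_1 * X_1 + ... + w_{k-1} * X_{k-1}
// via one Axpby (Y = w_1 * X_1 + w_0 * Y) followed by Axpy accumulations
// (Y += w_i * X_i) for the remaining pairs.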
template <class Context>
class WeightedSumGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit WeightedSumGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        grad_on_w_(
            this->template GetSingleArgument<bool>("grad_on_w", false)) {}
  template <typename DstType>
  bool DoRunWithType() {
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto output_size = grad_on_w_ ? InputSize() - 1 : InputSize() / 2;
    CAFFE_ENFORCE_EQ(OutputSize(), output_size);

    auto& dY = Input(0);
    const auto* dY_data = dY.template data<DstType>();
    int size = dY.numel();

    for (int i = 0; i < InputSize() / 2; i++) {
      auto& cur_w = Input(2 * i + 2);
      CAFFE_ENFORCE_EQ(cur_w.numel(), 1);

      auto* cur_dX = Output(i, dY.sizes(), at::dtype<DstType>());

      math::Scale<float, DstType, Context>(
          size,
          cur_w.template data<float>(),
          dY_data,
          cur_dX->template mutable_data<DstType>(),
          &context_);

      if (grad_on_w_) {
        auto& cur_X = Input(2 * i + 1);
        CAFFE_ENFORCE_EQ(cur_X.numel(), size);
        auto* cur_dw = Output(i + output_size / 2);
        cur_dw->Resize(1);
        math::Dot<DstType, Context>(
            size,
            dY_data,
            cur_X.template data<DstType>(),
            cur_dw->template mutable_data<float>(),
            &context_);
      }
    }

    return true;
  }
  bool RunOnDevice() override;

 private:
  bool grad_on_w_;
};
/**
 * @brief Update slices of the tensor in-place with weighted sum.
 *
 * The first input X0 is updated in place: only the slices of its first
 * dimension selected by INDICES (input 2) are touched.
 */
template <typename T, class Context>
class ScatterWeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ScatterWeightedSumOp);
  USE_DISPATCH_HELPER;

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(2));
  }

 private:
  template <typename Index>
  bool DoRunWithType() {
    int64_t block_size = Input(0).size_from_dim(1);
    return DispatchHelper<FixedValues<1>, Index>::call(this, block_size);
  }
  template <typename Index, int FixedSize>
  bool DoRunWithValue() {
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto& X0 = Input(0);
    auto& weight0 = Input(1);
    auto& indices = Input(2);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&X0, output, "In place operation is required");

    CAFFE_ENFORCE_GT(X0.numel(), 0);
    CAFFE_ENFORCE_GT(X0.dim(), 0, "X0 has to be at least the vector");
    CAFFE_ENFORCE_EQ(weight0.numel(), 1);
    int64_t M = X0.numel();
    int64_t N = X0.size(0);
    int64_t K = indices.numel();
    int64_t block_size = M / N;
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    T w0 = *weight0.template data<T>();
    // w0 is most likely a constant, so exact comparison is fine.
    if (w0 != 1.0) {
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
            "Index out of bounds: ",
            idx,
            ", range 0 to ",
            N);
        math::ScaleFixedSize<T, Context, FixedSize>(
            block_size,
            w0,
            data + block_size * idx,
            data + block_size * idx,
            &context_);
      }
    }
    for (int inp = 3; inp < InputSize(); inp += 2) {
      auto& X = Input(inp);
      auto& weight = Input(inp + 1);
      CAFFE_ENFORCE_EQ(X.numel(), block_size * K);
      CAFFE_ENFORCE_EQ(weight.numel(), 1);
      const T* x_data = X.template data<T>();
      T w = *weight.template data<T>();
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // Double-checking the indices, but it's fine as it's DCHECK only.
        DCHECK(0 <= idx && idx < N)
            << "Index out of bounds: " << idx << ", range 0 to " << N;
        math::AxpyFixedSize<T, Context, FixedSize>(
            block_size,
            w,
            x_data + block_size * i,
            data + block_size * idx,
            &context_);
      }
    }
    return true;
  }
};
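// Semantics sketch (assuming distinct indices): with X0 of shape (N, b),
// INDICES of length K, and extra pairs (X_i, w_i) where X_i has shape
// (K, b), each selected row j becomes
//   X0[INDICES[j]] = w0 * X0[INDICES[j]] + sum_i w_i * X_i[j]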
/**
 * @brief Update slices of the tensor in-place by overriding.
 */
template <class Context>
class ScatterAssignOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  virtual ~ScatterAssignOp() {}

  template <class... Args>
  explicit ScatterAssignOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        runners_({{{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int32_t, float>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int32_t, at::Half>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_UINT8},
                   &ScatterAssignOp::DoRun<int32_t, uint8_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int32_t, int32_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int32_t, int64_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int64_t, float>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int64_t, at::Half>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_UINT8},
                   &ScatterAssignOp::DoRun<int64_t, uint8_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int64_t, int32_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int64_t, int64_t>}}) {}
  bool RunOnDevice() override {
    const auto& data = Input(DATA);
    const auto& slices = Input(SLICES);
    auto& indices = Input(INDICES);

    const auto dataType = TypeMetaToDataType(data.dtype());
    const auto slicesType = TypeMetaToDataType(slices.dtype());
    const auto indicesType = TypeMetaToDataType(indices.dtype());
    auto* output = Output(0);

    auto runner = GetRunner(dataType, slicesType, indicesType);
    (this->*runner)();
    return true;
  }
 private:
  typedef void (ScatterAssignOp::*RunnerType)();
  typedef std::
      map<std::pair<TensorProto_DataType, TensorProto_DataType>, RunnerType>
          RunnerMap;

  RunnerMap runners_;

  RunnerType GetRunner(
      const TensorProto_DataType dataType,
      const TensorProto_DataType slicesType,
      const TensorProto_DataType indicesType) {
    CAFFE_ENFORCE_EQ(dataType, slicesType, "Data and slice types must match");
    auto it = runners_.find({indicesType, dataType});
    CAFFE_ENFORCE(
        it != runners_.end(),
        "Could not find the runner corresponding to indicesType, dataType = ",
        indicesType,
        " , ",
        dataType);
    return it->second;
  }
  template <typename Index, typename T>
  void DoRun() {
    auto& input = Input(DATA);
    auto& indices = Input(INDICES);
    auto& slices = Input(SLICES);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&input, output, "In place operation is required");

    CAFFE_ENFORCE_GT(input.dim(), 0, "X0 has to be at least the vector");
    int64_t M = input.numel();
    int64_t N = input.size(0);
    int64_t K = indices.numel();
    int64_t block_size = M / N;
    CAFFE_ENFORCE_EQ(slices.numel(), block_size * K);
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    const T* slicesData = slices.template data<T>();
    DoScatterAssign(data, idxs, slicesData, N, K, block_size);
  }
  template <typename Index, typename T>
  void DoScatterAssign(
      T* data,
      const Index* idxs,
      const T* slicesData,
      int64_t N,
      int64_t K,
      int64_t block_size) {
    for (int i = 0; i < K; ++i) {
      Index idx = idxs[i];
      // Double-checking the indices, but it's fine as it's DCHECK only.
      DCHECK(0 <= idx && idx < N)
          << "Index out of bounds: " << idx << ", range 0 to " << N;
      context_.template CopySameDevice<T>(
          block_size, slicesData + block_size * i, data + block_size * idx);
    }
  }

  INPUT_TAGS(DATA, INDICES, SLICES);
};
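// Worked example (illustrative): with DATA of shape (4, 2) and
// INDICES = {3, 1}, SLICES must have shape (2, 2); row SLICES[0] overwrites
// DATA[3] and SLICES[1] overwrites DATA[1], via plain copies of
// block_size = 2 elements each.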
template <class Context>
class LengthsToSegmentIdsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToSegmentIdsOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto total_length =
        std::accumulate(input_data, input_data + input.numel(), 0);

    output->Resize(total_length);
    auto* output_data = output->template mutable_data<int32_t>();

    for (int i = 0; i < input.numel(); ++i) {
      auto len = input_data[i];
      std::fill(output_data, output_data + len, i);
      output_data += len;
    }
    return true;
  }
};
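// For example, lengths {2, 3, 1} expand to segment ids {0, 0, 1, 1, 1, 2}.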
template <class Context>
class LengthsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToRangesOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto size = input.numel();

    output->Resize(size, 2);
    auto* output_data = output->template mutable_data<int32_t>();

    int32_t offset = 0;
    for (int i = 0; i < size; ++i) {
      auto len = input_data[i];
      output_data[i * 2] = offset;
      output_data[i * 2 + 1] = len;
      offset += len;
    }
    return true;
  }
};
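// For example, lengths {2, 3, 1} become the (offset, length) ranges
// {{0, 2}, {2, 3}, {5, 1}}.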
template <class Context>
class SegmentIdsToLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToLengthsOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    if (input.dim() == 2) {
      CAFFE_ENFORCE(
          input.dim32(0) == 1 || input.dim32(1) == 1,
          "Input must be a vector.");
    } else {
      CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");
    }
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);
    // segment ids start from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).dim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).size(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).size(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments, 0);
    Index prev = 0; // segment ids are assumed to be non-negative
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      prev = input_data[i];
      output_data[input_data[i]] += 1;
    }
    return true;
  }
};
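// For example, segment ids {0, 0, 1, 1, 1, 2} collapse back to lengths
// {2, 3, 1}; extra trailing zero-length segments appear only when Input(1)
// is provided to pin a larger number of segments.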
template <class Context>
class SegmentIdsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToRangesOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);
    // segment ids start from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).dim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).size(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).size(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments, 2);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments * 2, 0);
    Index prev = input_data[0];
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      while (prev != input_data[i]) {
        ++prev;
        output_data[prev * 2] = i;
      }
      output_data[input_data[i] * 2 + 1] += 1;
    }
    return true;
  }
};
template <class Context>
class LengthsToWeightsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit LengthsToWeightsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        power_(this->template GetSingleArgument<float>("power", 0.5)) {}

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }
  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);

    int64_t output_size = 0;
    for (auto i = 0; i < input_size; i++) {
      CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
      output_size += input_data[i];
    }
    std::function<float(const int64_t& length, const float& power)> getWeight;
    if (power_ == 0.5) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / std::sqrt(length);
      };
    } else if (power_ == 1) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / length;
      };
    } else {
      getWeight = [](const int64_t& length, const float& power) {
        return 1.0 / std::pow(length, power);
      };
    }
    output->Resize(output_size);
    auto* output_data = output->template mutable_data<float>();
    int64_t cnt = 0;
    for (auto i = 0; i < input_size; i++) {
      auto len = input_data[i];
      if (len == 0) {
        continue;
      }
      CAFFE_ENFORCE_LE(cnt + len, output_size, "unexpected lengths value");

      float weight_value = getWeight(len, power_);
      std::fill(output_data + cnt, output_data + cnt + len, weight_value);
      cnt += len;
    }
    return true;
  }

 private:
  float power_;
};
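// For example, with the default power = 0.5, lengths {4, 1} produce weights
// {0.5, 0.5, 0.5, 0.5, 1.0}: every element of a segment gets
// 1 / sqrt(segment length).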
template <class Context>
class HasElementsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(HasElementsOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    output->Resize(std::vector<int64_t>{});
    *output->template mutable_data<bool>() = input.numel() > 0;
    return true;
  }
};
// Returns the total number of elements of the input tensor as a scalar
// int64 tensor.
template <class Context>
class SizeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SizeOp);

  bool RunOnDevice() override {
    auto& input = Input(0);

    auto* output = Output(0, vector<int64_t>(), at::dtype<int64_t>());
    auto* output_data = output->template mutable_data<int64_t>();

    auto size = input.numel();
    math::Set<int64_t, Context>(
        1, static_cast<int64_t>(size), output_data, &context_);

    return true;
  }
};
// Returns a shape to be passed to Reshape: all lengths must be equal, and
// the output is the pair {number of lengths, common length}.
template <class Context>
class LengthsToShapeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToShapeOp);

  bool RunOnDevice() override {
    auto& input = Input(0);

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    auto size = input.numel();
    auto first = input_data[0];

    for (int i = 1; i < size; i++) {
      CAFFE_ENFORCE(
          input_data[i] == first, "All elements of input must be same ");
    }

    output->Resize(2);
    auto* output_data = output->template mutable_data<int32_t>();
    output_data[0] = size;
    output_data[1] = first;

    return true;
  }
};
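// For example, lengths {3, 3, 3, 3} yield the shape {4, 3}, suitable for
// reshaping the corresponding flat values into a 4 x 3 tensor.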
template <class Context>
class GatherRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(GatherRangesOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
        this, this->template Input<Tensor>(RANGES, CPU));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& data = Input(DATA);
    auto& ranges = Input(RANGES);
    auto* outputData = Output(0);
    auto* outputLengths = Output(1);

    auto batchSize = ranges.size(0);
    CAFFE_ENFORCE(data.dim() == 1, "Data has to be 1-D");
    CAFFE_ENFORCE(ranges.dim() == 3, "Ranges must be 3-D");
    CAFFE_ENFORCE(ranges.size(1) > 0, "There has to be at least one range");
    CAFFE_ENFORCE_EQ(
        ranges.size(2), 2, "Ranges last dimension should be of size 2");

    auto* rawData = static_cast<const char*>(data.raw_data());
    auto* rangesData = ranges.template data<Index>();

    outputLengths->Resize(batchSize);
    auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
    size_t start = 0;
    size_t blockSize = ranges.size_from_dim(1);
    for (size_t i = 0; i < batchSize; ++i) {
      auto end = start + blockSize;
      outputLengthsPtr[i] = accumulate(rangesData, start, end);
      start = end;
    }
    size_t outputSize = accumulate(rangesData, 0, ranges.numel());
    outputData->Resize(outputSize);

    auto outputRawData =
        static_cast<char*>(outputData->raw_mutable_data(data.dtype()));
    VLOG(1) << "Copying data";
    size_t outputOffsetBytes = 0;
    auto itemsize = data.dtype().itemsize();
    for (int i = 0; i < ranges.numel(); i += 2) {
      auto rangeStart = rangesData[i];
      auto rangeLength = rangesData[i + 1];
      if (!rangeLength) {
        continue;
      }
      auto rangeSizeBytes = rangeLength * itemsize;
      CAFFE_ENFORCE(outputOffsetBytes < outputSize * itemsize);
      CAFFE_ENFORCE(rangeStart + rangeLength <= data.numel());
      context_.CopyItemsSameDevice(
          data.dtype(),
          rangeLength,
          rawData + rangeStart * itemsize,
          outputRawData + outputOffsetBytes);
      outputOffsetBytes += rangeSizeBytes;
    }
    CAFFE_ENFORCE(outputOffsetBytes == outputSize * itemsize);
    return true;
  }
  INPUT_TAGS(DATA, RANGES, LENGTHS);

 private:
  template <typename Index>
  size_t accumulate(Index* ranges, size_t start, size_t end) {
    size_t result = 0;
    for (size_t i = start + 1; i < end; i += 2) {
      result += ranges[i];
    }
    return result;
  }
};
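// Worked example (illustrative): DATA = {1, 2, 3, 4, 5, 6} with RANGES of
// shape (1, 2, 2) holding {{{2, 4}, {0, 2}}} gathers {3, 4, 5, 6, 1, 2} and
// the LENGTHS output is {6}: each (start, length) pair is copied
// back-to-back and the per-example lengths are summed.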
template <class Context>
class LengthsGatherOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsGatherOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
        this, this->template Input<Tensor>(INDICES, CPU));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& items = Input(ITEMS);
    auto& lengths = Input(LENGTHS);
    auto& indices = Input(INDICES);
    auto* output = Output(0);

    CAFFE_ENFORCE_GE(items.dim(), 1, "ITEMS should be at least 1-D");
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS should be 1-D");
    CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES should be 1-D");

    const auto* lengths_data = lengths.template data<int32_t>();
    const auto* indices_data = indices.template data<Index>();

    int64_t total_length = 0;
    for (size_t i = 0; i < indices.numel(); ++i) {
      auto idx = indices_data[i];
      CAFFE_ENFORCE_LT(idx, lengths.numel());
      total_length += lengths_data[idx];
    }
    auto shape = items.sizes().vec();
    shape[0] = total_length;
    output->Resize(shape);
    offsets_.clear();
    int64_t running_offset = 0;
    offsets_.reserve(lengths.numel());
    for (size_t i = 0; i < lengths.numel(); ++i) {
      offsets_.push_back(running_offset);
      running_offset += lengths_data[i];
    }
    CAFFE_ENFORCE_EQ(
        items.size(0),
        running_offset,
        "LENGTHS must match the first dimension of ITEMS");
    auto src_base = static_cast<const char*>(items.raw_data());
    auto block_size = items.size_from_dim(1);
    auto block_bytesize = block_size * items.itemsize();
    auto out = static_cast<char*>(output->raw_mutable_data(items.dtype()));

    for (size_t i = 0; i < indices.numel(); ++i) {
      auto idx = indices_data[i];
      auto length = lengths_data[idx];
      context_.CopyItemsSameDevice(
          items.dtype(),
          length * block_size,
          src_base + offsets_[idx] * block_bytesize,
          out);
      out += length * block_bytesize;
    }
    return true;
  }

  std::vector<int64_t> offsets_;

  INPUT_TAGS(ITEMS, LENGTHS, INDICES);
};
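// Worked example (illustrative): ITEMS = {10, 20, 30, 40, 50, 60},
// LENGTHS = {2, 3, 1}, INDICES = {2, 0} selects the third run and then the
// first: output = {60, 10, 20}, using the offsets {0, 2, 5} precomputed
// above.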
template <typename T, class Context>
class AccumulateHistogramOp : public Operator<Context> {
 public:
  template <class... Args>
  explicit AccumulateHistogramOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        lower_bound_(
            this->template GetSingleArgument<float>("lower_bound", 0.0)),
        upper_bound_(
            this->template GetSingleArgument<float>("upper_bound", 1.0)),
        num_buckets_(this->template GetSingleArgument<int>("num_buckets", 1)) {
    CAFFE_ENFORCE_GT(num_buckets_, 0);
    // Two extra buckets for values < lower_bound and >= upper_bound.
    num_output_buckets_ = num_buckets_ + 2;
    accumulate_hist_ = std::vector<int64_t>(num_output_buckets_, 0);
  }
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    auto& X = Input(X_IN);
    auto* X_data = X.template data<T>();
    int N = X.numel();
    auto* cur_hist = Output(CUR_HIST);
    auto* acc_hist = Output(ACC_HIST);
    cur_hist->Resize(num_output_buckets_);
    acc_hist->Resize(num_output_buckets_);
    auto* cur_hist_data = cur_hist->template mutable_data<int64_t>();
    auto* acc_hist_data = acc_hist->template mutable_data<int64_t>();
    auto segment = (upper_bound_ - lower_bound_) / num_buckets_;
    math::Set<int64_t, Context>(
        num_output_buckets_, 0, cur_hist_data, &context_);

    for (int i = 0; i < N; i++) {
      int bucket_index = -1;
      if (X_data[i] < lower_bound_) {
        bucket_index = 0;
      } else if (X_data[i] >= upper_bound_) {
        bucket_index = num_buckets_ + 1;
      } else {
        bucket_index = (int)((X_data[i] - lower_bound_) / segment) + 1;
      }
      cur_hist_data[bucket_index] += 1;
      accumulate_hist_[bucket_index] += 1;
    }

    for (int i = 0; i < num_output_buckets_; i++) {
      acc_hist_data[i] = accumulate_hist_[i];
    }

    return true;
  }
 private:
  float lower_bound_;
  float upper_bound_;
  int num_buckets_;
  int num_output_buckets_;
  std::vector<int64_t> accumulate_hist_;

  INPUT_TAGS(X_IN);
  OUTPUT_TAGS(CUR_HIST, ACC_HIST);
};
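// Bucket layout sketch: with lower_bound = 0, upper_bound = 1, and
// num_buckets = 4, segment = 0.25 and the outputs have 6 counters:
// bucket 0 for x < 0, buckets 1..4 for [0, 0.25), ..., [0.75, 1), and
// bucket 5 for x >= 1. A value of 0.3 lands in bucket
// (int)(0.3 / 0.25) + 1 = 2.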
template <class Context>
class RangeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(RangeOp)

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t, float, double>>::call(
        this, Input(0));
  }
  template <typename T>
  T readScalarInput(const int index) {
    if (std::is_same<Context, CPUContext>::value) {
      return Input(index).template data<T>()[0];
    } else {
      // Non-CPU contexts copy the scalar to a local CPU tensor first.
      local_.CopyFrom(Input(index));
      return local_.template data<T>()[0];
    }
  }
  template <typename T>
  bool DoRunWithType() {
    T stop = 0;
    T start = 0;
    T step = 1;

    for (int i = 0; i < InputSize(); ++i) {
      CAFFE_ENFORCE_EQ(Input(i).dim(), 0, "All inputs must be scalar.");
    }

    switch (InputSize()) {
      case 1:
        stop = readScalarInput<T>(0);
        break;
      case 2:
        start = readScalarInput<T>(0);
        stop = readScalarInput<T>(1);
        break;
      case 3:
        step = readScalarInput<T>(2);
        start = readScalarInput<T>(0);
        stop = readScalarInput<T>(1);
        break;
    }
    CAFFE_ENFORCE_NE(step, 0, "Step size cannot be 0.");
    int length;
    auto diff = stop - start;
    if (std::is_integral<T>::value) {
      // Avoid casting to and from floats in case it introduces rounding, and
      // avoid mod because the compiler doesn't strip unused code until later.
      length = diff / step;
      if (length * step < diff) {
        length += 1;
      }
    } else {
      length = static_cast<int>(ceil(diff / step));
    }

    // Non-positive length means an empty output tensor.
    if (length <= 0) {
      Output(0, {0}, at::dtype<T>());
      return true;
    } else {
      auto* output = Output(0, {length}, at::dtype<T>());
      return DoRunOnDevice<T>(start, step, output);
    }
  }
 private:
  template <typename T>
  bool DoRunOnDevice(const T& start, const T& step, Tensor* output);

  // Local CPU tensor used by readScalarInput on non-CPU contexts.
  Tensor local_{CPU};
};
class ThrowExceptionOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit ThrowExceptionOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Exception from ThrowExceptionOp")) {}

  bool RunOnDevice() override {
    CAFFE_THROW(message_);
  }

 private:
  const std::string message_;
};
class ThrowChildThreadExceptionOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit ThrowChildThreadExceptionOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Exception from ThrowChildThreadExceptionOp")) {}

  bool RunOnDevice() override {
    std::thread t([this]() { CAFFE_THROW(this->message_); });

    t.join();
    return true;
  }

 private:
  const std::string message_;
};
class LogFatalOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit LogFatalOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Logging from LogFatalOp")) {}

  bool RunOnDevice() override {
    LOG(FATAL) << message_;
    return true;
  }

 private:
  const std::string message_;
};
class FailOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit FailOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override {
    return false;
  }
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_UTILITY_OPS_H_