Caffe2 - C++ API
A deep learning, cross-platform ML framework
utility_ops.h
#ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_
#define CAFFE2_OPERATORS_UTILITY_OPS_H_

#include <algorithm>
#include <chrono>
#include <cmath>
#include <functional>
#include <map>
#include <numeric>
#include <thread>
#include <type_traits>
#include <utility>

#include "caffe2/core/common_omp.h"
#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/types.h"
#include "caffe2/operators/gather_op.h"
#include "caffe2/utils/conversions.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

template <class Context>
class NanCheckOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit NanCheckOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override;

 private:
  TensorPrinter tensorPrinter_;
  Tensor scratch_;
};

class GetNanCheckGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  std::vector<OperatorDef> GetGradientDefs() override {
    return {CreateOperatorDef(
        "NanCheck",
        "",
        std::vector<string>{GO(0)},
        std::vector<string>{GI(0)})};
  }
};

template <class Context>
class IsNanOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  IsNanOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<float, double>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& X = Input(0);
    auto* Y = Output(0, X.sizes(), at::dtype<uint8_t>());
    const auto* X_data = X.template data<T>();
    uint8_t* Y_data = Y->template mutable_data<uint8_t>();
    for (size_t i = 0; i < X.numel(); i++) {
      Y_data[i] = (uint8_t)(std::isnan(X_data[i]));
    }
    return true;
  }
};
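
// Illustrative sketch (example values, not from the original header):
// IsNan maps X = [1.0f, NAN, 2.0f] to the uint8_t mask Y = [0, 1, 0].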

template <class Context>
class WallClockTimeOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit WallClockTimeOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override {
    int64_t nanoseconds = static_cast<long int>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::high_resolution_clock::now().time_since_epoch())
            .count());

    TensorCPU* output = Output(0);
    output->Resize();
    *output->template mutable_data<int64_t>() = nanoseconds;

    return true;
  }
};

const char kPrintFileExtension[] = ".log";

template <class Context>
class PrintOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_DISPATCH_HELPER;
  explicit PrintOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        tensor_printer_(
            operator_def.input(0),
            this->template GetSingleArgument<int>("to_file", 0)
                ? ws->RootFolder() + "/" + operator_def.input(0) +
                    kPrintFileExtension
                : "",
            this->template GetSingleArgument<int>("limit", 0)),
        every_n_(this->template GetSingleArgument<int>("every_n", 1)) {
    CAFFE_ENFORCE_GE(every_n_, 1);
  }

  bool RunOnDevice() override {
    if (++occurrences_mod_n_ > every_n_) {
      occurrences_mod_n_ -= every_n_;
    }
    if (occurrences_mod_n_ != 1) {
      return true;
    }

    if (!this->InputIsTensorType(0, Context::GetDeviceType()) &&
        !this->InputIsTensorType(0, CPU)) {
      LOG(INFO) << "Blob of type: "
                << OperatorBase::Inputs().at(0)->meta().name();
      return true;
    }
    // special-case empty tensors since they may have no meta()
    if (Input(0).numel() == 0) {
      tensor_printer_.PrintMeta(Input(0));
      return true;
    }

    using Types = TensorTypes<
        float,
        double,
        int,
        long,
        bool,
        char,
        unsigned char,
        std::string>;

    if (this->InputIsTensorType(0, CPU)) {
      return DispatchHelper<Types>::call(
          this, this->template Input<Tensor>(0, CPU));
    } else {
      return DispatchHelper<Types>::call(this, Input(0));
    }
  }

 private:
  template <typename T>
  bool DoRunWithType() {
    // A simple strategy to copy tensor if needed, and have the tensor pointer
    // pointing to the right instantiation. Note that tensor_copy_if_needed
    // will handle memory deallocation itself so no smart pointer is needed.
    const TensorCPU* tensor;
    Tensor tensor_copy_if_needed(CPU);
    if (this->InputIsTensorType(0, CPU)) {
      tensor = &this->template Input<Tensor>(0, CPU);
    } else {
      // sync copy
      tensor_copy_if_needed.CopyFrom(Input(0));
      tensor = &tensor_copy_if_needed;
    }
    tensor_printer_.Print<T>(*tensor);
    return true;
  }

 private:
  TensorPrinter tensor_printer_;
  int every_n_;
  int occurrences_mod_n_{0};
};
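
// Illustrative sketch (derived from the counter logic above, not from the
// original header): with every_n = 3, RunOnDevice prints on calls 1, 4, 7, ...
// and silently returns true on all other calls.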

// Alias op makes the output and the input share the same underlying storage.
template <class Context>
class AliasOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(AliasOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    CAFFE_ENFORCE_GE(input.numel(), 0, "Tensor is not initialized");
    OutputTensorAlias(0, input);
    return true;
  }
};

// Pass inputs to outputs.
template <class Context>
class EnsureDenseOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(EnsureDenseOp)

  bool RunOnDevice() override {
    const auto& input = Input(0);
    auto* output = Output(0);
    CAFFE_ENFORCE_GT(input.dim(), 0, "Input has to be at least a vector.");
    // The output may overwrite the input in place; otherwise the input is
    // copied into the output.
    if (&input != output) {
      output->ResizeLike(input);
      output->CopyFrom(input, true /*async*/);
    }
    return true;
  }
};

template <class Context>
class FlattenToVecOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(FlattenToVecOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    CAFFE_ENFORCE_GE(input.dim(), 1, "The rank of the tensor must be >= 1.");
    output->Resize(input.numel());

    context_.CopyItemsSameDevice(
        input.dtype(),
        input.numel(),
        input.raw_data(),
        output->raw_mutable_data(input.dtype()));
    return true;
  }
};
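
// Illustrative sketch (example values, not from the original header):
// FlattenToVec maps a [2, 2] tensor [[1, 2], [3, 4]] to the vector
// [1, 2, 3, 4].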

// Output gets the data of input(0), but reshapes it like input(1).
template <class Context>
class ResizeLikeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ResizeLikeOp);

  bool RunOnDevice() override {
    auto& input0 = Input(0);
    auto& input1 = Input(1);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(input0.numel(), input1.numel());
    output->ResizeLike(Input(1));
    context_.CopyItemsSameDevice(
        input0.dtype(),
        input0.numel(),
        input0.raw_data(),
        output->raw_mutable_data(input0.dtype()));
    return true;
  }
};
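
// Illustrative sketch (example values, not from the original header):
// ResizeLike with input0 of shape [2, 3] and input1 of shape [3, 2] yields an
// output of shape [3, 2] holding input0's six elements; the element counts
// must match.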

template <class Context>
class SumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SumOp);

  template <typename T, typename M>
  bool DoRunWithType() {
    auto& input0 = Input(0);

    if (InputSize() == 1) {
      // TODO: better TensorOptions argument passing (e.g. default argument)
      OutputTensorCopyFrom(
          0,
          // I'll change the order of arguments in another diff, so that we
          // don't need to write this
          at::dtype(input0.dtype()),
          input0,
          true /*async*/);
      return true;
    }
    auto* output = Output(0, input0.sizes(), at::dtype<T>());
    T* output_data = output->template mutable_data<T>();
    // Dimension checking
    for (int i = 1; i < InputSize(); ++i) {
      if (output->sizes() != Input(i).sizes()) {
        CAFFE_THROW(
            "Check failed: output->sizes() == Input(i).sizes().",
            "Description: Input #",
            i,
            ", input dimension:",
            Input(i).sizes(),
            " should match output dimension: ",
            output->sizes());
      }
    }

    // Add the first two - works if in-place or not.
    math::Add(
        output->numel(),
        input0.template data<T>(),
        Input(1).template data<T>(),
        output_data,
        &context_);
    // Add remaining.
    for (int i = 2; i < InputSize(); ++i) {
      math::Add(
          output->numel(),
          output_data,
          Input(i).template data<T>(),
          output_data,
          &context_);
    }
    return true;
  }

  bool RunOnDevice() override {
    if (Input(0).template IsType<float>()) {
      return DoRunWithType<float, float>();
    } else if (Input(0).template IsType<int>()) {
      return DoRunWithType<int, int>();
    } else {
      CAFFE_THROW(
          "Sum operator only supports 32-bit float and ints, but",
          " input was of type ",
          Input(0).dtype().name());
    }
  }
};

inline OpSchema::Cost CostInferenceForSum(
    const OperatorDef& def,
    const std::vector<TensorShape>& in) {
  struct OpSchema::Cost cost = PointwiseCostInference<1>(def, in);
  cost.flops *= (in.size() - 1);
  cost.params_bytes = 0;
  return cost;
}
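
// Illustrative sketch (worked example, not from the original header; assumes
// PointwiseCostInference<1> counts one flop per output element): summing N
// inputs of S elements each takes N - 1 additions per element, so 3 inputs of
// shape [2, 4] cost (3 - 1) * 8 = 16 flops.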

// WeightedSumOp computes the weighted sum of several tensors. The input
// should be in the form X_0, weight_0, X_1, weight_1, ..., where the X_i all
// have the same shape, and each weight_i is a size-1 tensor that specifies
// the weight of the corresponding tensor. Note that in-place computation can
// only be done with X_0 as the output, not any other X_i.
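// Illustrative sketch (example values, not from the original header): with
// X_0 = [1, 2], weight_0 = [2.0], X_1 = [3, 4], weight_1 = [0.5], the output
// is 2.0 * X_0 + 0.5 * X_1 = [3.5, 6.0].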
template <class Context>
class WeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(WeightedSumOp);

  bool RunOnDevice() override;

  template <typename T>
  bool DoRunWithType() {
    const int input_size = this->InputSize();
    CAFFE_ENFORCE_EQ(input_size % 2, 0);
    const auto& X0 = Input(0);
    const auto& weight0 = Input(1);
    CAFFE_ENFORCE_GT(X0.numel(), 0);
    CAFFE_ENFORCE_EQ(weight0.numel(), 1);
    const int size = X0.numel();
    // Note: removed Aliasing check, since Output already has
    // caching capability
    auto* Y = Output(0, X0.sizes(), at::dtype<T>());
    T* Y_data = Y->template mutable_data<T>();
    if (input_size == 2) {
      math::Scale<float, T>(
          size,
          weight0.template data<float>(),
          X0.template data<T>(),
          Y_data,
          &context_);
      return true;
    }
    const auto& X1 = Input(2);
    CAFFE_ENFORCE(
        !IsInputOutputAlias(2, 0),
        "Input #2 is the same as output. If you want to do in-place updates, "
        "put the output as input #0.");
    const auto& weight1 = Input(3);
    CAFFE_ENFORCE_EQ(X1.numel(), size);
    CAFFE_ENFORCE_EQ(weight1.numel(), 1);
    if (!IsInputOutputAlias(0, 0)) {
      context_.template CopySameDevice<T>(size, X0.template data<T>(), Y_data);
    }
    math::Axpby<float, T, Context>(
        size,
        weight1.template data<float>(),
        X1.template data<T>(),
        weight0.template data<float>(),
        Y_data,
        &context_);
    for (int i = 4; i < input_size; i += 2) {
      const auto& Xi = Input(i);
      // Do a check: if the input is the same as output, we have a problem -
      // in-place update should always only happen with the zeroth input.
      const std::string err_msg = "Input #" + to_string(i) +
          " is the same as output. If you want to do in-place updates, "
          "put the output as input #0.";
      CAFFE_ENFORCE(!IsInputOutputAlias(i, 0), err_msg);
      const auto& weighti = Input(i + 1);
      CAFFE_ENFORCE_EQ(Xi.numel(), size);
      CAFFE_ENFORCE_EQ(weighti.numel(), 1);
      math::Axpy<T, Context>(
          size,
          weighti.template data<float>(),
          Xi.template data<T>(),
          Y_data,
          &context_);
    }
    return true;
  }
};

template <class Context>
class WeightedSumGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  template <class... Args>
  explicit WeightedSumGradientOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        grad_on_w_(this->template GetSingleArgument<bool>("grad_on_w", false)) {
  }

  template <typename DstType>
  bool DoRunWithType() {
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto output_size = grad_on_w_ ? InputSize() - 1 : InputSize() / 2;
    CAFFE_ENFORCE_EQ(OutputSize(), output_size);

    auto& dY = Input(0);
    const auto* dY_data = dY.template data<DstType>();
    int size = dY.numel();

    // The input size should be the input size of the forward op plus 1
    for (int i = 0; i < InputSize() / 2; i++) {
      auto& cur_w = Input(2 * i + 2);
      CAFFE_ENFORCE_EQ(cur_w.numel(), 1);

      auto* cur_dX = Output(i, dY.sizes(), at::dtype<DstType>());

      math::Scale<float, DstType, Context>(
          size,
          cur_w.template data<float>(),
          dY_data,
          cur_dX->template mutable_data<DstType>(),
          &context_);

      if (grad_on_w_) {
        auto& cur_X = Input(2 * i + 1);
        CAFFE_ENFORCE_EQ(cur_X.numel(), size);
        auto* cur_dw = Output(i + output_size / 2);
        cur_dw->Resize(1);
        math::Dot<DstType, Context>(
            size,
            dY_data,
            cur_X.template data<DstType>(),
            cur_dw->template mutable_data<float>(),
            &context_);
      }
    }

    return true;
  }

  bool RunOnDevice() override;

 private:
  bool grad_on_w_;
};
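
// Illustrative sketch (derived from the loop above, not from the original
// header): each dX_i is w_i * dY, and with grad_on_w set, each dw_i is the
// dot product of dY and X_i.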

// Update slices of the tensor in-place with weighted sum.
template <typename T, class Context>
class ScatterWeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ScatterWeightedSumOp);
  USE_DISPATCH_HELPER;

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(2));
  }

 private:
  template <typename Index>
  bool DoRunWithType() {
    int64_t block_size = Input(0).size_from_dim(1);
    return DispatchHelper<FixedValues<1>, Index>::call(this, block_size);
  }

  template <typename Index, int FixedSize>
  bool DoRunWithValue() {
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto& X0 = Input(0);
    auto& weight0 = Input(1);
    auto& indices = Input(2);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&X0, output, "In place operation is required");

    CAFFE_ENFORCE_GT(X0.numel(), 0);
    CAFFE_ENFORCE_GT(X0.dim(), 0, "X0 has to be at least a vector");
    CAFFE_ENFORCE_EQ(weight0.numel(), 1);
    int64_t M = X0.numel();
    int64_t N = X0.size(0);
    int64_t K = indices.numel();
    int64_t block_size = M / N;
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    T w0 = *weight0.template data<T>();
    // It's most likely a constant so exact comparison is fine
    if (w0 != 1.0) {
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
            "Index out of bounds: ",
            idx,
            ", range 0 to ",
            N);
        math::ScaleFixedSize<T, Context, FixedSize>(
            block_size,
            w0,
            data + block_size * idx,
            data + block_size * idx,
            &context_);
      }
    }
    for (int inp = 3; inp < InputSize(); inp += 2) {
      auto& X = Input(inp);
      auto& weight = Input(inp + 1);
      CAFFE_ENFORCE_EQ(X.numel(), block_size * K);
      CAFFE_ENFORCE_EQ(weight.numel(), 1);
      const T* x_data = X.template data<T>();
      T w = *weight.template data<T>();
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // double-checking the indices, but it's fine as it's DCHECK only
        DCHECK(0 <= idx && idx < N)
            << "Index out of bounds: " << idx << ", range 0 to " << N;
        math::AxpyFixedSize<T, Context, FixedSize>(
            block_size,
            w,
            x_data + block_size * i,
            data + block_size * idx,
            &context_);
      }
    }
    return true;
  }
  Tensor x_data_host_;
  Tensor weights_host_;
  Tensor x_data_device_;
  Tensor weights_device_;
};
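
// Illustrative sketch (derived from the kernel above, not from the original
// header): the update is X0[idx[i], :] = w0 * X0[idx[i], :] + w1 * X1[i, :]
// (plus further weighted slices for each extra X_k, weight_k pair).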

// Update slices of the tensor in-place by overriding.
template <class Context>
class ScatterAssignOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  virtual ~ScatterAssignOp() {}

  template <class... Args>
  explicit ScatterAssignOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        runners_({{{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int32_t, float>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int32_t, at::Half>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_UINT8},
                   &ScatterAssignOp::DoRun<int32_t, uint8_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int32_t, int32_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int32_t, int64_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int64_t, float>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int64_t, at::Half>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_UINT8},
                   &ScatterAssignOp::DoRun<int64_t, uint8_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int64_t, int32_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int64_t, int64_t>}}) {}

  bool RunOnDevice() override {
    const auto& data = Input(DATA);
    const auto& slices = Input(SLICES);
    auto& indices = Input(INDICES);

    const auto dataType = TypeMetaToDataType(data.dtype());
    const auto slicesType = TypeMetaToDataType(slices.dtype());
    const auto indicesType = TypeMetaToDataType(indices.dtype());
    auto* output = Output(0);

    auto runner = GetRunner(dataType, slicesType, indicesType);
    (this->*runner)();
    return true;
  }

 private:
  typedef void (ScatterAssignOp::*RunnerType)();
  typedef std::
      map<std::pair<TensorProto_DataType, TensorProto_DataType>, RunnerType>
          RunnerMap;

  RunnerMap runners_;

  RunnerType GetRunner(
      const TensorProto_DataType dataType,
      const TensorProto_DataType slicesType,
      const TensorProto_DataType indicesType) {
    CAFFE_ENFORCE_EQ(dataType, slicesType, "Data and slice types must match");
    auto it = runners_.find({indicesType, dataType});
    CAFFE_ENFORCE(
        it != runners_.end(),
        "Could not find the runner corresponding to indicesType, dataType = ",
        indicesType,
        " ",
        dataType);
    return it->second;
  }

  template <typename Index, typename T>
  void DoRun() {
    auto& input = Input(DATA);
    auto& indices = Input(INDICES);
    auto& slices = Input(SLICES);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&input, output, "In place operation is required");

    CAFFE_ENFORCE_GT(input.dim(), 0, "X0 has to be at least a vector");
    int64_t M = input.numel();
    int64_t N = input.size(0);
    int64_t K = indices.numel();
    int64_t block_size = M / N;
    CAFFE_ENFORCE_EQ(slices.numel(), block_size * K);
    // TODO(dzhulgakov): it can be made to work with arbitrary data type by
    // using raw_mutable_data
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    const T* slicesData = slices.template data<T>();
    DoScatterAssign(data, idxs, slicesData, N, K, block_size);
  }

  template <typename Index, typename T>
  void DoScatterAssign(
      T* data,
      const Index* idxs,
      const T* slicesData,
      int64_t N,
      int64_t K,
      int64_t block_size) {
    for (int i = 0; i < K; ++i) {
      Index idx = idxs[i];
      // double-checking the indices, but it's fine as it's DCHECK only
      DCHECK(0 <= idx && idx < N)
          << "Index out of bounds: " << idx << ", range 0 to " << N;
      context_.template CopySameDevice<T>(
          block_size, slicesData + block_size * i, data + block_size * idx);
    }
  }

  INPUT_TAGS(DATA, INDICES, SLICES);
};
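
// Illustrative sketch (example values, not from the original header): with
// DATA = [[1, 2], [3, 4], [5, 6]], INDICES = [2, 0] and
// SLICES = [[7, 8], [9, 10]], ScatterAssign overwrites rows in place, giving
// DATA = [[9, 10], [3, 4], [7, 8]].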

template <class Context>
class LengthsToSegmentIdsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToSegmentIdsOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto total_length =
        std::accumulate(input_data, input_data + input.numel(), 0);

    output->Resize(total_length);
    auto* output_data = output->template mutable_data<int32_t>();

    for (int i = 0; i < input.numel(); ++i) {
      auto len = input_data[i];
      std::fill(output_data, output_data + len, i);
      output_data += len;
    }
    return true;
  }
};
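
// Illustrative sketch (example values, not from the original header):
// LengthsToSegmentIds maps lengths [1, 3, 0, 2] to segment ids
// [0, 1, 1, 1, 3, 3].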

template <class Context>
class LengthsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToRangesOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto size = input.numel();

    output->Resize(size, 2);
    auto* output_data = output->template mutable_data<int32_t>();

    int32_t offset = 0;
    for (int i = 0; i < size; ++i) {
      auto len = input_data[i];
      output_data[i * 2] = offset;
      output_data[i * 2 + 1] = len;
      offset += len;
    }
    return true;
  }
};
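
// Illustrative sketch (example values, not from the original header):
// LengthsToRanges maps lengths [1, 3, 2] to (offset, length) pairs
// [[0, 1], [1, 3], [4, 2]].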

template <class Context>
class SegmentIdsToLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToLengthsOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    if (input.dim() == 2) {
      CAFFE_ENFORCE(
          input.dim32(0) == 1 || input.dim32(1) == 1,
          "Input must be a vector.");
    } else {
      CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");
    }
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).dim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).size(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).size(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments, 0);
    Index prev = 0; // Assume that segment_id >= 0.
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      prev = input_data[i];
      output_data[input_data[i]] += 1;
    }

    return true;
  }
};
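
// Illustrative sketch (example values, not from the original header):
// SegmentIdsToLengths maps sorted segment ids [0, 0, 1, 1, 1, 3] to lengths
// [2, 3, 0, 1]; it is the inverse of LengthsToSegmentIds.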

template <class Context>
class SegmentIdsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToRangesOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      CAFFE_ENFORCE_GE(Input(1).dim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).size(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).size(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments, 2);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments * 2, 0);
    Index prev = input_data[0];
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      while (prev != input_data[i]) {
        ++prev;
        output_data[prev * 2] = i;
      }
      output_data[input_data[i] * 2 + 1] += 1;
    }

    return true;
  }
};
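
// Illustrative sketch (example values, not from the original header):
// SegmentIdsToRanges maps sorted segment ids [0, 0, 1, 1, 1, 3] to
// (offset, length) pairs [[0, 2], [2, 3], [5, 0], [5, 1]].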

template <class Context>
class LengthsToWeightsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit LengthsToWeightsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        power_(this->template GetSingleArgument<float>("power", 0.5)) {}

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.numel();
    auto* output = Output(0);

    int64_t output_size = 0;
    for (auto i = 0; i < input_size; i++) {
      CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
      output_size += input_data[i];
    }

    std::function<float(const int64_t& length, const float& power)> getWeight;
    if (power_ == 0.5) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / std::sqrt(length);
      };
    } else if (power_ == 1) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / length;
      };
    } else {
      getWeight = [](const int64_t& length, const float& power) {
        return 1.0 / std::pow(length, power);
      };
    }

    output->Resize(output_size);
    auto* output_data = output->template mutable_data<float>();
    int64_t cnt = 0;
    for (auto i = 0; i < input_size; i++) {
      auto len = input_data[i];
      if (len == 0) {
        continue;
      }
      CAFFE_ENFORCE_LE(cnt + len, output_size, "unexpected lengths value");

      float weight_value = getWeight(len, power_);
      std::fill(output_data + cnt, output_data + cnt + len, weight_value);
      cnt += len;
    }

    return true;
  }

 private:
  float power_;
};
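
// Illustrative sketch (example values, not from the original header): with
// the default power = 0.5, lengths [2, 1] expand to weights
// [1/sqrt(2), 1/sqrt(2), 1.0], i.e. approximately [0.707, 0.707, 1.0].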

template <class Context>
class HasElementsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(HasElementsOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* output = Output(0);
    output->Resize(std::vector<int64_t>{});
    *output->template mutable_data<bool>() = input.numel() > 0;
    return true;
  }
};

// Return the size of a tensor
template <class Context>
class SizeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SizeOp);

  bool RunOnDevice() override {
    auto& input = Input(0);

    auto* output = Output(0, vector<int64_t>(), at::dtype<int64_t>());
    auto* output_data = output->template mutable_data<int64_t>();

    auto size = input.numel();
    math::Set<int64_t, Context>(
        1, static_cast<int64_t>(size), output_data, &context_);

    return true;
  }
};

// Returns a shape to be passed to Reshape.
template <class Context>
class LengthsToShapeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsToShapeOp);

  bool RunOnDevice() override {
    auto& input = Input(0);

    CAFFE_ENFORCE(input.sizes().size() == 1, "Input must be a vector.");
    auto* output = Output(0);
    auto* input_data = input.template data<int32_t>();

    auto size = input.numel();
    auto first = input_data[0];

    for (int i = 1; i < size; i++) {
      CAFFE_ENFORCE(
          input_data[i] == first, "All elements of input must be the same.");
    }

    output->Resize(2);
    auto* output_data = output->template mutable_data<int32_t>();
    output_data[0] = size;
    output_data[1] = first;

    return true;
  }
};
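
// Illustrative sketch (example values, not from the original header): lengths
// [3, 3] describe two segments of equal length 3, so the op outputs the shape
// [2, 3].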

template <class Context>
class GatherRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(GatherRangesOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
        this, this->template Input<Tensor>(RANGES, CPU));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& data = Input(DATA);
    auto& ranges = Input(RANGES);
    auto* outputData = Output(0);
    auto* outputLengths = Output(1);

    auto batchSize = ranges.size(0);
    CAFFE_ENFORCE(data.dim() == 1, "Data has to be 1-D");
    CAFFE_ENFORCE(ranges.dim() == 3, "Ranges must be 3-D");
    CAFFE_ENFORCE(ranges.size(1) > 0, "There has to be at least one range");
    CAFFE_ENFORCE_EQ(
        ranges.size(2), 2, "Ranges last dimension should be of size 2");

    auto* rawData = static_cast<const char*>(data.raw_data());
    auto* rangesData = ranges.template data<Index>();

    outputLengths->Resize(batchSize);
    auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
    size_t start = 0;
    size_t blockSize = ranges.size_from_dim(1);
    for (size_t i = 0; i < batchSize; ++i) {
      auto end = start + blockSize;
      outputLengthsPtr[i] = accumulate(rangesData, start, end);
      start = end;
    }

    size_t outputSize = accumulate(rangesData, 0, ranges.numel());
    outputData->Resize(outputSize);

    auto outputRawData =
        static_cast<char*>(outputData->raw_mutable_data(data.dtype()));
    VLOG(1) << "Copying data";
    size_t outputOffsetBytes = 0;
    auto itemsize = data.dtype().itemsize();
    for (int i = 0; i < ranges.numel(); i += 2) {
      auto rangeStart = rangesData[i];
      auto rangeLength = rangesData[i + 1];
      if (!rangeLength) {
        continue;
      }
      auto rangeSizeBytes = rangeLength * itemsize;
      CAFFE_ENFORCE(outputOffsetBytes < outputSize * itemsize);
      CAFFE_ENFORCE(rangeStart + rangeLength <= data.numel());
      context_.CopyItemsSameDevice(
          data.dtype(),
          rangeLength,
          rawData + rangeStart * itemsize,
          outputRawData + outputOffsetBytes);
      outputOffsetBytes += rangeSizeBytes;
    }
    CAFFE_ENFORCE(outputOffsetBytes == outputSize * itemsize);
    return true;
  }

  INPUT_TAGS(DATA, RANGES, LENGTHS);

 private:
  template <typename Index>
  size_t accumulate(Index* ranges, size_t start, size_t end) {
    size_t result = 0;
    for (size_t i = start + 1; i < end; i += 2) {
      result += ranges[i];
    }
    return result;
  }
};
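
// Illustrative sketch (example values, not from the original header): with
// DATA = [1, 2, 3, 4, 5, 6] and RANGES = [[[0, 1], [2, 2]], [[4, 1], [5, 1]]]
// (two examples, two (start, length) ranges each), the outputs are
// OUTPUT = [1, 3, 4, 5, 6] and LENGTHS = [3, 2].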

template <class Context>
class LengthsGatherOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsGatherOp);

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(
        this, this->template Input<Tensor>(INDICES, CPU));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& items = Input(ITEMS);
    auto& lengths = Input(LENGTHS);
    auto& indices = Input(INDICES);
    auto* output = Output(0);

    CAFFE_ENFORCE_GE(items.dim(), 1, "ITEMS should be at least 1-D");
    CAFFE_ENFORCE_EQ(lengths.dim(), 1, "LENGTHS should be 1-D");
    CAFFE_ENFORCE_EQ(indices.dim(), 1, "INDICES should be 1-D");

    const auto* lengths_data = lengths.template data<int32_t>();
    const auto* indices_data = indices.template data<Index>();

    int64_t total_length = 0;
    for (size_t i = 0; i < indices.numel(); ++i) {
      auto idx = indices_data[i];
      CAFFE_ENFORCE_LT(idx, lengths.numel());
      total_length += lengths_data[idx];
    }
    auto shape = items.sizes().vec();
    shape[0] = total_length;
    output->Resize(shape);

    offsets_.clear();
    int64_t running_offset = 0;
    offsets_.reserve(lengths.numel());
    for (size_t i = 0; i < lengths.numel(); ++i) {
      offsets_.push_back(running_offset);
      running_offset += lengths_data[i];
    }
    CAFFE_ENFORCE_EQ(
        items.size(0),
        running_offset,
        "LENGTHS must match the first dimension of ITEMS");

    auto src_base = static_cast<const char*>(items.raw_data());
    auto block_size = items.size_from_dim(1);
    auto block_bytesize = block_size * items.itemsize();
    auto out = static_cast<char*>(output->raw_mutable_data(items.dtype()));

    for (size_t i = 0; i < indices.numel(); ++i) {
      auto idx = indices_data[i];
      auto length = lengths_data[idx];
      context_.CopyItemsSameDevice(
          items.dtype(),
          length * block_size,
          src_base + offsets_[idx] * block_bytesize,
          out);
      out += length * block_bytesize;
    }
    return true;
  }

  std::vector<int64_t> offsets_;

  INPUT_TAGS(ITEMS, LENGTHS, INDICES);
};
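
// Illustrative sketch (example values, not from the original header): with
// ITEMS = [1, 2, 3, 4, 5, 6, 7], LENGTHS = [2, 3, 2] and INDICES = [0, 2],
// the segment offsets are [0, 2, 5] and the gathered output is [1, 2, 6, 7].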

template <typename T, class Context>
class AccumulateHistogramOp : public Operator<Context> {
 public:
  template <class... Args>
  explicit AccumulateHistogramOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        lower_bound_(
            this->template GetSingleArgument<float>("lower_bound", 0.0)),
        upper_bound_(
            this->template GetSingleArgument<float>("upper_bound", 1.0)),
        num_buckets_(this->template GetSingleArgument<int>("num_buckets", 1)) {
    CAFFE_ENFORCE_GT(num_buckets_, 0);
    // 2 more buckets for values < lower_bound and >= upper_bound, respectively
    num_output_buckets_ = num_buckets_ + 2;
    accumulate_hist_ = std::vector<int64_t>(num_output_buckets_, 0);
  }

  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    auto& X = Input(X_IN);
    auto* X_data = X.template data<T>();
    int N = X.numel();
    auto* cur_hist = Output(CUR_HIST);
    auto* acc_hist = Output(ACC_HIST);
    cur_hist->Resize(num_output_buckets_);
    acc_hist->Resize(num_output_buckets_);
    auto* cur_hist_data = cur_hist->template mutable_data<int64_t>();
    auto* acc_hist_data = acc_hist->template mutable_data<int64_t>();
    auto segment = (upper_bound_ - lower_bound_) / num_buckets_;
    math::Set<int64_t, Context>(
        num_output_buckets_, 0, cur_hist_data, &context_);

    for (int i = 0; i < N; i++) {
      int bucket_index = -1;
      if (X_data[i] < lower_bound_) {
        bucket_index = 0;
      } else if (X_data[i] >= upper_bound_) {
        bucket_index = num_buckets_ + 1;
      } else {
        bucket_index = (int)((X_data[i] - lower_bound_) / segment) + 1;
      }
      cur_hist_data[bucket_index] += 1;
      accumulate_hist_[bucket_index] += 1;
    }

    for (int i = 0; i < num_output_buckets_; i++) {
      acc_hist_data[i] = accumulate_hist_[i];
    }

    return true;
  }

 private:
  float lower_bound_;
  float upper_bound_;
  int num_buckets_;
  int num_output_buckets_;
  std::vector<int64_t> accumulate_hist_;

  INPUT_TAGS(X_IN);
  OUTPUT_TAGS(CUR_HIST, ACC_HIST);
};
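
// Illustrative sketch (example values, not from the original header): with
// lower_bound = 0, upper_bound = 1 and num_buckets = 2, the four output
// buckets cover (-inf, 0), [0, 0.5), [0.5, 1) and [1, inf), so
// X = [-1.0, 0.1, 0.6, 2.0] yields CUR_HIST = [1, 1, 1, 1].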

template <class Context>
class RangeOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(RangeOp)

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t, float, double>>::call(
        this, Input(0));
  }

  template <typename T>
  T readScalarInput(const int index) {
    if (std::is_same<Context, CPUContext>::value) {
      return Input(index).template data<T>()[0];
    } else {
      local_.CopyFrom(Input(index));
      return local_.template data<T>()[0];
    }
  }

  template <typename T>
  bool DoRunWithType() {
    T stop = 0;
    T start = 0;
    T step = 1;

    for (int i = 0; i < InputSize(); ++i) {
      CAFFE_ENFORCE_EQ(Input(i).dim(), 0, "All inputs must be scalar.");
    }

    switch (InputSize()) {
      case 1:
        stop = readScalarInput<T>(0);
        break;
      case 2:
        start = readScalarInput<T>(0);
        stop = readScalarInput<T>(1);
        break;
      case 3:
        step = readScalarInput<T>(2);
        start = readScalarInput<T>(0);
        stop = readScalarInput<T>(1);
        break;
    }
    CAFFE_ENFORCE_NE(step, 0, "Step size cannot be 0.");
    int length;
    auto diff = stop - start;
    if (std::is_integral<T>::value) {
      // Avoid casting to and from floats in case it introduces rounding and
      // avoid mod because the compiler doesn't strip unused code until later.
      length = diff / step;
      if (length * step < diff) {
        length += 1;
      }
    } else {
      length = static_cast<int>(ceil(diff / step));
    }

    // Match numpy's behavior here.
    if (length <= 0) {
      Output(0, {0}, at::dtype<T>());
      return true;
    } else {
      auto* output = Output(0, {length}, at::dtype<T>());
      return DoRunOnDevice<T>(start, step, output);
    }
  }

  template <typename T>
  bool DoRunOnDevice(const T& start, const T& step, Tensor* output);

 private:
  // local CPU tensor for copying constants.
  Tensor local_{CPU};
};
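
// Illustrative sketch (example values, not from the original header):
// Range(4) produces [0, 1, 2, 3]; Range(1, 7, 2) produces [1, 3, 5], matching
// numpy.arange semantics.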

class ThrowExceptionOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit ThrowExceptionOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Exception from ThrowExceptionOp")) {}

  bool RunOnDevice() override {
    CAFFE_THROW(message_);
  }

 private:
  const std::string message_;
};

class ThrowChildThreadExceptionOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit ThrowChildThreadExceptionOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Exception from ThrowChildThreadExceptionOp")) {}

  bool RunOnDevice() override {
    std::thread t([this]() { CAFFE_THROW(this->message_); });

    t.join();
    return true;
  }

 private:
  const std::string message_;
};

class LogFatalOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit LogFatalOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...),
        message_(GetSingleArgument<std::string>(
            "message",
            "Logging from LogFatalOp")) {}

  bool RunOnDevice() override {
    LOG(FATAL) << message_;
    return true;
  }

 private:
  const std::string message_;
};

class FailOp : public Operator<CPUContext> {
 public:
  template <class... Args>
  explicit FailOp(Args&&... args)
      : Operator<CPUContext>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override {
    return false;
  }
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_UTILITY_OPS_H_