// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// utility_ops.h
17 #ifndef CAFFE2_OPERATORS_UTILITY_OPS_H_
18 #define CAFFE2_OPERATORS_UTILITY_OPS_H_
19 
20 #include <math.h>
21 
22 #include "caffe2/core/common_omp.h"
23 #include "caffe2/core/context.h"
24 #include "caffe2/core/logging.h"
25 #include "caffe2/core/operator.h"
26 #include "caffe2/core/types.h"
27 #include "caffe2/utils/math.h"
28 
#include <algorithm>
#include <cmath>
#include <functional>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
31 
32 namespace caffe2 {
33 
// NanCheckOp: checks the input tensor for NaN values. RunOnDevice() is only
// declared here; the per-context (CPU/GPU) implementation lives in the
// corresponding .cc/.cu file.
template <class Context>
class NanCheckOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  NanCheckOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws) {}

  bool RunOnDevice() override;

 private:
  // Used by the implementation to print tensor contents for diagnostics.
  TensorPrinter tensorPrinter_;
  // Scratch buffer for the device implementation.
  // NOTE(review): exact usage is in the out-of-line RunOnDevice — not
  // visible in this header.
  Tensor<Context> scratch_;
};
47 
49  using GradientMakerBase::GradientMakerBase;
50  std::vector<OperatorDef> GetGradientDefs() override {
51  return {CreateOperatorDef(
52  "NanCheck",
53  "",
54  std::vector<string>{GO(0)},
55  std::vector<string>{GI(0)})};
56  }
57 };
58 
59 template <class Context>
60 class WallClockTimeOp final : public Operator<Context> {
61  public:
62  USE_OPERATOR_CONTEXT_FUNCTIONS;
63 
64  WallClockTimeOp(const OperatorDef& operator_def, Workspace* ws)
65  : Operator<Context>(operator_def, ws) {}
66 
67  bool RunOnDevice() override {
68  int64_t nanoseconds = static_cast<long int>(
69  std::chrono::duration_cast<std::chrono::nanoseconds>(
70  std::chrono::high_resolution_clock::now().time_since_epoch())
71  .count());
72 
73  TensorCPU* output = OperatorBase::Output<TensorCPU>(0);
74  output->Resize();
75  *output->template mutable_data<int64_t>() = nanoseconds;
76 
77  return true;
78  }
79 };
80 
81 const char kPrintFileExtension[] = ".log";
82 
83 template <class Context>
84 class PrintOp final : public Operator<Context> {
85  public:
86  USE_OPERATOR_CONTEXT_FUNCTIONS;
87  USE_DISPATCH_HELPER;
88  PrintOp(const OperatorDef& operator_def, Workspace* ws)
89  : Operator<Context>(operator_def, ws),
90  tensor_printer_(
91  operator_def.input(0),
92  OperatorBase::GetSingleArgument<int>("to_file", 0)
93  ? ws->RootFolder() + "/" + operator_def.input(0) +
94  kPrintFileExtension
95  : "",
96  OperatorBase::GetSingleArgument<int>("limit", 0)),
97  every_n_(OperatorBase::GetSingleArgument<int>("every_n", 1)) {
98  CAFFE_ENFORCE_GE(every_n_, 1);
99  }
100 
101  bool RunOnDevice() override {
102  if (++occurrences_mod_n_ > every_n_) {
103  occurrences_mod_n_ -= every_n_;
104  }
105  if (occurrences_mod_n_ != 1) {
106  return true;
107  }
108 
109  if (!OperatorBase::InputIsType<Tensor<Context>>(0) &&
110  !OperatorBase::InputIsType<TensorCPU>(0)) {
111  LOG(INFO) << "Blob of type: "
112  << OperatorBase::Inputs().at(0)->meta().name();
113  return true;
114  }
115  // special-case empty tensors since they may have no meta()
116  if (Input(0).size() == 0) {
117  tensor_printer_.PrintMeta(Input(0));
118  return true;
119  }
120 
121  using Types = TensorTypes<
122  float,
123  double,
124  int,
125  long,
126  bool,
127  char,
128  unsigned char,
129  std::string>;
130 
131  if (OperatorBase::InputIsType<TensorCPU>(0)) {
133  this, OperatorBase::Input<TensorCPU>(0));
134  } else {
135  return DispatchHelper<Types>::call(this, Input(0));
136  }
137  }
138 
139  private:
140  template <typename T>
141  bool DoRunWithType() {
142  // A simple strategy to copy tensor if needed, and have the tensor pointer
143  // pointing to the right instantiation. Note that tensor_copy_if_needed
144  // will handle memory deallocation itself so no smart pointer is needed.
145  const TensorCPU* tensor;
146  TensorCPU tensor_copy_if_needed;
147  if (OperatorBase::InputIsType<TensorCPU>(0)) {
148  tensor = &OperatorBase::Input<TensorCPU>(0);
149  } else {
150  tensor_copy_if_needed.CopyFrom(Input(0), &context_);
151  // Make sure that the copy is finished.
152  context_.FinishDeviceComputation();
153  tensor = &tensor_copy_if_needed;
154  }
155  tensor_printer_.Print<T>(*tensor);
156  return true;
157  }
158 
159  private:
160  TensorPrinter tensor_printer_;
161  int every_n_;
162  int occurrences_mod_n_{0};
163 };
164 
179 template <class Context>
180 class AliasOp final : public Operator<Context> {
181  public:
182  USE_OPERATOR_CONTEXT_FUNCTIONS;
183  USE_SIMPLE_CTOR_DTOR(AliasOp);
184 
185  bool RunOnDevice() override {
186  auto& input = Input(0);
187  CAFFE_ENFORCE_GE(input.size(), 0, "Tensor is not initialized");
188  Output(0)->ResizeLike(input);
189  Output(0)->ShareData(input);
190  return true;
191  }
192 };
193 
201 template <class Context>
202 class EnsureDenseOp final : public Operator<Context> {
203  public:
204  USE_OPERATOR_CONTEXT_FUNCTIONS;
205  USE_SIMPLE_CTOR_DTOR(EnsureDenseOp)
206 
207  bool RunOnDevice() override {
208  const auto& input = Input(0);
209  auto* output = Output(0);
210  CAFFE_ENFORCE_GT(input.ndim(), 0, "Input has to be at least a vector.");
211  // it is allowed to have the output inplace overwrite the input but also
212  // allow the output to be copied from the input
213  if (&input != output) {
214  output->ResizeLike(input);
215  output->CopyFrom(input, &context_);
216  }
217  return true;
218  }
219 };
220 
221 template <class Context>
222 class FlattenToVecOp : public Operator<Context> {
223  public:
224  USE_OPERATOR_CONTEXT_FUNCTIONS;
225  USE_SIMPLE_CTOR_DTOR(FlattenToVecOp);
226 
227  bool RunOnDevice() override {
228  auto& input = Input(0);
229  auto* output = Output(0);
230  CAFFE_ENFORCE_GE(
231  input.dims().size(), 1, "The rank of the tensor must be >= 1.");
232  output->Resize(input.size());
233 
234  context_.template CopyItems<Context, Context>(
235  input.meta(),
236  input.size(),
237  input.raw_data(),
238  output->raw_mutable_data(input.meta()));
239  return true;
240  }
241 };
242 
243 // Output gets the data of input(0), but reshapes it like input(1).
244 template <class Context>
245 class ResizeLikeOp : public Operator<Context> {
246  public:
247  USE_OPERATOR_CONTEXT_FUNCTIONS;
248  USE_SIMPLE_CTOR_DTOR(ResizeLikeOp);
249 
250  bool RunOnDevice() override {
251  auto& input0 = Input(0);
252  auto& input1 = Input(1);
253  auto* output = Output(0);
254  CAFFE_ENFORCE_EQ(input0.size(), input1.size());
255  output->ResizeLike(Input(1));
256  context_.template CopyItems<Context, Context>(
257  input0.meta(),
258  input0.size(),
259  input0.raw_data(),
260  output->raw_mutable_data(input0.meta()));
261  return true;
262  }
263 };
264 
265 template <class Context>
266 class SumOp : public Operator<Context> {
267  public:
268  USE_OPERATOR_CONTEXT_FUNCTIONS;
269  USE_SIMPLE_CTOR_DTOR(SumOp);
270 
271  template <typename T, typename M>
272  bool DoRunWithType() {
273  auto& input0 = Input(0);
274  auto* output = Output(0);
275  if (InputSize() == 1) {
276  output->CopyFrom(input0, &context_);
277  return true;
278  }
279  output->ResizeLike(input0);
280  T* output_data = output->template mutable_data<T>();
281  // Dimension checking
282  for (int i = 1; i < InputSize(); ++i) {
283  if (output->dims() != Input(i).dims()) {
284  CAFFE_THROW(
285  "Check failed: output->dims() == Input(i).dims().",
286  "Description: Input #",
287  i,
288  ", input dimension:",
289  Input(i).dims(),
290  " should match output dimension: ",
291  output->dims());
292  }
293  }
294 
295  // Add the first two - works if in-place or not.
296  math::Add(
297  output->size(),
298  input0.template data<T>(),
299  Input(1).template data<T>(),
300  output_data,
301  &context_);
302  // Add remaining.
303  for (int i = 2; i < InputSize(); ++i) {
304  math::Add(
305  output->size(),
306  output_data,
307  Input(i).template data<T>(),
308  output_data,
309  &context_);
310  }
311  return true;
312  }
313 
314  bool RunOnDevice() override {
315  if (Input(0).template IsType<float>()) {
316  return DoRunWithType<float, float>();
317  } else if (Input(0).template IsType<int>()) {
318  return DoRunWithType<int, int>();
319  } else {
320  CAFFE_THROW(
321  "Sum operator only supports 32-bit float and ints, but",
322  " input was of type ",
323  Input(0).meta().name());
324  }
325  }
326 };
327 
328 // WeightedSumOp computes the weighted sum of several tensors. The input should
329 // be in the form X_0, weight_0, X_1, weight_1, ... where X_i all have the same
330 // shape, and weight_i are size 1 tensors that specifies the weight of each
331 // vector. Note that if one wants to do in-place computation, it could only be
332 // done with X_0 also as the output, but not other X_i.
333 template <class Context>
334 class WeightedSumOp : public Operator<Context> {
335  public:
336  USE_OPERATOR_CONTEXT_FUNCTIONS;
337  USE_SIMPLE_CTOR_DTOR(WeightedSumOp);
338 
339  template <typename DstType>
340  bool DoRunWithType() {
341  CAFFE_ENFORCE_EQ(InputSize() % 2, 0);
342  auto& X0 = Input(0);
343  auto& weight0 = Input(1);
344  CAFFE_ENFORCE_GT(X0.size(), 0);
345  CAFFE_ENFORCE_EQ(weight0.size(), 1);
346  int size = X0.size();
347  auto* output = Output(0);
348  output->ResizeLike(X0);
349  math::Scale<DstType, Context>(
350  size,
351  weight0.template data<float>(),
352  X0.template data<DstType>(),
353  output->template mutable_data<DstType>(),
354  &context_);
355  for (int i = 2; i < InputSize(); i += 2) {
356  auto& X = Input(i);
357  // Do a check: if the input is the same as output, we have a problem -
358  // in-place update should always only happen with the zeroth input.
359  if (&X == output) {
360  LOG(ERROR) << "Input #" << i << " is the same as output. "
361  << "If you want to do in-place updates, put the output as "
362  << "input #0.";
363  return false;
364  }
365  auto& weight = Input(i + 1);
366  CAFFE_ENFORCE_EQ(X.size(), size);
367  CAFFE_ENFORCE_EQ(weight.size(), 1);
368  math::Axpy<DstType, Context>(
369  size,
370  weight.template data<float>(),
371  X.template data<DstType>(),
372  output->template mutable_data<DstType>(),
373  &context_);
374  }
375  return true;
376  }
377  bool RunOnDevice() override;
378 };
379 
// WeightedSumGradientOp: backward pass for WeightedSumOp. Given dY followed
// by the forward inputs (X_i, weight_i), produces dX_i = weight_i * dY for
// every pair, and — when the "grad_on_w" argument is set — additionally
// dw_i = dot(dY, X_i) for each weight, laid out after all dX outputs.
template <class Context>
class WeightedSumGradientOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  WeightedSumGradientOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        grad_on_w_(OperatorBase::GetSingleArgument<bool>("grad_on_w", false)) {}

  template <typename DstType>
  bool DoRunWithType() {
    // Inputs: dY plus the forward (X_i, w_i) pairs, so the count is odd.
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    // With grad_on_w we emit one dw per pair as well, doubling the outputs.
    auto output_size = grad_on_w_ ? InputSize() - 1 : InputSize() / 2;
    CAFFE_ENFORCE_EQ(OutputSize(), output_size);

    auto& dY = Input(0);
    const auto* dY_data = dY.template data<DstType>();
    int size = dY.size();

    // The input size should be the input size of the forward op plus 1
    for (int i = 0; i < InputSize() / 2; i++) {
      auto& cur_w = Input(2 * i + 2); // weight_i (scalar tensor)
      CAFFE_ENFORCE_EQ(cur_w.size(), 1);
      auto* cur_dX = Output(i);
      cur_dX->ResizeLike(dY);

      // dX_i = w_i * dY
      math::Scale<DstType, Context>(
          size,
          cur_w.template data<float>(),
          dY_data,
          cur_dX->template mutable_data<DstType>(),
          &context_);

      if (grad_on_w_) {
        auto& cur_X = Input(2 * i + 1); // X_i from the forward pass
        CAFFE_ENFORCE_EQ(cur_X.size(), size);
        // dw outputs come after all dX outputs (output_size / 2 of each).
        auto* cur_dw = Output(i + output_size / 2);
        cur_dw->Resize(1);
        // dw_i = <dY, X_i>
        math::Dot<DstType, Context>(
            size,
            dY_data,
            cur_X.template data<DstType>(),
            cur_dw->template mutable_data<float>(),
            &context_);
      }
    }

    return true;
  }

  bool RunOnDevice() override;

 private:
  bool grad_on_w_;
};
435 
// ScatterWeightedSumOp: sparse, in-place weighted sum. Inputs are
// X0, weight0, INDICES, then (X_i, weight_i) pairs; only the rows of X0
// selected by INDICES are updated:
//   X0[idx] = weight0 * X0[idx] + sum_i weight_i * (corresponding X_i row)
// The output must alias X0 (in-place operation is enforced).
template <typename T, class Context>
class ScatterWeightedSumOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(ScatterWeightedSumOp);
  USE_DISPATCH_HELPER;

  bool RunOnDevice() override {
    // Dispatch on the integral type of the INDICES input (Input(2)).
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(2));
  }

 private:
  template <typename Index>
  bool DoRunWithType() {
    // Specialize on block size 1 (scalar rows); other block sizes fall
    // through to the generic code path.
    TIndex block_size = Input(0).size_from_dim(1);
    return DispatchHelper<FixedValues<1>, Index>::call(this, block_size);
  }

  template <typename Index, int FixedSize>
  bool DoRunWithValue() {
    // X0, weight0, indices, then (X_i, weight_i) pairs -> odd input count.
    CAFFE_ENFORCE_EQ(InputSize() % 2, 1);
    auto& X0 = Input(0);
    auto& weight0 = Input(1);
    auto& indices = Input(2);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&X0, output, "In place operation is required");

    CAFFE_ENFORCE_GT(X0.size(), 0);
    CAFFE_ENFORCE_GT(X0.ndim(), 0, "X0 has to be at least the vector");
    CAFFE_ENFORCE_EQ(weight0.size(), 1);
    TIndex M = X0.size(); // total element count of X0
    TIndex N = X0.dim(0); // number of rows = valid index range
    TIndex K = indices.size(); // number of rows being updated
    TIndex block_size = M / N; // elements per row
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    T w0 = *weight0.template data<T>();
    // It's most likely a constant so exact comparison is fine
    if (w0 != 1.0) {
      // Pre-scale the selected rows by weight0 before accumulating.
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        CAFFE_ENFORCE(
            0 <= idx && idx < N,
            "Index out of bounds: ",
            idx,
            ", range 0 to ",
            N);
        math::ScaleFixedSize<T, Context, FixedSize>(
            block_size,
            w0,
            data + block_size * idx,
            data + block_size * idx,
            &context_);
      }
    }
    // Accumulate each (X_i, weight_i) pair into the selected rows.
    for (int inp = 3; inp < InputSize(); inp += 2) {
      auto& X = Input(inp);
      auto& weight = Input(inp + 1);
      CAFFE_ENFORCE_EQ(X.size(), block_size * K);
      CAFFE_ENFORCE_EQ(weight.size(), 1);
      const T* x_data = X.template data<T>();
      T w = *weight.template data<T>();
      for (int i = 0; i < K; ++i) {
        Index idx = idxs[i];
        // double-checking the indices, but it's fine as it's DCHECK only
        DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
                                    << ", range 0 to " << N;
        math::AxpyFixedSize<T, Context, FixedSize>(
            block_size,
            w,
            x_data + block_size * i,
            data + block_size * idx,
            &context_);
      }
    }
    return true;
  }
  // Scratch buffers. NOTE(review): unused in this visible code path —
  // presumably used by device-specific specializations; confirm against
  // the .cu implementation.
  Tensor<CPUContext> x_data_host_;
  Tensor<CPUContext> weights_host_;
  Tensor<Context> x_data_device_;
  Tensor<Context> weights_device_;
};
556 
// ScatterAssignOp: in-place sparse assignment. Copies rows from SLICES into
// DATA at the rows selected by INDICES: DATA[INDICES[i]] = SLICES[i].
// The supported (index type, data type) combinations are enumerated in
// runners_; the concrete implementation is chosen at run time from the
// tensors' actual dtypes.
template <class Context>
class ScatterAssignOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  virtual ~ScatterAssignOp() {}

  ScatterAssignOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        // Map from (index dtype, data dtype) to the typed DoRun instance.
        runners_({{{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int32_t, float>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int32_t, float16>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int32_t, int32_t>},
                  {{TensorProto_DataType_INT32, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int32_t, int64_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT},
                   &ScatterAssignOp::DoRun<int64_t, float>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_FLOAT16},
                   &ScatterAssignOp::DoRun<int64_t, float16>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT32},
                   &ScatterAssignOp::DoRun<int64_t, int32_t>},
                  {{TensorProto_DataType_INT64, TensorProto_DataType_INT64},
                   &ScatterAssignOp::DoRun<int64_t, int64_t>}}) {}

  bool RunOnDevice() override {
    const auto& data = Input(DATA);
    const auto& slices = Input(SLICES);
    auto& indices = Input(INDICES);

    const auto dataType = TypeMetaToDataType(data.meta());
    const auto slicesType = TypeMetaToDataType(slices.meta());
    const auto indicesType = TypeMetaToDataType(indices.meta());
    auto* output = Output(0);

    // Select and invoke the typed implementation for this combination.
    auto runner = GetRunner(dataType, slicesType, indicesType);
    (this->*runner)();
    return true;
  }

 private:
  typedef void (ScatterAssignOp::*RunnerType)();
  typedef std::
      map<std::pair<TensorProto_DataType, TensorProto_DataType>, RunnerType>
          RunnerMap;

  RunnerMap runners_;

  // Looks up the member-function runner for (indicesType, dataType);
  // throws when the combination is unsupported.
  RunnerType GetRunner(
      const TensorProto_DataType dataType,
      const TensorProto_DataType slicesType,
      const TensorProto_DataType indicesType) {
    CAFFE_ENFORCE_EQ(dataType, slicesType, "Data and slice types must match");
    auto it = runners_.find({indicesType, dataType});
    CAFFE_ENFORCE(
        it != runners_.end(),
        "Could not find the runner corresponding to indicesType, dataType = ",
        indicesType,
        " ",
        dataType);
    return it->second;
  }

  template <typename Index, typename T>
  void DoRun() {
    auto& input = Input(DATA);
    auto& indices = Input(INDICES);
    auto& slices = Input(SLICES);
    auto* output = Output(0);
    CAFFE_ENFORCE_EQ(&input, output, "In place operation is required");

    CAFFE_ENFORCE_GT(input.ndim(), 0, "X0 has to be at least the vector");
    TIndex M = input.size(); // total element count of DATA
    TIndex N = input.dim(0); // number of rows = valid index range
    TIndex K = indices.size(); // number of rows to assign
    TIndex block_size = M / N; // elements per row
    CAFFE_ENFORCE_EQ(slices.size(), block_size * K);
    // TODO(dzhulgakov): it can be made to work with arbitrary data type by
    // using raw_mutable_data
    T* data = output->template mutable_data<T>();
    const Index* idxs = indices.template data<Index>();
    const T* slicesData = slices.template data<T>();
    DoScatterAssign(data, idxs, slicesData, N, K, block_size);
  }

  // Copies each slice row into its destination row; indices are only
  // validated in debug builds (DCHECK).
  template <typename Index, typename T>
  void DoScatterAssign(
      T* data,
      const Index* idxs,
      const T* slicesData,
      TIndex N,
      TIndex K,
      TIndex block_size) {
    for (int i = 0; i < K; ++i) {
      Index idx = idxs[i];
      // double-checking the indices, but it's fine as it's DCHECK only
      DCHECK(0 <= idx && idx < N) << "Index out of bounds: " << idx
                                  << ", range 0 to " << N;
      context_.template Copy<T, Context, Context>(
          block_size, slicesData + block_size * i, data + block_size * idx);
    }
  }

  INPUT_TAGS(DATA, INDICES, SLICES);
};
685 
686 template <class Context, class DstContext, class SrcContext>
687 class CopyOp : public Operator<Context> {
688  public:
689  USE_OPERATOR_CONTEXT_FUNCTIONS;
690  USE_SIMPLE_CTOR_DTOR(CopyOp);
691 
692  bool RunOnDevice() override {
693  auto& input = OperatorBase::Input<Tensor<SrcContext>>(0);
694  auto* output = OperatorBase::Output<Tensor<DstContext>>(0);
695  output->ResizeLike(input);
696  this->context_.template CopyItems<SrcContext, DstContext>(
697  input.meta(),
698  input.size(),
699  input.raw_data(),
700  output->raw_mutable_data(input.meta()));
701  return true;
702  }
703 };
704 
// CopyOnDeviceLikeOp: registered as a separate operator but, in this generic
// form, behaves exactly like CopyOp — the constructor simply forwards.
// NOTE(review): any device-selection behavior presumably lives in
// context-specific specializations elsewhere; not visible here.
template <class Context, class DstContext, class SrcContext>
class CopyOnDeviceLikeOp : public CopyOp<Context, DstContext, SrcContext> {
 public:
  CopyOnDeviceLikeOp(const OperatorDef& operator_def, Workspace* ws)
      : CopyOp<Context, DstContext, SrcContext>(operator_def, ws) {}
};
711 
712 template <class Context>
713 class LengthsToSegmentIdsOp : public Operator<Context> {
714  public:
715  USE_OPERATOR_CONTEXT_FUNCTIONS;
716  USE_SIMPLE_CTOR_DTOR(LengthsToSegmentIdsOp);
717 
718  bool RunOnDevice() override {
719  auto& input = Input(0);
720  auto* output = Output(0);
721  auto* input_data = input.template data<int32_t>();
722 
723  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
724  auto total_length =
725  std::accumulate(input_data, input_data + input.size(), 0);
726 
727  output->Resize(total_length);
728  auto* output_data = output->template mutable_data<int32_t>();
729 
730  for (int i = 0; i < input.size(); ++i) {
731  auto len = input_data[i];
732  std::fill(output_data, output_data + len, i);
733  output_data += len;
734  }
735  return true;
736  }
737 };
738 
739 template <class Context>
740 class LengthsToRangesOp : public Operator<Context> {
741  public:
742  USE_OPERATOR_CONTEXT_FUNCTIONS;
743  USE_SIMPLE_CTOR_DTOR(LengthsToRangesOp);
744 
745  bool RunOnDevice() override {
746  auto& input = Input(0);
747  auto* output = Output(0);
748  auto* input_data = input.template data<int32_t>();
749 
750  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
751  auto size = input.size();
752 
753  output->Resize(size, 2);
754  auto* output_data = output->template mutable_data<int32_t>();
755 
756  int32_t offset = 0;
757  for (int i = 0; i < size; ++i) {
758  auto len = input_data[i];
759  output_data[i * 2] = offset;
760  output_data[i * 2 + 1] = len;
761  offset += len;
762  }
763  return true;
764  }
765 };
766 
// SegmentIdsToLengthsOp: inverse of LengthsToSegmentIds. Given a sorted
// vector of non-negative segment ids, e.g. [0, 0, 1, 1, 1, 2], outputs the
// per-segment element counts [2, 3, 1]. An optional second input supplies
// the total segment count, so trailing empty segments are emitted with
// length 0.
template <class Context>
class SegmentIdsToLengthsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToLengthsOp);

  bool RunOnDevice() override {
    // Dispatch on the integral type of the segment ids.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    // An Nx1 or 1xN matrix is accepted as a vector for convenience.
    if (input.ndim() == 2) {
      CAFFE_ENFORCE(
          input.dim32(0) == 1 || input.dim32(1) == 1,
          "Input must be a vector.");
    } else {
      CAFFE_ENFORCE_EQ(input.ndim(), 1, "Input must be a vector.");
    }
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      // Optional second input overrides the inferred segment count; it must
      // be at least as large so no id falls out of range.
      CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).dim(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).dim(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments, 0);
    Index prev = 0; // Assume that segment_id >= 0.
    for (int64_t i = 0; i < input_size; i++) {
      // Ids must be non-decreasing; each occurrence counts one element.
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      prev = input_data[i];
      output_data[input_data[i]] += 1;
    }

    return true;
  }
};
823 
// SegmentIdsToRangesOp: like SegmentIdsToLengths, but outputs for each
// segment an (offset, length) pair into the original id vector,
// e.g. [0, 0, 1, 1, 1, 2] -> [[0, 2], [2, 3], [5, 1]].
template <class Context>
class SegmentIdsToRangesOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(SegmentIdsToRangesOp);

  bool RunOnDevice() override {
    // Dispatch on the integral type of the segment ids.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);
    // segment id starts from 0
    auto num_segments = input_size ? input_data[input_size - 1] + 1 : 0;
    if (InputSize() > 1) {
      // Optional second input overrides the inferred segment count; it must
      // be at least as large so no id falls out of range.
      CAFFE_ENFORCE_GE(Input(1).ndim(), 1);
      CAFFE_ENFORCE_LE(
          num_segments,
          Input(1).dim(0),
          "The number of segments inferred should *NOT* be larger "
          "than the size of Input(1)'s first dimension");
      num_segments = Input(1).dim(0);
    }
    CAFFE_ENFORCE(0 <= num_segments, "Indices must be in 0..K-1 range");
    output->Resize(num_segments, 2);
    auto* output_data = output->template mutable_data<int32_t>();
    if (num_segments == 0) {
      return true;
    }
    std::fill(output_data, output_data + num_segments * 2, 0);
    Index prev = input_data[0];
    for (int64_t i = 0; i < input_size; i++) {
      CAFFE_ENFORCE(
          prev <= input_data[i],
          "Segment ids must be sorted: ",
          prev,
          " vs ",
          input_data[i]);
      // When the id jumps, record the starting offset of every segment that
      // begins at position i (including skipped, empty segments).
      while (prev != input_data[i]) {
        ++prev;
        output_data[prev * 2] = i;
      }
      // Count this element towards its segment's length.
      output_data[input_data[i] * 2 + 1] += 1;
    }

    return true;
  }
};
877 
// LengthsToWeightsOp: expands a lengths vector into per-element weights,
// where every element of a segment of length L gets weight 1 / L^power.
// The "power" argument defaults to 0.5, i.e. 1/sqrt(L).
template <class Context>
class LengthsToWeightsOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  LengthsToWeightsOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        power_(OperatorBase::GetSingleArgument<float>("power", 0.5)) {}

  bool RunOnDevice() override {
    // Dispatch on the integral type of the lengths input.
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename Index>
  bool DoRunWithType() {
    auto& input = Input(0);
    CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
    auto* input_data = input.template data<Index>();
    auto input_size = input.size();
    auto* output = Output(0);

    // Total output length = sum of all lengths; each length must be >= 0.
    int64_t output_size = 0;
    for (auto i = 0; i < input_size; i++) {
      CAFFE_ENFORCE_GE(input_data[i], 0, "unexpected negative length value");
      output_size += input_data[i];
    }

    // Choose the weight function once up front. The common powers 0.5 and 1
    // get cheaper specializations (sqrt / plain division); exact float
    // comparison is intentional here — these are argument constants.
    std::function<float(const int64_t& length, const float& power)> getWeight;
    if (power_ == 0.5) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / std::sqrt(length);
      };
    } else if (power_ == 1) {
      getWeight = [](const int64_t& length, const float& /*power*/) {
        return 1.0 / length;
      };
    } else {
      getWeight = [](const int64_t& length, const float& power) {
        return 1.0 / std::pow(length, power);
      };
    }

    output->Resize(output_size);
    auto* output_data = output->template mutable_data<float>();
    int64_t cnt = 0;
    for (auto i = 0; i < input_size; i++) {
      auto len = input_data[i];
      if (len == 0) {
        // Empty segments contribute no output elements.
        continue;
      }
      CAFFE_ENFORCE_LE(cnt + len, output_size, "unexpected lengths value");

      // Every element in this segment shares the same weight.
      float weight_value = getWeight(len, power_);
      std::fill(output_data + cnt, output_data + cnt + len, weight_value);
      cnt += len;
    }

    return true;
  }

 private:
  float power_;
};
940 
941 template <class Context>
942 class HasElementsOp : public Operator<Context> {
943  public:
944  USE_OPERATOR_CONTEXT_FUNCTIONS;
945  USE_SIMPLE_CTOR_DTOR(HasElementsOp);
946 
947  bool RunOnDevice() override {
948  auto& input = Input(0);
949  auto* output = OperatorBase::Output<TensorCPU>(0);
950  output->Resize(std::vector<TIndex>{});
951  *output->template mutable_data<bool>() = input.size() > 0;
952  return true;
953  }
954 };
955 
956 template <class Context>
957 class IsEmptyOp : public Operator<Context> {
958  public:
959  USE_OPERATOR_CONTEXT_FUNCTIONS;
960  USE_SIMPLE_CTOR_DTOR(IsEmptyOp);
961 
962  bool RunOnDevice() override {
963  auto& input = Input(0);
964  auto* output = OperatorBase::Output<TensorCPU>(0);
965  output->Resize(std::vector<TIndex>{});
966  *output->template mutable_data<bool>() = (input.size() == 0);
967  return true;
968  }
969 };
970 
971 // Return the size of a tensor
972 template <class Context>
973 class SizeOp : public Operator<Context> {
974  public:
975  USE_OPERATOR_CONTEXT_FUNCTIONS;
976  USE_SIMPLE_CTOR_DTOR(SizeOp);
977 
978  bool RunOnDevice() override {
979  auto& input = Input(0);
980  auto* output = Output(0);
981 
982  output->Resize(vector<TIndex>());
983  auto* output_data = output->template mutable_data<int64_t>();
984 
985  auto size = input.size();
986  math::Set<int64_t, Context>(
987  1, static_cast<int64_t>(size), output_data, &context_);
988 
989  return true;
990  }
991 };
992 
993 // returns a shape to be passed to Reshape
994 template <class Context>
995 class LengthsToShapeOp : public Operator<Context> {
996  public:
997  USE_OPERATOR_CONTEXT_FUNCTIONS;
998  USE_SIMPLE_CTOR_DTOR(LengthsToShapeOp);
999 
1000  bool RunOnDevice() override {
1001  auto& input = Input(0);
1002 
1003  CAFFE_ENFORCE(input.dims().size() == 1, "Input must be a vector.");
1004  auto* output = Output(0);
1005  auto* input_data = input.template data<int32_t>();
1006 
1007  auto size = input.size();
1008  auto first = input_data[0];
1009 
1010  for (int i = 1; i < size; i++) {
1011  CAFFE_ENFORCE(
1012  input_data[i] == first, "All elements of input must be same ");
1013  }
1014 
1015  output->Resize(2);
1016  auto* output_data = output->template mutable_data<int32_t>();
1017  output_data[0] = size;
1018  output_data[1] = first;
1019 
1020  return true;
1021  }
1022 };
1023 
1024 template <class Context>
1025 class GatherOp : public Operator<Context> {
1026  public:
1027  USE_OPERATOR_CONTEXT_FUNCTIONS;
1028  USE_SIMPLE_CTOR_DTOR(GatherOp);
1029 
1030  bool RunOnDevice() override {
1032  this, OperatorBase::Input<TensorCPU>(INDICES));
1033  }
1034 
1035  template <typename Index>
1036  bool DoRunWithType() {
1037  // If we endup using it on GPU doing O(N) memcpy is probably not best :)
1038  // TODO: implement prefetching if it starts mattering (TF does it)
1039  auto& data = Input(DATA);
1040  auto& indices = Input(INDICES);
1041  auto* output = Output(0);
1042 
1043  CAFFE_ENFORCE_GE(data.ndim(), 1, "DATA should be at least 1-D");
1044  auto shape = indices.dims();
1045  shape.insert(shape.end(), data.dims().begin() + 1, data.dims().end());
1046  output->Resize(shape);
1047 
1048  int block_size = data.size_from_dim(1);
1049  auto block_bytesize = data.size_from_dim(1) * data.meta().itemsize();
1050  int N = indices.size();
1051 
1052  auto src_base = static_cast<const char*>(data.raw_data());
1053  const Index* idxs = indices.template data<Index>();
1054  auto out = static_cast<char*>(output->raw_mutable_data(data.meta()));
1055 
1056  for (int i = 0; i < N; ++i) {
1057  auto idx = idxs[i];
1058  CAFFE_ENFORCE(
1059  0 <= idx && idx < data.dim(0),
1060  "INDICES element is out of DATA bounds, id=",
1061  idx,
1062  " data_dim=",
1063  data.dim(0));
1064  auto src = src_base + idx * block_bytesize;
1065  context_.template CopyItems<Context, Context>(
1066  data.meta(), block_size, src, out + block_bytesize * i);
1067  }
1068  return true;
1069  }
1070 
1071  INPUT_TAGS(DATA, INDICES);
1072 };
1073 
1074 template <class Context>
1075 class GatherRangesOp : public Operator<Context> {
1076  public:
1077  USE_OPERATOR_CONTEXT_FUNCTIONS;
1078  USE_SIMPLE_CTOR_DTOR(GatherRangesOp);
1079 
1080  bool RunOnDevice() override {
1082  this, OperatorBase::Input<TensorCPU>(RANGES));
1083  }
1084 
1085  template <typename Index>
1086  bool DoRunWithType() {
1087  auto& data = Input(DATA);
1088  auto& ranges = Input(RANGES);
1089  auto* outputData = Output(0);
1090  auto* outputLengths = Output(1);
1091 
1092  auto batchSize = ranges.dim(0);
1093  CAFFE_ENFORCE(data.ndim() == 1, "Data has to be 1-D");
1094  CAFFE_ENFORCE(ranges.ndim() == 3, "Ranges must be 3-D");
1095  CAFFE_ENFORCE(ranges.dim(1) > 0, "There has to be at least one range");
1096  CAFFE_ENFORCE_EQ(
1097  ranges.dim(2), 2, "Ranges last dimention should be of size 2");
1098 
1099  auto* rawData = static_cast<const char*>(data.raw_data());
1100  auto* rangesData = ranges.template data<Index>();
1101 
1102  outputLengths->Resize(batchSize);
1103  auto* outputLengthsPtr = outputLengths->template mutable_data<int32_t>();
1104  size_t start = 0;
1105  size_t blockSize = ranges.size_from_dim(1);
1106  for (size_t i = 0; i < batchSize; ++i) {
1107  auto end = start + blockSize;
1108  outputLengthsPtr[i] = accumulate(rangesData, start, end);
1109  start = end;
1110  }
1111 
1112  size_t outputSize = accumulate(rangesData, 0, ranges.size());
1113  outputData->Resize(outputSize);
1114 
1115  auto outputRawData =
1116  static_cast<char*>(outputData->raw_mutable_data(data.meta()));
1117  VLOG(1) << "Copying data";
1118  size_t outputOffsetBytes = 0;
1119  auto itemsize = data.meta().itemsize();
1120  for (int i = 0; i < ranges.size(); i += 2) {
1121  auto rangeStart = rangesData[i];
1122  auto rangeLength = rangesData[i + 1];
1123  if (!rangeLength) {
1124  continue;
1125  }
1126  auto rangeSizeBytes = rangeLength * itemsize;
1127  CAFFE_ENFORCE(outputOffsetBytes < outputSize * itemsize);
1128  CAFFE_ENFORCE(rangeStart + rangeLength <= data.size());
1129  context_.template CopyItems<Context, Context>(
1130  data.meta(),
1131  rangeLength,
1132  rawData + rangeStart * itemsize,
1133  outputRawData + outputOffsetBytes);
1134  outputOffsetBytes += rangeSizeBytes;
1135  }
1136  CAFFE_ENFORCE(outputOffsetBytes == outputSize * itemsize);
1137  return true;
1138  }
1139 
1140  INPUT_TAGS(DATA, RANGES, LENGTHS);
1141 
1142  private:
1143  template <typename Index>
1144  size_t accumulate(Index* ranges, size_t start, size_t end) {
1145  size_t result = 0;
1146  for (int i = start + 1; i < end; i += 2) {
1147  result += ranges[i];
1148  }
1149  return result;
1150  }
1151 };
1152 
1153 template <class Context>
1154 class LengthsGatherOp : public Operator<Context> {
1155  public:
1156  USE_OPERATOR_CONTEXT_FUNCTIONS;
1157  USE_SIMPLE_CTOR_DTOR(LengthsGatherOp);
1158 
1159  bool RunOnDevice() override {
1161  this, OperatorBase::Input<TensorCPU>(INDICES));
1162  }
1163 
1164  template <typename Index>
1165  bool DoRunWithType() {
1166  auto& items = Input(ITEMS);
1167  auto& lengths = Input(LENGTHS);
1168  auto& indices = Input(INDICES);
1169  auto* output = Output(0);
1170 
1171  CAFFE_ENFORCE_GE(items.ndim(), 1, "ITEMS should be at least 1-D");
1172  CAFFE_ENFORCE_EQ(lengths.ndim(), 1, "LENGTHS should be 1-D");
1173  CAFFE_ENFORCE_EQ(indices.ndim(), 1, "INDICES should be 1-D");
1174 
1175  const auto* lengths_data = lengths.template data<int32_t>();
1176  const auto* indices_data = indices.template data<Index>();
1177 
1178  TIndex total_length = 0;
1179  for (size_t i = 0; i < indices.size(); ++i) {
1180  auto idx = indices_data[i];
1181  CAFFE_ENFORCE_LT(idx, lengths.size());
1182  total_length += lengths_data[idx];
1183  }
1184  auto shape = items.dims();
1185  shape[0] = total_length;
1186  output->Resize(shape);
1187 
1188  offsets_.clear();
1189  TIndex running_offset = 0;
1190  offsets_.reserve(lengths.size());
1191  for (size_t i = 0; i < lengths.size(); ++i) {
1192  offsets_.push_back(running_offset);
1193  running_offset += lengths_data[i];
1194  }
1195  CAFFE_ENFORCE_EQ(
1196  items.dim(0),
1197  running_offset,
1198  "LENGTHS must match the first dimension of ITEMS");
1199 
1200  auto src_base = static_cast<const char*>(items.raw_data());
1201  auto block_size = items.size_from_dim(1);
1202  auto block_bytesize = block_size * items.itemsize();
1203  auto out = static_cast<char*>(output->raw_mutable_data(items.meta()));
1204 
1205  for (size_t i = 0; i < indices.size(); ++i) {
1206  auto idx = indices_data[i];
1207  auto length = lengths_data[idx];
1208  context_.template CopyItems<Context, Context>(
1209  items.meta(),
1210  length * block_size,
1211  src_base + offsets_[idx] * block_bytesize,
1212  out);
1213  out += length * block_bytesize;
1214  }
1215  return true;
1216  }
1217 
1218  std::vector<TIndex> offsets_;
1219 
1220  INPUT_TAGS(ITEMS, LENGTHS, INDICES);
1221 };
1222 
1223 // Since we just do copying, consider untemplating it on T and using raw_data()
1229 template <class Context>
1230 class UniqueOp : public Operator<Context> {
1231  public:
1232  USE_OPERATOR_CONTEXT_FUNCTIONS;
1233  USE_SIMPLE_CTOR_DTOR(UniqueOp);
1234 
1235  bool RunOnDevice() override {
1236  // Use run-time polymorphism
1237  auto& input = Input(0);
1238  if (input.template IsType<int32_t>()) {
1239  DoRun<int32_t>();
1240  } else if (input.template IsType<int64_t>()) {
1241  DoRun<int64_t>();
1242  } else {
1243  LOG(FATAL) << "Unsupported type of input in Unique: "
1244  << input.meta().name();
1245  }
1246  return true;
1247  }
1248 
1249  private:
1250  vector<int> order_;
1251  Tensor<Context> thrust_unique_buffer_;
1252  Tensor<Context> cuda_order_buffer_;
1253  Tensor<Context> second_order_buffer_;
1254 
1255  template <typename T>
1256  void DoRun();
1257 
1258  public:
1259  OUTPUT_TAGS(UNIQUE, REMAPPING);
1260 };
1261 
1262 template <class Context>
1263 class UnsafeCoalesceOp final : public Operator<Context> {
1264  public:
1265  USE_OPERATOR_CONTEXT_FUNCTIONS;
1267 
1268  bool RunOnDevice() override {
1269  size_t coalesced_size = 0;
1270  for (int i = 0; i < InputSize(); ++i) {
1271  CAFFE_ENFORCE(
1272  !Input(i).meta().ctor(),
1273  "Must only coalesce fundamental types, error at input: ",
1274  i);
1275  }
1276 
1277  auto roundToAlignment = [](size_t bytes) -> size_t {
1278  return ((bytes + gCaffe2Alignment - 1) / gCaffe2Alignment) *
1279  gCaffe2Alignment;
1280  };
1281 
1282  for (int i = 0; i < InputSize(); ++i) {
1283  coalesced_size += roundToAlignment(Input(i).nbytes());
1284  }
1285 
1286  auto* coalesced = Output(OutputSize() - 1);
1287  coalesced->Resize(coalesced_size);
1288  math::Set<uint8_t, Context>(
1289  coalesced_size,
1290  0.0,
1291  coalesced->template mutable_data<uint8_t>(),
1292  &context_);
1293 
1294  size_t coalesced_offset = 0;
1295  for (auto i = 0; i < InputSize(); ++i) {
1296  const auto input_nbytes = Input(i).nbytes();
1297  context_.template CopyBytes<Context, Context>(
1298  input_nbytes,
1299  (const uint8_t*)Input(i).raw_data(),
1300  coalesced->template mutable_data<uint8_t>() + coalesced_offset);
1301 
1302  // Note: this could cause Input(i) to free it's data if
1303  // Output(i) and Input(i) alias each other. This is safe on a
1304  // GPU (as the copy will happen-before the free), but it's
1305  // worth mentioning.
1306 
1307  Output(i)->ResizeLike(Input(i));
1308  Output(i)->ShareExternalPointer(
1309  static_cast<void*>(
1310  coalesced->template mutable_data<uint8_t>() + coalesced_offset),
1311  Input(i).meta(),
1312  input_nbytes);
1313  coalesced_offset += roundToAlignment(input_nbytes);
1314  }
1315  return true;
1316  }
1317 };
1318 
1319 template <typename T, class Context>
1320 class AccumulateHistogramOp : public Operator<Context> {
1321  public:
1322  AccumulateHistogramOp(const OperatorDef& def, Workspace* ws)
1323  : Operator<Context>(def, ws),
1324  lower_bound_(
1325  OperatorBase::GetSingleArgument<float>("lower_bound", 0.0)),
1326  upper_bound_(
1327  OperatorBase::GetSingleArgument<float>("upper_bound", 1.0)),
1328  num_buckets_(OperatorBase::GetSingleArgument<int>("num_buckets", 1)) {
1329  CAFFE_ENFORCE_GT(num_buckets_, 0);
1330  // 2 more for histograms < lower_bound, >= upper_bound respectively
1331  num_output_buckets_ = num_buckets_ + 2;
1332  accumulate_hist_ = std::vector<int64_t>(num_output_buckets_, 0);
1333  }
1334 
1335  USE_OPERATOR_CONTEXT_FUNCTIONS;
1336 
1337  bool RunOnDevice() override {
1338  auto& X = Input(X_IN);
1339  auto* X_data = X.template data<T>();
1340  int N = X.size();
1341  auto* cur_hist = Output(CUR_HIST);
1342  auto* acc_hist = Output(ACC_HIST);
1343  cur_hist->Resize(num_output_buckets_);
1344  acc_hist->Resize(num_output_buckets_);
1345  auto* cur_hist_data = cur_hist->template mutable_data<int64_t>();
1346  auto* acc_hist_data = acc_hist->template mutable_data<int64_t>();
1347  auto segment = (upper_bound_ - lower_bound_) / num_buckets_;
1348  math::Set<int64_t, Context>(
1349  num_output_buckets_, 0, cur_hist_data, &context_);
1350 
1351  for (int i = 0; i < N; i++) {
1352  int bucket_index = -1;
1353  if (X_data[i] < lower_bound_) {
1354  bucket_index = 0;
1355  } else if (X_data[i] >= upper_bound_) {
1356  bucket_index = num_buckets_ + 1;
1357  } else {
1358  bucket_index = (int)((X_data[i] - lower_bound_) / segment) + 1;
1359  }
1360  cur_hist_data[bucket_index] += 1;
1361  accumulate_hist_[bucket_index] += 1;
1362  }
1363 
1364  for (int i = 0; i < num_output_buckets_; i++) {
1365  acc_hist_data[i] = accumulate_hist_[i];
1366  }
1367 
1368  return true;
1369  }
1370 
1371  private:
1372  float lower_bound_;
1373  float upper_bound_;
1374  int num_buckets_;
1375  int num_output_buckets_;
1376  std::vector<int64_t> accumulate_hist_;
1377 
1378  INPUT_TAGS(X_IN);
1379  OUTPUT_TAGS(CUR_HIST, ACC_HIST);
1380 };
1381 
1382 template <class Context>
1383 class RangeOp : public Operator<Context> {
1384  public:
1385  USE_OPERATOR_CONTEXT_FUNCTIONS;
1386  USE_SIMPLE_CTOR_DTOR(RangeOp)
1387 
1388  bool RunOnDevice() override {
1390  this, Input(0));
1391  }
1392 
1393  template <typename T>
1394  T readScalarInput(const int index) {
1395  if (std::is_same<Context, TensorCPU>::value) {
1396  return Input(index).template data<T>()[0];
1397  } else {
1398  local_.template CopyFrom<Context>(Input(index));
1399  return local_.template data<T>()[0];
1400  }
1401  }
1402 
1403  template <typename T>
1404  bool DoRunWithType() {
1405  T stop = 0;
1406  T start = 0;
1407  T step = 1;
1408 
1409  for (int i = 0; i < InputSize(); ++i) {
1410  CAFFE_ENFORCE_EQ(Input(0).ndim(), 0, "All inputs must be scalar.");
1411  }
1412 
1413  switch (InputSize()) {
1414  case 1:
1415  stop = readScalarInput<T>(0);
1416  break;
1417  case 2:
1418  start = readScalarInput<T>(0);
1419  stop = readScalarInput<T>(1);
1420  break;
1421  case 3:
1422  step = readScalarInput<T>(2);
1423  start = readScalarInput<T>(0);
1424  stop = readScalarInput<T>(1);
1425  break;
1426  }
1427  CAFFE_ENFORCE_NE(step, 0, "Step size cannot be 0.");
1428  int length;
1429  auto diff = stop - start;
1430  if (std::is_integral<T>::value) {
1431  // Avoid casting to and from floats in case it introduces rounding and
1432  // avoid mod because the compiler doesn't strip unused code until later.
1433  length = diff / step;
1434  if (length * step < diff) {
1435  length += 1;
1436  }
1437  } else {
1438  length = static_cast<int>(ceil(diff / step));
1439  }
1440  auto* output = Output(0);
1441  // Match numpy's behavior here.
1442  if (length <= 0) {
1443  output->Resize(0);
1444  // Called for the side effect of setting the data.
1445  output->template mutable_data<T>();
1446  return true;
1447  } else {
1448  output->Resize(length);
1449  return DoRunOnDevice<T>(start, step, output);
1450  }
1451  }
1452 
1453  template <typename T>
1454  bool DoRunOnDevice(const T& start, const T& step, Tensor<Context>* output);
1455 
1456  private:
1457  // local CPU tensor for copying constants.
1458  TensorCPU local_;
1459 };
1460 
1461 } // namespace caffe2
1462 
1463 #endif // CAFFE2_OPERATORS_UTILITY_OPS_H_
const string & RootFolder()
Return the root folder of the workspace.
Definition: workspace.h:183
const TypeMeta & meta() const
Returns the TypeMeta object associated with the current data type.
Definition: tensor.h:664
Tensor is the basic class in Caffe2 that stores a contiguous memory with its shape information...
Definition: tensor.h:109
Update slices of the tensor in-place by overriding.
Definition: utility_ops.h:581
void CopyFrom(const Tensor< SrcContext > &src, ContextForCopy *context)
Copies the data from a source tensor, with a context provided to carry out the underlying memcpy operation.
Definition: tensor.h:182
TIndex size() const
Returns the size (i.e. the total number of elements) of the tensor.
Definition: tensor.h:609
Pass inputs to outputs.
Definition: utility_ops.h:202
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:63
void Resize(Ts...dim_source)
Resizes a tensor.
Definition: tensor.h:304
Copyright (c) 2016-present, Facebook, Inc.
Update slices of the tensor in-place with weighted sum.
Definition: utility_ops.h:475
Alias op makes the output and the input share the same underlying storage.
Definition: utility_ops.h:180
Deduplicates input indices vector and optionally produces reverse remapping.
Definition: utility_ops.h:1230