Caffe2 - C++ API
A deep learning, cross-platform ML framework
concat_split_op.cc
1 #include <caffe2/ideep/ideep_utils.h>
2 #include <caffe2/ideep/operators/operator_fallback_ideep.h>
3 #include <caffe2/operators/concat_split_op.h>
4 
5 namespace caffe2 {
6 
7 class IDEEPConcatOp final : public IDEEPOperator {
8  public:
9  USE_IDEEP_DEF_ALIASES();
10  USE_IDEEP_OPERATOR_FUNCTIONS();
12 
13  IDEEPConcatOp(const OperatorDef& operator_def, Workspace* ws)
14  : IDEEPOperator(operator_def, ws),
15  fallback_(operator_def, ws) {
16  CAFFE_ENFORCE(
18  "You shouldn't specify both the dim to concat, and the order "
19  "in the case of 4-D images.");
20  if (OperatorBase::HasArgument("axis")) {
21  axis_ = OperatorBase::GetSingleArgument<int>("axis", -1);
22  add_axis_ = OperatorBase::GetSingleArgument<int>("add_axis", 0);
23  } else {
24  axis_ = 1;
25  add_axis_ = 0;
26  }
27  CAFFE_ENFORCE_GE(axis_, 0);
28  }
29  ~IDEEPConcatOp() override {}
30 
31  bool RunOnDevice() override {
32  bool fallback_to_cpu = false;
33  vector<itensor> inputs_itensor;
34 
35  for (int i = 0; i < InputSize(); ++i) {
36  if (OperatorBase::InputBlob(i).template IsType<itensor>()) {
37  auto& tensor_ideep = Input(i);
38  if (tensor_ideep.ndims() == 0 || tensor_ideep.get_nelems() == 0)
39  continue;
40  inputs_itensor.emplace_back(tensor_ideep);
41  } else {
42  CAFFE_ENFORCE(
43  BlobIsTensorType(OperatorBase::InputBlob(i), CPU),
44  "Expect cpu tensor if not itensor");
45  auto& tensor_cpu = OperatorBase::Input<Tensor>(i, CPU);
46  if (tensor_cpu.sizes().size() == 0 || tensor_cpu.numel() == 0)
47  continue;
48  fallback_to_cpu = true;
49  break;
50  }
51  }
52 
53  if (!fallback_to_cpu) {
54  auto* output = Output(OUTPUT);
55  Tensor* axis_info = OutputTensor(AXIS_INFO,
56  vector<int64_t>(1, InputSize()), at::dtype<int>().device(CPU));
57  auto* axis_data = axis_info->template mutable_data<int>();
58  auto axis_vdata =
59  ideep::concat::compute(inputs_itensor, axis_, add_axis_, *output);
60  for (int i = 0; i < axis_vdata.size(); i++) {
61  axis_data[i] = axis_vdata[i];
62  }
63  return true;
64  }
65 
66  return fallback_.Run(0);
67  }
68 
69  private:
70  int axis_;
71  int add_axis_;
72  FALLBACK_OP fallback_;
73 
74  INPUT_TAGS(INPUT0);
75  OUTPUT_TAGS(OUTPUT, AXIS_INFO);
76 };
77 
78 class IDEEPSplitOp final : public IDEEPOperator {
79  public:
80  USE_IDEEP_DEF_ALIASES();
81  USE_IDEEP_OPERATOR_FUNCTIONS();
82 
83  IDEEPSplitOp(const OperatorDef& operator_def, Workspace* ws)
84  : IDEEPOperator(operator_def, ws),
85  axis_offset_(OperatorBase::GetRepeatedArgument<int>("split")) {
86  CAFFE_ENFORCE(
88  "You shouldn't specify both the dim to split, and the order "
89  "in the case of 4-D images.");
90  if (OperatorBase::HasArgument("axis")) {
91  axis_ = OperatorBase::GetSingleArgument<int>("axis", -1);
92  // only exists for computing the gradient of a Concat with 'add_axis'
93  add_axis_ = OperatorBase::GetSingleArgument<int>("add_axis", 0);
94  } else {
95  axis_ = 1;
96  add_axis_ = 0;
97  }
98  CAFFE_ENFORCE_GE(axis_, 0);
99  }
100  ~IDEEPSplitOp() override {}
101 
102  bool RunOnDevice() override {
103  const auto& input = Input(INPUT);
104  CAFFE_ENFORCE_LT(axis_, input.ndims(), "Axis not in input ndim range.");
105  const int input_channels = input.get_dim(axis_);
106  vector<int> axis_vdata(OutputSize(), 0);
107  if (InputSize() == 2) {
108  // We obtain split from the input tensor.
109  CAFFE_ENFORCE_EQ(
110  axis_offset_.size(),
111  0,
112  "If you set split with an input blob, do not pass in "
113  "split in the argument.");
114  auto& axis_info = OperatorBase::Input<Tensor>(AXIS_INFO, CPU);
115  CAFFE_ENFORCE_EQ(axis_info.numel(), OutputSize());
116  auto* axis_data = axis_info.template data<int>();
117  axis_vdata.assign(axis_data, axis_data + OutputSize());
118  } else if (axis_offset_.size() == 0) {
119  CAFFE_ENFORCE_EQ(
120  input_channels % OutputSize(),
121  0,
122  "If you did not specify split explicitly, the number of "
123  "input channels should be divisible by the output size.");
124  axis_vdata.assign(OutputSize(), input_channels / OutputSize());
125  } else {
126  // We obtain split from the parameters.
127  CAFFE_ENFORCE_EQ(
128  axis_offset_.size(),
129  OutputSize(),
130  "The number of splits specified should be equal to the "
131  "number of outputs.");
132  axis_vdata = axis_offset_;
133  }
134 
135  CAFFE_ENFORCE_EQ(
136  add_axis_ ? OutputSize()
137  : std::accumulate(
138  axis_vdata.data(), axis_vdata.data() + OutputSize(), 0),
139  input_channels,
140  "Sum of split dimensions do not match: should be ",
141  input_channels);
142 
143  auto iten_vector = ideep::spliter::compute(
144  input, axis_vdata, axis_, add_axis_);
145  CAFFE_ENFORCE_EQ(
146  iten_vector.size(),
147  OutputSize(),
148  "Output size does not match: should be ",
149  OutputSize());
150 
151  for (int i = 0; i < OutputSize(); i++) {
152  auto* output = Output(i);
153  *output = iten_vector[i];
154  }
155 
156  return true;
157  }
158 
159  private:
160  int axis_;
161  int add_axis_;
162  vector<int> axis_offset_;
163 
164  INPUT_TAGS(INPUT, AXIS_INFO);
165 };
166 
167 
// Register IDEEPConcatOp and IDEEPSplitOp through REGISTER_IDEEP_OPERATOR
// under the operator names Concat and Split.
168 REGISTER_IDEEP_OPERATOR(Concat, IDEEPConcatOp);
169 REGISTER_IDEEP_OPERATOR(Split, IDEEPSplitOp);
170 
171 } // namespace caffe2
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
constexpr size_t size() const
size - Get the array size.
Definition: ArrayRef.h:138
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
A templated class to allow one to wrap a CPU operator as an IDEEP operator.
bool HasArgument(const string &name) const
Checks if the operator has an argument of the given name.
Definition: operator.h:70