1 #include <caffe2/ideep/ideep_utils.h> 2 #include <caffe2/ideep/operators/operator_fallback_ideep.h> 3 #include <caffe2/operators/concat_split_op.h> 9 USE_IDEEP_DEF_ALIASES();
10 USE_IDEEP_OPERATOR_FUNCTIONS();
15 fallback_(operator_def, ws) {
18 "You shouldn't specify both the dim to concat, and the order " 19 "in the case of 4-D images.");
21 axis_ = OperatorBase::GetSingleArgument<int>(
"axis", -1);
22 add_axis_ = OperatorBase::GetSingleArgument<int>(
"add_axis", 0);
27 CAFFE_ENFORCE_GE(axis_, 0);
31 bool RunOnDevice()
override {
32 bool fallback_to_cpu =
false;
33 vector<itensor> inputs_itensor;
35 for (
int i = 0; i < InputSize(); ++i) {
36 if (OperatorBase::InputBlob(i).template IsType<itensor>()) {
37 auto& tensor_ideep = Input(i);
38 if (tensor_ideep.ndims() == 0 || tensor_ideep.get_nelems() == 0)
40 inputs_itensor.emplace_back(tensor_ideep);
43 BlobIsTensorType(OperatorBase::InputBlob(i), CPU),
44 "Expect cpu tensor if not itensor");
45 auto& tensor_cpu = OperatorBase::Input<Tensor>(i, CPU);
46 if (tensor_cpu.sizes().
size() == 0 || tensor_cpu.numel() == 0)
48 fallback_to_cpu =
true;
53 if (!fallback_to_cpu) {
54 auto* output = Output(OUTPUT);
55 Tensor* axis_info = OutputTensor(AXIS_INFO,
56 vector<int64_t>(1, InputSize()), at::dtype<int>().device(CPU));
57 auto* axis_data = axis_info->template mutable_data<int>();
59 ideep::concat::compute(inputs_itensor, axis_, add_axis_, *output);
60 for (
int i = 0; i < axis_vdata.size(); i++) {
61 axis_data[i] = axis_vdata[i];
66 return fallback_.Run(0);
75 OUTPUT_TAGS(OUTPUT, AXIS_INFO);
80 USE_IDEEP_DEF_ALIASES();
81 USE_IDEEP_OPERATOR_FUNCTIONS();
85 axis_offset_(OperatorBase::GetRepeatedArgument<int>(
"split")) {
88 "You shouldn't specify both the dim to split, and the order " 89 "in the case of 4-D images.");
91 axis_ = OperatorBase::GetSingleArgument<int>(
"axis", -1);
93 add_axis_ = OperatorBase::GetSingleArgument<int>(
"add_axis", 0);
98 CAFFE_ENFORCE_GE(axis_, 0);
102 bool RunOnDevice()
override {
103 const auto& input = Input(INPUT);
104 CAFFE_ENFORCE_LT(axis_, input.ndims(),
"Axis not in input ndim range.");
105 const int input_channels = input.get_dim(axis_);
106 vector<int> axis_vdata(OutputSize(), 0);
107 if (InputSize() == 2) {
112 "If you set split with an input blob, do not pass in " 113 "split in the argument.");
114 auto& axis_info = OperatorBase::Input<Tensor>(AXIS_INFO, CPU);
115 CAFFE_ENFORCE_EQ(axis_info.numel(), OutputSize());
116 auto* axis_data = axis_info.template data<int>();
117 axis_vdata.assign(axis_data, axis_data + OutputSize());
118 }
else if (axis_offset_.size() == 0) {
120 input_channels % OutputSize(),
122 "If you did not specify split explicitly, the number of " 123 "input channels should be divisible by the output size.");
124 axis_vdata.assign(OutputSize(), input_channels / OutputSize());
130 "The number of splits specified should be equal to the " 131 "number of outputs.");
132 axis_vdata = axis_offset_;
136 add_axis_ ? OutputSize()
138 axis_vdata.data(), axis_vdata.data() + OutputSize(), 0),
140 "Sum of split dimensions do not match: should be ",
143 auto iten_vector = ideep::spliter::compute(
144 input, axis_vdata, axis_, add_axis_);
148 "Output size does not match: should be ",
151 for (
int i = 0; i < OutputSize(); i++) {
152 auto* output = Output(i);
153 *output = iten_vector[i];
162 vector<int> axis_offset_;
164 INPUT_TAGS(INPUT, AXIS_INFO);
Workspace is a class that holds all the related objects created during runtime: (1) all blobs, and (2) all instantiated networks.
constexpr size_t size() const
size - Get the array size.
A global dictionary that holds information about what Caffe2 modules have been loaded in the current runtime, and also utility functions to load modules.
A templated class to allow one to wrap a CPU operator as an IDEEP operator.
bool HasArgument(const string &name) const
Checks if the operator has an argument of the given name.