1 #include "caffe2/operators/utility_ops.h" 2 #include "caffe2/core/operator.h" 3 #include "caffe2/ideep/ideep_utils.h" 10 USE_IDEEP_DEF_ALIASES();
// Copies the CPU tensor at input 0 into an itensor stored in output blob 0.
// The output itensor is re-created and resized (as f32, with the input's
// dims) unless the blob already holds an f32 itensor whose dims match.
// NOTE(review): the end of this method (closing braces / return) is not
// visible in this excerpt.
12 bool RunOnDevice()
override {
13 const auto& X = OperatorBase::Input<Tensor>(0, CPU);
14 auto* Y = OperatorBase::OutputBlob(0);
// Build itensor dims from the input tensor's sizes.
15 itensor::dims src_dims(X.sizes().begin(), X.sizes().end());
// The || short-circuits: when Y is not an itensor (or is not f32) the left
// operand is already true, so the Get<itensor>() in the dims comparison is
// only evaluated when Y is known to hold an f32 itensor.
16 if (!(Y->template IsType<itensor>() &&
17 Y->Get<itensor>().get_data_type() == itensor::data_type::f32) ||
18 Y->Get<itensor>().get_dims() != src_dims) {
// Replace whatever the blob held with a fresh itensor sized for the input.
19 Y->Reset(
new itensor());
20 Y->GetMutable<itensor>()->resize(src_dims, itensor::data_type::f32);
// Fill the output from X's raw buffer, described to reorder_from as f32 with
// src_dims.
// NOTE(review): the closing brace of the branch above is not visible here;
// confirm that this call runs unconditionally.
22 Y->GetMutable<itensor>()->reorder_from(
23 src_dims, itensor::data_type::f32, X.raw_data());
31 USE_IDEEP_DEF_ALIASES();
// Copies the itensor at input 0 into *Y via ideep::direct_copy::compute.
// NOTE(review): Y's declaration and the end of the method are not visible in
// this excerpt — presumably Y is the op's output 0; confirm.
33 bool RunOnDevice()
override {
34 const auto& X = OperatorBase::Input<itensor>(0);
38 ideep::direct_copy::compute(X, *Y);
48 USE_IDEEP_DEF_ALIASES();
// Produces a CPU tensor at output 0 from input 0, which may be either a CPU
// tensor or an itensor.
// NOTE(review): the else lines, closing braces and the declaration of Y are
// not visible in this excerpt, so the exact branch nesting below should be
// confirmed against the full file.
49 bool RunOnDevice()
override {
50 const auto& input_blob = OperatorBase::InputBlob(0);
// Input is already a CPU tensor: copy it to output 0.
// NOTE(review): the log text says "sharing" while the call used is
// OutputTensorCopyFrom — confirm which is intended.
51 if (BlobIsTensorType(input_blob, CPU)) {
52 VLOG(2) <<
"Directing sharing of TensorCPU";
53 const auto& X = OperatorBase::Input<Tensor>(0, CPU);
54 OutputTensorCopyFrom(0, at::device(CPU), X);
// Otherwise the input is read as an itensor; only f32 data is handled.
56 const auto& X = OperatorBase::Input<itensor>(0);
57 if (X.get_data_type() == itensor::data_type::f32) {
// Convert the itensor dims to int64_t, element by element.
// NOTE(review): `int i` is compared against get_dims().size(); if size()
// returns an unsigned type this is a signed/unsigned comparison — confirm.
58 std::vector<int64_t> dims;
59 for (
int i = 0; i < X.get_dims().size(); ++i) {
60 dims.push_back(X.get_dims()[i]);
// Request a float CPU output tensor with those dims, then have the itensor
// write its contents into the output's float buffer.
63 OperatorBase::OutputTensor(0, dims, at::dtype<float>().device(CPU));
64 X.reorder_to(Y->template mutable_data<float>());
// Any data type other than f32 is rejected.
66 CAFFE_THROW(
"Unsupported ideep type: ", X.get_data_type());
75 USE_IDEEP_DEF_ALIASES();
76 USE_IDEEP_OPERATOR_FUNCTIONS();
// Computes a scaled sum of the inputs into output 0 via ideep::sum::compute.
// Inputs are consumed in pairs: index i is a data tensor and index i + 1
// supplies its scale.
// NOTE(review): the declaration of X inside the loop, the statement that
// fills `inputs`, and the end of the method are not visible in this excerpt.
80 bool RunOnDevice()
override {
// Inputs must come in (data, weight) pairs.
81 CAFFE_ENFORCE_EQ(InputSize() % 2, 0);
// Reference rank / element counts taken from the first pair.
82 auto ndims = Input(0).ndims();
83 auto nelems = Input(0).get_nelems();
84 auto w_nelems = Input(1).get_nelems();
85 CAFFE_ENFORCE_GT(nelems, 0);
// The weight input must hold exactly one element.
86 CAFFE_ENFORCE_EQ(w_nelems, 1);
87 auto* output = Output(0);
// One scale and one tensor per input pair.
88 std::vector<float> scales;
89 scales.reserve(InputSize() / 2);
90 std::vector<itensor> inputs;
91 inputs.reserve(InputSize() / 2);
92 for (
int i = 0; i < InputSize(); i += 2) {
// Every data tensor must match the first one in rank and element count, and
// every weight must match the first weight's element count.
94 CAFFE_ENFORCE(X.ndims() == ndims);
95 CAFFE_ENFORCE(X.get_nelems() == nelems);
96 CAFFE_ENFORCE(Input(i + 1).get_nelems() == w_nelems);
// Read the weight's data handle as float and take its first element.
98 auto scale =
static_cast<float *
>(Input(i + 1).get_data_handle());
99 scales.push_back(scale[0]);
102 ideep::sum::compute(scales, inputs, *output);
// Schema for CopyCPUToIDEEP: describes input 0 ("cpu_blob") and output 0
// ("ideep_blob"). Fixed the "tensort" typo in the output description.
113 OPERATOR_SCHEMA(CopyCPUToIDEEP)
116 .Input(0,
"cpu_blob",
"The input TensorCPU to copy")
117 .Output(0,
"ideep_blob",
"The output IDEEP tensor to copy to");
// Schema for CopyIDEEPToCPU: describes input 0 ("ideep_blob") and output 0
// ("cpu_blob"). Fixed the "tensort" typo in the input description.
118 OPERATOR_SCHEMA(CopyIDEEPToCPU)
121 .Input(0,
"ideep_blob",
"The input IDEEP tensor to copy")
122 .Output(0,
"cpu_blob",
"The output TensorCPU to copy to");
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...