3 #include <caffe2/core/common.h> 4 #include <caffe2/core/context.h> 5 #include <caffe2/core/operator.h> 6 #include <caffe2/ideep/ideep_utils.h> 7 #include <caffe2/proto/caffe2_pb.h> 38 template <
class CPUOp,
typename SkipOutputCopy = SkipIndices<>>
41 USE_IDEEP_DEF_ALIASES();
42 USE_IDEEP_OPERATOR_FUNCTIONS();
46 CAFFE_ENFORCE_EQ(def.device_option().device_type(), PROTO_IDEEP);
47 base_def_.CopyFrom(def);
50 base_def_.mutable_device_option()->CopyFrom(def.device_option());
51 base_def_.mutable_device_option()->set_device_type(PROTO_CPU);
54 std::unordered_map<string, string> forwarded_output_blobs;
55 for (
int i = 0; i < base_def_.output_size(); i++) {
58 string parent_name(base_def_.output(i));
59 if (!SkipOutputCopy::Contains(i)) {
60 parent_name +=
"_cpu_output_blob_" + base_def_.type();
62 local_output_blobs_.push_back(ws->
CreateBlob(parent_name));
63 CHECK_NOTNULL(local_output_blobs_.back());
64 forwarded_output_blobs[base_def_.output(i)] = parent_name;
65 output_inplace_.push_back(
false);
66 for (
const string &input_name : base_def_.input()) {
67 if (input_name == base_def_.output(i)) {
68 output_inplace_[i] =
true;
73 local_ws_.reset(
new Workspace(ws, forwarded_output_blobs));
75 for (
const string& name : base_def_.input()) {
76 local_input_blobs_.push_back(local_ws_->CreateBlob(name));
77 CHECK_NOTNULL(local_input_blobs_.back());
79 input_share_.resize(local_input_blobs_.size(),
false);
80 base_op_.reset(
new CPUOp(base_def_, local_ws_.get()));
83 bool RunOnDevice()
override {
84 for (
int i = 0; i < InputSize(); ++i) {
85 if (InputIsType<itensor>(i) &&
86 Input(i).get_data_type() == itensor::data_type::f32) {
87 auto& input = Input(i);
88 if (input_share_[i]) {
89 local_input_blobs_[i]->Reset();
91 input_share_[i] =
false;
92 auto dtensor = BlobGetMutableTensor(local_input_blobs_[i], CPU);
93 dtensor->Resize(input.get_dims());
94 if (input.is_public_format()) {
95 dtensor->ShareExternalPointer(
96 static_cast<float*>(input.get_data_handle()));
98 input.reorder_to(dtensor->template mutable_data<float>());
101 VLOG(1) <<
"Input " << i <<
" is not ideep::tensor. Skipping copy.";
102 if (OperatorBase::Inputs()[i]->GetRaw() != local_input_blobs_[i]->GetRaw()) {
106 local_input_blobs_[i]->ShareExternal(
107 const_cast<void *>(OperatorBase::Inputs()[i]->GetRaw()),
108 OperatorBase::Inputs()[i]->meta());
110 input_share_[i] =
true;
116 if (!base_op_->Run(0)) {
117 LOG(ERROR) <<
"Base op run failed in IDEEPFallbackOp. Def: " 118 << ProtoDebugString(this->debug_def());
122 for (
int i = 0; i < OutputSize(); ++i) {
123 if (SkipOutputCopy::Contains(i)) {
124 VLOG(1) <<
"Copy output: index " << i <<
" skipped.";
128 BlobIsTensorType(*local_output_blobs_[i], CPU),
129 "IDEEP fallback op currently does not support non-TensorCPU " 130 "output type who needs copying.");
131 const auto& src = local_output_blobs_[i]->template Get<TensorCPU>();
132 auto src_dims = src.sizes().vec();
133 if (src.template IsType<float>() && src.dim() != 0 && base_op_->type() !=
"Python") {
134 Blob* dst = OperatorBase::OutputBlob(i);
138 if (!dst->template IsType<itensor>() ||
139 !dst->template Get<itensor>().is_public_format()) {
140 dst->
Reset(
new itensor());
143 itensor::dims dst_dims (src_dims.begin(), src_dims.end());
144 auto dtensor = dst->template GetMutable<itensor>();
145 if (dtensor->get_dims() != dst_dims) {
146 dtensor->resize(dst_dims, itensor::data_type::f32);
148 if (output_inplace_[i]) {
149 dtensor->reorder_from(dst_dims, itensor::data_type::f32,
150 const_cast<void*>(src.raw_data()));
152 dtensor->set_data_handle(const_cast<void *>(src.raw_data()));
155 VLOG(2) <<
"Output " << base_def_.output(i) <<
" as CPUTensor";
156 Blob* dst = OperatorBase::OutputBlob(i);
157 if (output_inplace_[i]) {
158 auto dtensor = BlobGetMutableTensor(dst, CPU);
159 dtensor->CopyFrom(src);
162 BlobSetTensor(dst, src.Alias());
170 vector<Blob*> local_input_blobs_;
171 vector<Blob*> local_output_blobs_;
172 vector<bool> output_inplace_;
173 vector<bool> input_share_;
174 std::unique_ptr<CPUOp> base_op_;
175 std::unique_ptr<Workspace> local_ws_;
176 OperatorDef base_def_;
// Blob is a general container that hosts a typed pointer.
// Blob * CreateBlob(const string &name)
// Creates a blob of the given name.
// Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
// A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
// T * Reset(T *allocated)
// Sets the underlying object to the allocated one.
// A templated class to allow one to wrap a CPU operator as an IDEEP operator.