Caffe2 - C++ API
A deep learning, cross-platform ML framework
operator_fallback_ideep.h
1 #pragma once
2 
3 #include <caffe2/core/common.h>
4 #include <caffe2/core/context.h>
5 #include <caffe2/core/operator.h>
6 #include <caffe2/ideep/ideep_utils.h>
7 #include <caffe2/proto/caffe2_pb.h>
8 
9 namespace caffe2 {
10 
38 template <class CPUOp, typename SkipOutputCopy = SkipIndices<>>
39 class C10_EXPORT IDEEPFallbackOp final : public IDEEPOperator {
40  public:
41  USE_IDEEP_DEF_ALIASES();
42  USE_IDEEP_OPERATOR_FUNCTIONS();
43 
44  IDEEPFallbackOp(const OperatorDef& def, Workspace* ws)
45  : IDEEPOperator(def, ws) {
46  CAFFE_ENFORCE_EQ(def.device_option().device_type(), PROTO_IDEEP);
47  base_def_.CopyFrom(def);
48  // base_def_ runs on CPU, so we will set its device option to CPU.
49  // Copy to allow random_seed to be correctly propagated.
50  base_def_.mutable_device_option()->CopyFrom(def.device_option());
51  base_def_.mutable_device_option()->set_device_type(PROTO_CPU);
52  // Create output blobs in parent workspace,
53  // then forward output blobs to local workspace.
54  std::unordered_map<string, string> forwarded_output_blobs;
55  for (int i = 0; i < base_def_.output_size(); i++) {
56  // For in-place case, the in/output tensor for local_ws must be
57  // re-created, instead of forwarding from current workspace.
58  string parent_name(base_def_.output(i));
59  if (!SkipOutputCopy::Contains(i)) {
60  parent_name += "_cpu_output_blob_" + base_def_.type();
61  }
62  local_output_blobs_.push_back(ws->CreateBlob(parent_name));
63  CHECK_NOTNULL(local_output_blobs_.back());
64  forwarded_output_blobs[base_def_.output(i)] = parent_name;
65  output_inplace_.push_back(false);
66  for (const string &input_name : base_def_.input()) {
67  if (input_name == base_def_.output(i)) {
68  output_inplace_[i] = true;
69  break;
70  }
71  }
72  }
73  local_ws_.reset(new Workspace(ws, forwarded_output_blobs));
74  // Set up the symbols for the local workspace.
75  for (const string& name : base_def_.input()) {
76  local_input_blobs_.push_back(local_ws_->CreateBlob(name));
77  CHECK_NOTNULL(local_input_blobs_.back());
78  }
79  input_share_.resize(local_input_blobs_.size(), false);
80  base_op_.reset(new CPUOp(base_def_, local_ws_.get()));
81  }
82 
83  bool RunOnDevice() override {
84  for (int i = 0; i < InputSize(); ++i) {
85  if (InputIsType<itensor>(i) &&
86  Input(i).get_data_type() == itensor::data_type::f32) {
87  auto& input = Input(i);
88  if (input_share_[i]) {
89  local_input_blobs_[i]->Reset();
90  }
91  input_share_[i] = false;
92  auto dtensor = BlobGetMutableTensor(local_input_blobs_[i], CPU);
93  dtensor->Resize(input.get_dims());
94  if (input.is_public_format()) {
95  dtensor->ShareExternalPointer(
96  static_cast<float*>(input.get_data_handle()));
97  } else {
98  input.reorder_to(dtensor->template mutable_data<float>());
99  }
100  } else {
101  VLOG(1) << "Input " << i << " is not ideep::tensor. Skipping copy.";
102  if (OperatorBase::Inputs()[i]->GetRaw() != local_input_blobs_[i]->GetRaw()) {
103  // Note(jiayq): This removes a const but conceptually
104  // local_input_blobs will only be used as const blob input for the
105  // base op so we are still fine.
106  local_input_blobs_[i]->ShareExternal(
107  const_cast<void *>(OperatorBase::Inputs()[i]->GetRaw()),
108  OperatorBase::Inputs()[i]->meta());
109  }
110  input_share_[i] = true;
111  }
112  }
113 
114  // Some CPU ops inherited from OperatorBase directly might need this default
115  // input argument '0' like 'PrefetchOperator'.
116  if (!base_op_->Run(0)) {
117  LOG(ERROR) << "Base op run failed in IDEEPFallbackOp. Def: "
118  << ProtoDebugString(this->debug_def());
119  return false;
120  }
121 
122  for (int i = 0; i < OutputSize(); ++i) {
123  if (SkipOutputCopy::Contains(i)) {
124  VLOG(1) << "Copy output: index " << i << " skipped.";
125  continue;
126  }
127  CAFFE_ENFORCE(
128  BlobIsTensorType(*local_output_blobs_[i], CPU),
129  "IDEEP fallback op currently does not support non-TensorCPU "
130  "output type who needs copying.");
131  const auto& src = local_output_blobs_[i]->template Get<TensorCPU>();
132  auto src_dims = src.sizes().vec();
133  if (src.template IsType<float>() && src.dim() != 0 && base_op_->type() != "Python") {
134  Blob* dst = OperatorBase::OutputBlob(i);
135  // The output tensor must be ideep tensor with public format.
136  // If reusing ideep tensor with non-public format, the tensor buffer
137  // will be interpreted incorrectly.
138  if (!dst->template IsType<itensor>() ||
139  !dst->template Get<itensor>().is_public_format()) {
140  dst->Reset(new itensor());
141  }
142 
143  itensor::dims dst_dims (src_dims.begin(), src_dims.end());
144  auto dtensor = dst->template GetMutable<itensor>();
145  if (dtensor->get_dims() != dst_dims) {
146  dtensor->resize(dst_dims, itensor::data_type::f32);
147  }
148  if (output_inplace_[i]) {
149  dtensor->reorder_from(dst_dims, itensor::data_type::f32,
150  const_cast<void*>(src.raw_data()));
151  } else {
152  dtensor->set_data_handle(const_cast<void *>(src.raw_data()));
153  }
154  } else {
155  VLOG(2) << "Output " << base_def_.output(i) << " as CPUTensor";
156  Blob* dst = OperatorBase::OutputBlob(i);
157  if (output_inplace_[i]) {
158  auto dtensor = BlobGetMutableTensor(dst, CPU);
159  dtensor->CopyFrom(src);
160  } else {
161  dst->Reset(new Tensor(CPU));
162  BlobSetTensor(dst, src.Alias());
163  }
164  }
165  }
166  return true;
167  }
168 
169  protected:
170  vector<Blob*> local_input_blobs_;
171  vector<Blob*> local_output_blobs_;
172  vector<bool> output_inplace_;
173  vector<bool> input_share_;
174  std::unique_ptr<CPUOp> base_op_;
175  std::unique_ptr<Workspace> local_ws_;
176  OperatorDef base_def_;
177 };
178 
179 } // namespace caffe2
Blob is a general container that hosts a typed pointer.
Definition: blob.h:24
Blob * CreateBlob(const string &name)
Creates a blob of the given name.
Definition: workspace.cc:100
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
T * Reset(T *allocated)
Sets the underlying object to the allocated one.
Definition: blob.h:132
A templated class to allow one to wrap a CPU operator as an IDEEP operator.