Caffe2 - C++ API
A deep learning, cross platform ML framework
onnxifi_op.cc
1 #include "caffe2/operators/onnxifi_op.h"
2 
3 namespace caffe2 {
4 
5 namespace {
6 
7 void SetInputTensorDescriptorTypeAndBuffer(
8  const Tensor& cpu_tensor,
9  onnxTensorDescriptorV1* desc) {
10  if (cpu_tensor.template IsType<float>()) {
11  desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
12  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<float>());
13  } else if (cpu_tensor.template IsType<int32_t>()) {
14  desc->dataType = ONNXIFI_DATATYPE_INT32;
15  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int32_t>());
16  } else if (cpu_tensor.template IsType<int8_t>()) {
17  desc->dataType = ONNXIFI_DATATYPE_INT8;
18  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int8_t>());
19  } else if (cpu_tensor.template IsType<uint8_t>()) {
20  desc->dataType = ONNXIFI_DATATYPE_UINT8;
21  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<uint8_t>());
22  } else if (cpu_tensor.template IsType<int64_t>()) {
23  desc->dataType = ONNXIFI_DATATYPE_INT64;
24  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int64_t>());
25  } else if (cpu_tensor.template IsType<int16_t>()) {
26  desc->dataType = ONNXIFI_DATATYPE_INT16;
27  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<int16_t>());
28  } else if (cpu_tensor.template IsType<uint16_t>()) {
29  desc->dataType = ONNXIFI_DATATYPE_UINT16;
30  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor.data<uint16_t>());
31  } else {
32  CAFFE_THROW(
33  "Unsupported tensor type in ONNXIFI: ", cpu_tensor.dtype().name());
34  }
35 }
36 
37 TypeMeta OnnixfiTypeToDataType(uint64_t onnxifi_type) {
38  static std::map<uint64_t, TypeMeta> data_type_map {
39  {ONNXIFI_DATATYPE_FLOAT32, TypeMeta::Make<float>()},
40  {ONNXIFI_DATATYPE_INT32, TypeMeta::Make<int>()},
41  {ONNXIFI_DATATYPE_INT8, TypeMeta::Make<int8_t>()},
42  {ONNXIFI_DATATYPE_UINT8, TypeMeta::Make<uint8_t>()},
43  {ONNXIFI_DATATYPE_INT64, TypeMeta::Make<int64_t>()},
44  {ONNXIFI_DATATYPE_INT16, TypeMeta::Make<int16_t>()},
45  {ONNXIFI_DATATYPE_UINT16, TypeMeta::Make<uint16_t>()},
46  };
47  const auto it = data_type_map.find(onnxifi_type);
48  CAFFE_ENFORCE(it != data_type_map.end(), "Unsupported ONXNIFI data type: ", onnxifi_type);
49  return it->second;
50 }
51 
52 void SetOutputTensorDescriptorTypeAndBuffer(
53  uint64_t onnxifi_type,
54  Tensor* cpu_tensor,
55  onnxTensorDescriptorV1* desc) {
56  desc->dataType = onnxifi_type;
57  desc->buffer = reinterpret_cast<onnxPointer>(cpu_tensor->raw_mutable_data(OnnixfiTypeToDataType(onnxifi_type)));
58 }
59 
60 void BlobToTensorDescriptor(
61  const std::string& name,
62  Workspace* ws,
63  onnxTensorDescriptorV1* desc,
64  std::vector<std::vector<uint64_t>>* shapes) {
65  const Blob* blob = ws->GetBlob(name);
66  CAFFE_ENFORCE(blob, "Blob ", name, " doesn't exist");
67 
68  // Memory type
69  // We only allow weights to be CPU tensor for now
70  CAFFE_ENFORCE(
71  BlobIsTensorType(*blob, CPU),
72  "Initialization blob ",
73  name,
74  " needs to be TensorCPU");
75  desc->tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
76  desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;
77 
78  // Data type
79  const auto& cpu_tensor = blob->template Get<TensorCPU>();
80  SetInputTensorDescriptorTypeAndBuffer(cpu_tensor, desc);
81 
82  // Set dims
83  const auto shape = cpu_tensor.sizes();
84  desc->dimensions = shape.size();
85  shapes->emplace_back(shape.cbegin(), shape.cend());
86  desc->shape = shapes->back().data();
87 }
88 } // namespace
89 
90 template <>
91 std::vector<onnxTensorDescriptorV1>
92 OnnxifiOp<float, CPUContext>::buildInitializationList(
93  Workspace* ws,
94  std::unordered_set<std::string>* initialization_list,
95  std::vector<std::string>* weight_names,
96  std::vector<std::vector<uint64_t>>* weight_shapes) {
97  const std::vector<string>& ws_blobs = ws->Blobs();
98  // Since onnxTensorDescriptorV1.name will point into the memory in
99  // weight_names, we need to prevent weight_names from reallocating by
100  // reserving enough memory ahead of time
101  weight_names->reserve(ws_blobs.size());
102  std::vector<onnxTensorDescriptorV1> descs;
103  for (const auto& s : ws_blobs) {
104  auto it = initialization_list->find(s);
105  if (it != initialization_list->end()) {
106  weight_names->emplace_back(s);
107  onnxTensorDescriptorV1 tensor_desc;
108  tensor_desc.name = weight_names->back().c_str();
109  BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
110  descs.push_back(tensor_desc);
111  initialization_list->erase(it);
112  }
113  }
114  CAFFE_ENFORCE(
115  initialization_list->empty(), "Unfulfilled initialization list");
116  return descs;
117 }
118 
119 template <>
120 bool OnnxifiOp<float, CPUContext>::RunOnDevice() {
121  CAFFE_ENFORCE_EQ(input_desc_.size(), InputSize());
122  for (unsigned i = 0U; i < InputSize(); ++i) {
123  const auto& input_tensor = Input(i);
124  const auto tensor_dims = input_tensor.sizes();
125  auto& tensor_descriptor = input_desc_[i];
126  tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
127  tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
128  tensor_descriptor.dimensions = tensor_dims.size();
129  input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
130  tensor_descriptor.shape = input_shapes_.back().data();
131  SetInputTensorDescriptorTypeAndBuffer(input_tensor, &tensor_descriptor);
132  }
133 
134  CAFFE_ENFORCE_EQ(output_desc_.size(), OutputSize());
135  for (unsigned i = 0U; i < OutputSize(); ++i) {
136  std::vector<size_t> tensor_dims;
137  uint64_t type = SetOutputShapeAndType(i, &tensor_dims);
138  auto& tensor_descriptor = output_desc_[i];
139  tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
140  tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
141  tensor_descriptor.dimensions = tensor_dims.size();
142  CAFFE_ENFORCE(
143  tensor_descriptor.dimensions != 0,
144  tensor_descriptor.name,
145  " has 0 dim");
146  output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
147  tensor_descriptor.shape = output_shapes_.back().data();
148  std::vector<int64_t> tensor_dims_int64;
149  std::copy(tensor_dims.cbegin(), tensor_dims.cend(), std::back_inserter(tensor_dims_int64));
150  auto* output_tensor = Output(i, tensor_dims_int64, at::dtype(OnnixfiTypeToDataType(type)).device(CPU));
151  SetOutputTensorDescriptorTypeAndBuffer(
152  type, output_tensor, &tensor_descriptor);
153  }
154  bool ext_supported = false;
155  onnxMemoryFenceV1 input_fence;
156  onnxMemoryFenceV1 output_fence;
157 #ifdef ONNXIFI_ENABLE_EXT
158 
164  if (onnxSetIOAndRunGraphPointer_ != nullptr) {
165  ext_supported = true;
166  output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
167  output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
168  CAFFE_ENFORCE_EQ(
169  (*onnxSetIOAndRunGraphPointer_)(
170  graph_,
171  input_desc_.size(),
172  input_desc_.data(),
173  output_desc_.size(),
174  output_desc_.data(),
175  &output_fence,
176  /* traceEvents */ nullptr),
177  ONNXIFI_STATUS_SUCCESS);
178  CAFFE_ENFORCE_EQ(
179  lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
180  CAFFE_ENFORCE_EQ(
181  lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
182  }
183 #endif
184  if (!ext_supported) {
185  CAFFE_ENFORCE_EQ(
186  lib_->onnxSetGraphIO(
187  graph_,
188  input_desc_.size(),
189  input_desc_.data(),
190  output_desc_.size(),
191  output_desc_.data()),
192  ONNXIFI_STATUS_SUCCESS);
193 
194  input_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
195  input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
196  CAFFE_ENFORCE_EQ(
197  lib_->onnxInitEvent(backend_, &input_fence.event),
198  ONNXIFI_STATUS_SUCCESS);
199  output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
200  output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
201 
202  // Call the async run on backend, signal event on input fence and wait for
203  // the event on output fence
204  CAFFE_ENFORCE_EQ(
205  lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
206  ONNXIFI_STATUS_SUCCESS);
207  CAFFE_ENFORCE_EQ(
208  lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
209  CAFFE_ENFORCE_EQ(
210  lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
211 
212  // Destroy the event objects
213  CAFFE_ENFORCE_EQ(
214  lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
215  CAFFE_ENFORCE_EQ(
216  lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
217  }
218  return true;
219 }
220 
// Register the CPU implementation of the Onnxifi operator.
REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>);
// Schema: the operator accepts any number of inputs/outputs since the
// embedded ONNX model determines the actual IO signature.
OPERATOR_SCHEMA(Onnxifi)
    .NumInputs(0, INT_MAX)
    .NumOutputs(0, INT_MAX)
    .SetDoc(R"DOC(
 The Onnxifi operator is a black-box operator to lower the computation to Onnxifi backend
 )DOC")
    .Arg(
        "onnx_model",
        "(string default=\"\") Serialized ONNX model to be converted to backend representation")
    .Arg(
        "initializers",
        "Initialization pair indicating the mapping of the name between NetDef and ONNX model");
234 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13