1 #include "caffe2/operators/onnxifi_op.h" 7 void SetInputTensorDescriptorTypeAndBuffer(
9 onnxTensorDescriptorV1* desc) {
10 if (cpu_tensor.template IsType<float>()) {
11 desc->dataType = ONNXIFI_DATATYPE_FLOAT32;
12 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<
float>());
13 }
else if (cpu_tensor.template IsType<int32_t>()) {
14 desc->dataType = ONNXIFI_DATATYPE_INT32;
15 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<int32_t>());
16 }
else if (cpu_tensor.template IsType<int8_t>()) {
17 desc->dataType = ONNXIFI_DATATYPE_INT8;
18 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<int8_t>());
19 }
else if (cpu_tensor.template IsType<uint8_t>()) {
20 desc->dataType = ONNXIFI_DATATYPE_UINT8;
21 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<uint8_t>());
22 }
else if (cpu_tensor.template IsType<int64_t>()) {
23 desc->dataType = ONNXIFI_DATATYPE_INT64;
24 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<int64_t>());
25 }
else if (cpu_tensor.template IsType<int16_t>()) {
26 desc->dataType = ONNXIFI_DATATYPE_INT16;
27 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<int16_t>());
28 }
else if (cpu_tensor.template IsType<uint16_t>()) {
29 desc->dataType = ONNXIFI_DATATYPE_UINT16;
30 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor.data<uint16_t>());
33 "Unsupported tensor type in ONNXIFI: ", cpu_tensor.dtype().name());
37 TypeMeta OnnixfiTypeToDataType(uint64_t onnxifi_type) {
38 static std::map<uint64_t, TypeMeta> data_type_map {
39 {ONNXIFI_DATATYPE_FLOAT32, TypeMeta::Make<float>()},
40 {ONNXIFI_DATATYPE_INT32, TypeMeta::Make<int>()},
41 {ONNXIFI_DATATYPE_INT8, TypeMeta::Make<int8_t>()},
42 {ONNXIFI_DATATYPE_UINT8, TypeMeta::Make<uint8_t>()},
43 {ONNXIFI_DATATYPE_INT64, TypeMeta::Make<int64_t>()},
44 {ONNXIFI_DATATYPE_INT16, TypeMeta::Make<int16_t>()},
45 {ONNXIFI_DATATYPE_UINT16, TypeMeta::Make<uint16_t>()},
47 const auto it = data_type_map.find(onnxifi_type);
48 CAFFE_ENFORCE(it != data_type_map.end(),
"Unsupported ONXNIFI data type: ", onnxifi_type);
52 void SetOutputTensorDescriptorTypeAndBuffer(
53 uint64_t onnxifi_type,
55 onnxTensorDescriptorV1* desc) {
56 desc->dataType = onnxifi_type;
57 desc->buffer =
reinterpret_cast<onnxPointer
>(cpu_tensor->raw_mutable_data(OnnixfiTypeToDataType(onnxifi_type)));
60 void BlobToTensorDescriptor(
61 const std::string& name,
63 onnxTensorDescriptorV1* desc,
64 std::vector<std::vector<uint64_t>>* shapes) {
65 const Blob* blob = ws->GetBlob(name);
66 CAFFE_ENFORCE(blob,
"Blob ", name,
" doesn't exist");
71 BlobIsTensorType(*blob, CPU),
72 "Initialization blob ",
74 " needs to be TensorCPU");
75 desc->tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
76 desc->memoryType = ONNXIFI_MEMORY_TYPE_CPU;
79 const auto& cpu_tensor = blob->template Get<TensorCPU>();
80 SetInputTensorDescriptorTypeAndBuffer(cpu_tensor, desc);
83 const auto shape = cpu_tensor.sizes();
84 desc->dimensions = shape.size();
85 shapes->emplace_back(shape.cbegin(), shape.cend());
86 desc->shape = shapes->back().data();
91 std::vector<onnxTensorDescriptorV1>
92 OnnxifiOp<float, CPUContext>::buildInitializationList(
94 std::unordered_set<std::string>* initialization_list,
95 std::vector<std::string>* weight_names,
96 std::vector<std::vector<uint64_t>>* weight_shapes) {
97 const std::vector<string>& ws_blobs = ws->Blobs();
101 weight_names->reserve(ws_blobs.size());
102 std::vector<onnxTensorDescriptorV1> descs;
103 for (
const auto& s : ws_blobs) {
104 auto it = initialization_list->find(s);
105 if (it != initialization_list->end()) {
106 weight_names->emplace_back(s);
107 onnxTensorDescriptorV1 tensor_desc;
108 tensor_desc.name = weight_names->back().c_str();
109 BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
110 descs.push_back(tensor_desc);
111 initialization_list->erase(it);
115 initialization_list->empty(),
"Unfulfilled initialization list");
120 bool OnnxifiOp<float, CPUContext>::RunOnDevice() {
121 CAFFE_ENFORCE_EQ(input_desc_.size(), InputSize());
122 for (
unsigned i = 0U; i < InputSize(); ++i) {
123 const auto& input_tensor = Input(i);
124 const auto tensor_dims = input_tensor.sizes();
125 auto& tensor_descriptor = input_desc_[i];
126 tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
127 tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
128 tensor_descriptor.dimensions = tensor_dims.size();
129 input_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
130 tensor_descriptor.shape = input_shapes_.back().data();
131 SetInputTensorDescriptorTypeAndBuffer(input_tensor, &tensor_descriptor);
134 CAFFE_ENFORCE_EQ(output_desc_.size(), OutputSize());
135 for (
unsigned i = 0U; i < OutputSize(); ++i) {
136 std::vector<size_t> tensor_dims;
137 uint64_t type = SetOutputShapeAndType(i, &tensor_dims);
138 auto& tensor_descriptor = output_desc_[i];
139 tensor_descriptor.tag = ONNXIFI_TAG_TENSOR_DESCRIPTOR_V1;
140 tensor_descriptor.memoryType = ONNXIFI_MEMORY_TYPE_CPU;
141 tensor_descriptor.dimensions = tensor_dims.size();
143 tensor_descriptor.dimensions != 0,
144 tensor_descriptor.name,
146 output_shapes_.emplace_back(tensor_dims.cbegin(), tensor_dims.cend());
147 tensor_descriptor.shape = output_shapes_.back().data();
148 std::vector<int64_t> tensor_dims_int64;
149 std::copy(tensor_dims.cbegin(), tensor_dims.cend(), std::back_inserter(tensor_dims_int64));
150 auto* output_tensor = Output(i, tensor_dims_int64, at::dtype(OnnixfiTypeToDataType(type)).device(CPU));
151 SetOutputTensorDescriptorTypeAndBuffer(
152 type, output_tensor, &tensor_descriptor);
154 bool ext_supported =
false;
155 onnxMemoryFenceV1 input_fence;
156 onnxMemoryFenceV1 output_fence;
157 #ifdef ONNXIFI_ENABLE_EXT 164 if (onnxSetIOAndRunGraphPointer_ !=
nullptr) {
165 ext_supported =
true;
166 output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
167 output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
169 (*onnxSetIOAndRunGraphPointer_)(
177 ONNXIFI_STATUS_SUCCESS);
179 lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
181 lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
184 if (!ext_supported) {
186 lib_->onnxSetGraphIO(
191 output_desc_.data()),
192 ONNXIFI_STATUS_SUCCESS);
194 input_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
195 input_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
197 lib_->onnxInitEvent(backend_, &input_fence.event),
198 ONNXIFI_STATUS_SUCCESS);
199 output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
200 output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
205 lib_->onnxRunGraph(graph_, &input_fence, &output_fence),
206 ONNXIFI_STATUS_SUCCESS);
208 lib_->onnxSignalEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
210 lib_->onnxWaitEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
214 lib_->onnxReleaseEvent(input_fence.event), ONNXIFI_STATUS_SUCCESS);
216 lib_->onnxReleaseEvent(output_fence.event), ONNXIFI_STATUS_SUCCESS);
221 REGISTER_CPU_OPERATOR(Onnxifi, OnnxifiOp<float, CPUContext>);
222 OPERATOR_SCHEMA(Onnxifi)
223 .NumInputs(0, INT_MAX)
224 .NumOutputs(0, INT_MAX)
226 The Onnxifi operator is a black-box operator to lower the computation to Onnxifi backend 230 "(string default=\"\") Serialized ONNX model to be converted to backend representation")
233 "Initialization pair indicating the mapping of the name between NetDef and ONNX model");
// (Extraction residue, not part of this operator's source:)
// A global dictionary that holds information about what Caffe2 modules have
// been loaded in the current runtime.