1 #include "caffe2/opt/onnxifi_transformer.h" 4 #include <unordered_set> 6 #include "onnx/proto_utils.h" 8 #include "caffe2/core/context.h" 9 #include "caffe2/core/logging.h" 10 #include "caffe2/core/operator.h" 11 #include "caffe2/core/tensor.h" 12 #include "caffe2/onnx/onnx_exporter.h" 13 #include "caffe2/opt/backend_cutting.h" 14 #include "caffe2/utils/proto_utils.h" 15 #include "caffe2/utils/string_utils.h" 20 const std::string kRealBatchSizeBlob(
"real_batch_size");
// Capacity of the character buffer that getBackendId passes to
// onnxGetBackendInfo when reading a backend's device description.
21 constexpr
size_t kBufferSize = 64;
// Builds a name -> TensorShape map from `info_map` by keeping only the
// `shape` member of every ShapeInfo entry.
24 std::unordered_map<std::string, TensorShape> stripShapeInfoMap(
25 const ShapeInfoMap& info_map) {
26 std::unordered_map<std::string, TensorShape> shape_map;
27 for (
const auto& kv : info_map) {
28 shape_map.emplace(kv.first, kv.second.shape);
// Translates a Caffe2 TensorProto data type into the matching
// ONNXIFI_DATATYPE_* constant. The mapping is written with the local
// CAFFE2_TO_ONNXIFI_TYPE helper macro (one `case` per supported type).
// A type without a mapping is logged as a warning and reported as
// ONNXIFI_DATATYPE_FLOAT32.
33 uint64_t onnxifiDataType(caffe2::TensorProto::DataType t) {
34 #define CAFFE2_TO_ONNXIFI_TYPE(x, y) \ 35 case (caffe2::TensorProto::x): \ 38 CAFFE2_TO_ONNXIFI_TYPE(FLOAT, ONNXIFI_DATATYPE_FLOAT32);
39 CAFFE2_TO_ONNXIFI_TYPE(INT8, ONNXIFI_DATATYPE_INT8);
40 CAFFE2_TO_ONNXIFI_TYPE(UINT8, ONNXIFI_DATATYPE_UINT8);
41 CAFFE2_TO_ONNXIFI_TYPE(INT16, ONNXIFI_DATATYPE_INT16);
42 CAFFE2_TO_ONNXIFI_TYPE(UINT16, ONNXIFI_DATATYPE_UINT16);
43 CAFFE2_TO_ONNXIFI_TYPE(INT32, ONNXIFI_DATATYPE_INT32);
44 CAFFE2_TO_ONNXIFI_TYPE(INT64, ONNXIFI_DATATYPE_INT64);
45 CAFFE2_TO_ONNXIFI_TYPE(FLOAT16, ONNXIFI_DATATYPE_FLOAT16);
// Fallback path for types that have no entry above.
47 LOG(WARNING) <<
"Unsupported Caffe2 tensor type: " << t
48 <<
", fallback to FLOAT";
49 return ONNXIFI_DATATYPE_FLOAT32;
// (The #undef below retires the helper macro used by onnxifiDataType.)
//
// convertToValueInfo: produces one ONNX ValueInfoProto per entry of `names`.
// Each name is looked up in `shape_hints` first. When it is missing there, it
// is looked up in `extra_shape_hints`, whose TypeProto is copied verbatim;
// a warning is logged if that second lookup fails too. For a name found in
// `shape_hints`, the tensor type is filled with the element type converted
// through onnx::Caffe2TypeToOnnxType and one dim_value per dimension.
// NOTE(review): the `else` lines joining these branches are not visible in
// this excerpt — confirm the exact nesting against the full file.
51 #undef CAFFE2_TO_ONNXIFI_TYPE 54 std::vector<::ONNX_NAMESPACE::ValueInfoProto> convertToValueInfo(
55 const std::vector<std::string>& names,
56 const std::unordered_map<std::string, TensorShape>& shape_hints,
57 const std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>&
59 std::vector<::ONNX_NAMESPACE::ValueInfoProto> r;
60 for (
const auto& s : names) {
// Works on the most recently appended element of `r`.
62 auto& value_info = r.back();
63 value_info.set_name(s);
64 const auto it = shape_hints.find(s);
65 if (it == shape_hints.end()) {
66 const auto eit = extra_shape_hints.find(s);
67 if (eit == extra_shape_hints.end()) {
68 LOG(WARNING) <<
"Cannot get shape of " << s;
70 value_info.mutable_type()->CopyFrom(eit->second);
73 auto* tensor_type = value_info.mutable_type()->mutable_tensor_type();
74 tensor_type->set_elem_type(
75 onnx::Caffe2TypeToOnnxType(it->second.data_type()));
76 auto* shape = tensor_type->mutable_shape();
77 for (
int i = 0; i < it->second.dims().size(); ++i) {
78 shape->add_dim()->set_dim_value(it->second.dims(i));
// Collects the inputs of `net` into `total_inputs_vec` and the subset that
// are weights into `initialization_list`.
//   weights_in_ws       - names treated as weights when an op consumes them.
//   extra_weights       - additional weight names handled before the ops.
//   initialization_list - out: weight names.
//   total_inputs_vec    - out: input names in first-seen order.
// The local set `total_inputs` is what prevents duplicates in the vector.
96 void getWeightsAndInputs(
98 const std::unordered_set<std::string>& weights_in_ws,
99 const std::vector<std::string>& extra_weights,
100 std::unordered_set<std::string>* initialization_list,
101 std::vector<std::string>* total_inputs_vec) {
102 std::unordered_set<std::string> total_inputs;
// Extra weights go first; each is appended to the vector only the first time
// it is seen and is also recorded in the initialization list.
105 for (
const auto& extra_weight : extra_weights) {
106 if (total_inputs.emplace(extra_weight).second) {
107 total_inputs_vec->emplace_back(extra_weight);
109 initialization_list->emplace(extra_weight);
// External inputs of the net, used below to recognize boundary inputs.
113 std::unordered_set<std::string> boundary_inputs;
114 for (
const auto& i : net.external_input()) {
115 boundary_inputs.emplace(i);
118 for (
const auto& op : net.op()) {
119 for (
const auto& input : op.input()) {
// NOTE(review): how `not_seen` is consumed is not visible in this excerpt;
// presumably already-seen inputs are skipped — confirm.
120 bool not_seen = total_inputs.emplace(input).second;
// Weights go to both outputs; boundary inputs only to the input vector.
124 if (weights_in_ws.count(input)) {
126 total_inputs_vec->emplace_back(input);
127 initialization_list->emplace(input);
128 VLOG(2) <<
"Add weights: " << input;
129 }
else if (boundary_inputs.count(input)) {
130 VLOG(2) <<
"Adding boundary input: " << input;
131 total_inputs_vec->emplace_back(input);
// Walks the ops of `net` and copies them into `clone`. For an op of type
// "If", the ops of its `then_net` and `else_net` NetDef arguments (each only
// when present) are copied in that order.
// NOTE(review): the declaration of `clone`, the branch that guards the final
// CopyFrom(op), and what happens to `clone` afterwards are not visible in
// this excerpt — presumably non-"If" ops are copied unchanged and the result
// replaces *net; confirm against the full file.
137 void unrollIfOps(NetDef* net) {
140 for (
const auto& op : net->op()) {
141 if (op.type() ==
"If") {
142 ArgumentHelper helper(op);
143 if (helper.HasSingleArgumentOfType<NetDef>(
"then_net")) {
144 auto then_net = helper.GetSingleArgument<NetDef>(
"then_net", NetDef());
145 for (
const auto& nested_op : then_net.op()) {
146 clone.add_op()->CopyFrom(nested_op);
149 if (helper.HasSingleArgumentOfType<NetDef>(
"else_net")) {
150 auto else_net = helper.GetSingleArgument<NetDef>(
"else_net", NetDef());
151 for (
const auto& nested_op : else_net.op()) {
152 clone.add_op()->CopyFrom(nested_op);
156 clone.add_op()->CopyFrom(op);
162 void fillModelInfo(::ONNX_NAMESPACE::ModelProto* model) {
163 model->set_ir_version(::ONNX_NAMESPACE::Version::IR_VERSION);
164 model->set_producer_name(
"caffe2");
165 auto* opset_id = model->add_opset_import();
166 opset_id->set_domain(
"");
167 opset_id->set_version(7);
// Returns the name of the blob that carries the real sequence size for
// `blob_name`: the input name with "_real_seq_size" appended.
std::string MakeSeqSizeBlob(const std::string& blob_name) {
  std::string seq_size_blob(blob_name);
  seq_size_blob.append("_real_seq_size");
  return seq_size_blob;
}
// Returns the name given to the output of an AdjustBatch op placed on
// `input`: the input name with "_post_adjust_batch" appended.
std::string MakeOutputForAdjustBatchOp(const std::string& input) {
  std::string adjusted_name(input);
  adjusted_name.append("_post_adjust_batch");
  return adjusted_name;
}
// Returns the name given to the input of an AdjustBatch op placed on
// `output`: the output name with "_pre_adjust_batch" appended.
std::string MakeInputForAdjustBatchOp(const std::string& output) {
  std::string unadjusted_name(output);
  unadjusted_name.append("_pre_adjust_batch");
  return unadjusted_name;
}
// Builds an OperatorDef of type "AdjustBatch" that reads `input_blob`, writes
// `output_blob`, and carries an integer argument "max_batch_size".
// `real_batch_size_blob` is attached either as a second output or as a second
// input, selected by `adjust_to_max_batch_size`.
// NOTE(review): one parameter line (max_batch_size) and the else-lines of the
// branch below are not visible in this excerpt.
182 OperatorDef MakeAdjustBatchOp(
183 const std::string& input_blob,
184 const std::string& output_blob,
186 const std::string& real_batch_size_blob,
187 bool adjust_to_max_batch_size) {
188 OperatorDef adjust_batch_op;
189 adjust_batch_op.set_type(
"AdjustBatch");
190 auto* arg = adjust_batch_op.add_arg();
191 arg->set_name(
"max_batch_size");
192 arg->set_i(max_batch_size);
193 adjust_batch_op.add_input(input_blob);
194 adjust_batch_op.add_output(output_blob);
// When adjusting to the max batch size, a non-empty real-batch-size blob is
// emitted as an extra output of the op.
195 if (adjust_to_max_batch_size) {
196 if (!real_batch_size_blob.empty()) {
197 adjust_batch_op.add_output(real_batch_size_blob);
// Here the real-batch-size blob is consumed as an extra input instead.
200 adjust_batch_op.add_input(real_batch_size_blob);
202 return adjust_batch_op;
205 std::unordered_set<string> ToHashSet(
206 const ::google::protobuf::RepeatedPtrField<string>& strs) {
207 return std::unordered_set<string>(strs.begin(), strs.end());
// Returns the size of the first dimension recorded in `shape_info.shape`
// (dims(0)). A shape with no dimensions is special-cased.
// NOTE(review): the body of the empty-dims branch is not visible in this
// excerpt, and `blob_name` is not used by any visible line.
210 int64_t GetBlob1stDimSize(
211 const ShapeInfo& shape_info,
212 const string& blob_name) {
213 if (shape_info.shape.dims_size() == 0) {
216 return shape_info.shape.dims(0);
// Inserts AdjustBatch ops around `onnxifi_net` and rewrites its op
// inputs/outputs to go through them.
//   shape_hints - per-blob ShapeInfo; its dim_type decides whether and how a
//                 blob is adjusted.
//   input_ops   - out: AdjustBatch ops created for external inputs.
//   output_ops  - out: AdjustBatch ops created for external outputs.
// Returns a map from each original blob name to the name it was renamed to
// inside the net.
// NOTE(review): several lines (the onnxifi_net parameter, some call arguments
// and enforce macros) are not visible in this excerpt.
224 std::unordered_map<std::string, std::string> AddAdjustBatchOps(
225 const ShapeInfoMap& shape_hints,
227 vector<OperatorDef>* input_ops,
228 vector<OperatorDef>* output_ops) {
229 std::unordered_map<std::string, std::string> renaming_map;
230 const auto external_inputs = ToHashSet(onnxifi_net->external_input());
231 const auto external_outputs = ToHashSet(onnxifi_net->external_output());
// Names of real-batch-size blobs already produced by some input-side op.
232 std::unordered_set<std::string> real_batch_size_blobs;
// Post-adjust blob names for which an input-side op was already created.
233 std::unordered_set<std::string> post_adjust_inputs;
235 for (
auto& op : *(onnxifi_net->mutable_op())) {
// ---- inputs of this op ----
241 for (
auto& input_blob : *(op.mutable_input())) {
242 if (external_inputs.count(input_blob)) {
243 auto shape_info_it = shape_hints.find(input_blob);
244 if (shape_info_it == shape_hints.end()) {
245 LOG(WARNING) <<
"Cannot find shape_info for external input blob: " 249 std::string real_batch_size_blob =
"";
250 auto max_batch_size = 0;
// BATCH blobs share one real-batch-size blob per max batch size (the size is
// part of the name); SEQ blobs get a per-input sequence-size blob.
251 if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
252 max_batch_size = GetBlob1stDimSize(shape_info_it->second, input_blob);
253 real_batch_size_blob =
254 kRealBatchSizeBlob +
"_" + c10::to_string(max_batch_size);
255 }
else if (shape_info_it->second.dim_type == ShapeInfo::DimType::SEQ) {
256 max_batch_size = GetBlob1stDimSize(shape_info_it->second, input_blob);
257 real_batch_size_blob = MakeSeqSizeBlob(input_blob);
262 auto output_blob = MakeOutputForAdjustBatchOp(input_blob);
// ret.second is true only the first time this real-batch-size name is seen;
// later ops for the same name receive an empty string instead.
263 auto ret = real_batch_size_blobs.emplace(real_batch_size_blob);
// Create the AdjustBatch op only once per post-adjust blob.
264 if (post_adjust_inputs.emplace(output_blob).second) {
265 input_ops->push_back(MakeAdjustBatchOp(
269 ret.second ? real_batch_size_blob :
"",
// Redirect this op's input to the adjusted blob and remember the renaming.
272 renaming_map[input_blob] = output_blob;
273 input_blob = output_blob;
274 }
else if (renaming_map.count(input_blob)) {
// The blob was renamed earlier; follow the renaming.
279 input_blob = renaming_map[input_blob];
// ---- outputs of this op ----
285 for (
auto& output_blob : *(op.mutable_output())) {
286 if (external_outputs.count(output_blob)) {
287 auto shape_info_it = shape_hints.find(output_blob);
289 shape_info_it != shape_hints.end(),
290 "Cannot find shape info for ",
292 " for AdjustBatchOp insertion");
293 if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
294 auto max_batch_size =
295 GetBlob1stDimSize(shape_info_it->second, output_blob);
// Same naming scheme as on the input side, so the lookup below can match a
// blob registered while processing inputs.
296 std::string real_size_blob =
297 kRealBatchSizeBlob +
"_" + c10::to_string(max_batch_size);
299 real_batch_size_blobs.count(real_size_blob),
303 " to make AdjustBatchOp");
304 auto input_blob = MakeInputForAdjustBatchOp(output_blob);
305 output_ops->push_back(MakeAdjustBatchOp(
// The op now writes the pre-adjust blob; record the renaming.
311 renaming_map[output_blob] = input_blob;
312 output_blob = input_blob;
313 }
else if (shape_info_it->second.dim_type == ShapeInfo::DimType::SEQ) {
// SEQ outputs only produce a warning; no op is added in the visible lines.
314 LOG(WARNING) <<
"It's unusual that output tesnor " << output_blob
315 <<
" is of dim_type SEQ. " 316 <<
"AdjustBatchOp won't attached " 317 <<
"and it might degrade the performance";
// Assembles the resulting net by appending, in this order: every op of
// `input_ops`, then `onnxifi_op`, then every op of `output_ops`.
// NOTE(review): the declaration and return of `net_opt` are not visible in
// this excerpt.
326 NetDef ComposeResultNet(
327 const vector<OperatorDef>& input_ops,
328 const vector<OperatorDef>& output_ops,
329 const OperatorDef& onnxifi_op) {
331 for (
const auto& op : input_ops) {
332 net_opt.add_op()->CopyFrom(op);
334 net_opt.add_op()->CopyFrom(onnxifi_op);
336 for (
const auto& op : output_ops) {
337 net_opt.add_op()->CopyFrom(op);
// Stores `opts`, loads the ONNXIFI library (enforcing that it loaded), and
// enumerates the available backends with two onnxGetBackendIDs calls.
// NOTE(review): the enforce macros wrapping the status comparisons are not
// visible in this excerpt.
344 OnnxifiTransformer::OnnxifiTransformer(
const OnnxifiTransformerOptions& opts)
345 : BackendTransformerBase(), opts_(opts) {
346 lib_ = onnx::initOnnxifiLibrary();
347 CAFFE_ENFORCE(lib_,
"Cannot initialize ONNXIFI library");
// First query: a null ID buffer, so only num_backends_ is filled in; the
// status is compared against ONNXIFI_STATUS_FALLBACK.
349 lib_->onnxGetBackendIDs(
nullptr, &num_backends_),
350 ONNXIFI_STATUS_FALLBACK);
352 num_backends_, 0,
"At least 1 onnxifi backend should be available");
// Second query: the buffer is sized to num_backends_ and receives the IDs.
353 backend_ids_.resize(num_backends_);
355 lib_->onnxGetBackendIDs(backend_ids_.data(), &num_backends_),
356 ONNXIFI_STATUS_SUCCESS);
359 OnnxifiTransformer::~OnnxifiTransformer() {
360 for (
unsigned i = 0; i < num_backends_; ++i) {
361 if (lib_->onnxReleaseBackendID(backend_ids_[i]) != ONNXIFI_STATUS_SUCCESS) {
362 LOG(ERROR) <<
"Error when calling onnxReleaseBackendID";
// Assembles the "Onnxifi" OperatorDef that stands in for a lowered subgraph.
//   onnx_model_str      - serialized model, stored in arg "onnx_model".
//   output_shape_hints  - shapes used for the per-output shape hint args.
//   initialization_list - weight names, stored in arg "initializers".
//   external_inputs     - candidate inputs of the op.
//   external_outputs    - outputs of the op, also stored in "output_names".
// Also attaches "use_onnx", "backend_id", the model id and the net position.
// NOTE(review): the declaration and return of `op` are not visible in this
// excerpt.
367 OperatorDef OnnxifiTransformer::BuildOnnxifiOp(
368 const std::string& onnx_model_str,
369 const std::unordered_map<std::string, TensorShape>& output_shape_hints,
370 const std::unordered_set<std::string>& initialization_list,
371 const std::vector<std::string>& external_inputs,
372 const std::vector<std::string>& external_outputs) {
374 op.set_type(
"Onnxifi");
375 auto* onnx_model_arg = op.add_arg();
376 onnx_model_arg->set_name(
"onnx_model");
377 onnx_model_arg->set_s(onnx_model_str);
381 auto* initializers_arg = op.add_arg();
382 initializers_arg->set_name(
"initializers");
383 for (
const auto& s : initialization_list) {
384 initializers_arg->add_strings(s);
// Only inputs that are not initializers are listed in "input_names".
388 auto* input_names = op.add_arg();
389 input_names->set_name(
"input_names");
390 for (
const auto& input : external_inputs) {
391 if (!initialization_list.count(input)) {
393 input_names->add_strings(input);
396 auto* output_names = op.add_arg();
397 output_names->set_name(
"output_names");
398 for (
const auto& output : external_outputs) {
399 op.add_output(output);
400 output_names->add_strings(output);
// One "output_shape_hint_<i>" arg per output with a known shape: the first
// int is the ONNXIFI data type, the remaining ints are the dimensions.
404 for (
int i = 0; i < op.output_size(); ++i) {
405 const auto& o = op.output(i);
406 const auto it = output_shape_hints.find(o);
407 if (it != output_shape_hints.end()) {
408 const auto& shape = it->second;
409 auto* output_shape_hint_arg = op.add_arg();
410 output_shape_hint_arg->set_name(c10::str(
"output_shape_hint_", i));
411 output_shape_hint_arg->add_ints(onnxifiDataType(shape.data_type()));
412 for (
const auto& d : shape.dims()) {
413 output_shape_hint_arg->add_ints(d);
416 VLOG(2) <<
"Adding output hint: " << o;
421 AddArgument(
"use_onnx", opts_.use_onnx ? 1 : 0, &op);
424 AddArgument(
"backend_id", idx_, &op);
427 AddArgument(kModelId, model_id_, &op);
// onnxifi_op_id_ is post-incremented, so the next op built gets the next id.
428 AddArgument(kNetPos, c10::to_string(onnxifi_op_id_++), &op);
// Lowers the subnet `net` to a net built around one Onnxifi op whose model
// payload is the serialized Caffe2 NetDef itself.
//   weights_in_ws - names of blobs treated as weights.
//   shape_hints   - ShapeInfo per blob, used for input/output shape info.
// NOTE(review): several lines (debug guards, early return, some call
// arguments, the final return) are not visible in this excerpt.
433 NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaC2(
434 const caffe2::NetDef& net,
435 const std::unordered_set<std::string>& weights_in_ws,
436 const ShapeInfoMap& shape_hints) {
// Captured before BuildOnnxifiOp increments the counter, so the debug file
// names below refer to this subnet's id.
437 int onnxifi_op_id = onnxifi_op_id_;
439 WriteProtoToTextFile(
440 net,
"debug_original_net_" + c10::to_string(onnxifi_op_id) +
".pb_txt");
// Subnets smaller than opts_.min_ops are handled specially (body not visible).
442 if (opts_.min_ops > net.op_size()) {
446 NetDef onnxifi_net(net);
// The second output of a two-output Concat/Reshape op is collected here and
// excluded from the rebuilt external outputs below.
454 std::unordered_set<std::string> split_infos;
455 for (
auto& op : *onnxifi_net.mutable_op()) {
456 if ((op.type() ==
"Concat" || op.type() ==
"Reshape") &&
457 op.output_size() == 2) {
458 split_infos.emplace(op.output(1));
461 onnxifi_net.clear_external_output();
462 for (
const auto& o : net.external_output()) {
463 if (!split_infos.count(o)) {
464 onnxifi_net.add_external_output(o);
471 vector<OperatorDef> input_ops;
472 vector<OperatorDef> output_ops;
473 std::unordered_map<std::string, std::string> renaming_map;
474 if (opts_.add_adjust_batch_ops) {
476 AddAdjustBatchOps(shape_hints, &onnxifi_net, &input_ops, &output_ops);
480 std::unordered_set<std::string> initialization_list;
481 std::vector<std::string> total_inputs_vec;
485 std::vector<std::string>(),
486 &initialization_list,
// Rebuild the external inputs and attach their shape info as the
// "input_shape_info" arg. The shape is looked up under the original name `i`;
// shape_hints.at(i) throws if it is absent.
488 auto* shape_arg = onnxifi_net.add_arg();
489 shape_arg->set_name(
"input_shape_info");
490 onnxifi_net.clear_external_input();
491 for (
const auto& i : total_inputs_vec) {
493 const auto it = renaming_map.find(i);
494 if (it != renaming_map.end()) {
497 onnxifi_net.add_external_input(input);
498 shape_arg->mutable_tensors()->Add()->CopyFrom(
499 wrapShapeInfoIntoTensorProto(input, shape_hints.at(i)));
// Output shape hints are keyed by the (possibly renamed) output name but
// looked up in shape_hints under the original name `o`.
503 std::unordered_map<std::string, TensorShape> output_shape_hints;
504 for (
auto& o : *onnxifi_net.mutable_external_output()) {
506 const auto rit = renaming_map.find(o);
507 if (rit != renaming_map.end()) {
508 output = rit->second;
510 const auto it = shape_hints.find(o);
512 it != shape_hints.end(),
"Cannot find shape info for output ", o);
513 const auto& shape = it->second.shape;
514 output_shape_hints.emplace(output, shape);
519 std::vector<std::string> onnxifi_net_inputs(
520 onnxifi_net.external_input().begin(), onnxifi_net.external_input().end());
521 std::vector<std::string> onnxifi_net_outputs(
522 onnxifi_net.external_output().begin(),
523 onnxifi_net.external_output().end());
// The Caffe2 net is serialized and handed to BuildOnnxifiOp as the model.
524 std::string model_str;
525 onnxifi_net.SerializeToString(&model_str);
526 auto onnxifi_op = BuildOnnxifiOp(
531 onnxifi_net_outputs);
532 NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
536 WriteProtoToTextFile(
538 "debug_onnxifi_net_" + c10::to_string(onnxifi_op_id) +
".pb_txt");
539 WriteProtoToTextFile(
541 "debug_optimized_net_" + c10::to_string(onnxifi_op_id) +
".pb_txt");
// Lowers the subnet `net` to a net built around one Onnxifi op whose model
// payload is an ONNX ModelProto produced with `exporter`.
//   weights_in_ws - names of blobs treated as weights.
//   exporter      - converts each Caffe2 op into ONNX nodes and extra tensors.
//   shape_hints   - in/out: receives CONSTANT entries for extra tensors.
// NOTE(review): several lines (the Workspace* parameter `ws`, the
// renaming_map declaration, some call arguments, the final return) are not
// visible in this excerpt.
546 NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
547 const caffe2::NetDef& net,
548 const std::unordered_set<std::string>& weights_in_ws,
550 onnx::OnnxExporter* exporter,
551 ShapeInfoMap* shape_hints) {
// Subnets smaller than opts_.min_ops are handled specially (body not visible).
552 if (opts_.min_ops > net.op_size()) {
555 ::ONNX_NAMESPACE::ModelProto onnx_model;
556 fillModelInfo(&onnx_model);
558 caffe2::NetDef onnxifi_net(net);
559 vector<OperatorDef> input_ops;
560 vector<OperatorDef> output_ops;
562 AddAdjustBatchOps(*shape_hints, &onnxifi_net, &input_ops, &output_ops);
// Every renamed blob inherits the shape hint of its original name;
// at() throws if the original has no hint.
563 for (
const auto& kv : renaming_map) {
564 shape_hints_onnx_.emplace(kv.second, shape_hints_onnx_.at(kv.first));
569 CPUContext context(option);
570 context.SwitchToDevice();
571 std::vector<std::string> extra_weights;
// Convert op by op; results.first holds the ONNX nodes, results.second the
// extra initializer tensors the exporter generated.
572 for (
const auto& op : onnxifi_net.op()) {
573 const auto results = exporter->Caffe2OpToOnnxNodes(op, shape_hints_onnx_);
574 for (
const auto& n : results.first) {
575 onnx_model.mutable_graph()->add_node()->CopyFrom(n);
577 for (
const auto& t : results.second) {
578 VLOG(2) <<
"Adding extra init tensor: " << t.name();
// Register the tensor's shape in both hint maps (as CONSTANT in shape_hints).
580 shape.mutable_dims()->CopyFrom(t.dims());
581 auto ret = shape_hints_onnx_.emplace(t.name(), std::move(shape));
582 shape_hints->emplace(
583 std::piecewise_construct,
584 std::forward_as_tuple(ret.first->first),
585 std::forward_as_tuple(
586 ShapeInfo::DimType::CONSTANT, ret.first->second));
// Materialize the tensor as a CPU blob in the workspace, copying raw_data.
// Only FLOAT and INT64 payloads are handled in the visible branches.
589 auto* blob = ws->CreateBlob(t.name());
590 auto* cpu_tensor = BlobGetMutableTensor(blob, CPU);
591 std::vector<int64_t> dims;
592 for(
const auto& d : t.dims()) {
595 cpu_tensor->Resize(dims);
596 if (t.data_type() == ::ONNX_NAMESPACE::TensorProto::FLOAT) {
597 context.CopyBytesSameDevice(
598 cpu_tensor->numel() *
sizeof(float),
599 static_cast<const void*>(t.raw_data().data()),
600 cpu_tensor->raw_mutable_data(TypeMeta::Make<float>()));
601 }
else if (t.data_type() == ::ONNX_NAMESPACE::TensorProto::INT64) {
602 context.CopyBytesSameDevice(
603 cpu_tensor->numel() *
sizeof(int64_t),
604 static_cast<const void*>(t.raw_data().data()),
605 cpu_tensor->raw_mutable_data(TypeMeta::Make<int64_t>()));
608 "Unsupported tensor data type for conversion: ", t.data_type());
610 context.FinishDeviceComputation();
613 extra_weights.emplace_back(t.name());
// Graph outputs: external outputs of the subnet, with renaming looked up.
618 std::vector<std::string> onnxifi_net_outputs;
619 for (
const auto& o : net.external_output()) {
621 const auto it = renaming_map.find(o);
622 if (it != renaming_map.end()) {
625 onnxifi_net_outputs.emplace_back(output);
627 auto io_vec = convertToValueInfo(
630 std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
631 std::unordered_map<std::string, TensorShape> output_shape_hints;
632 for (
const auto& i : io_vec) {
633 onnx_model.mutable_graph()->add_output()->CopyFrom(i);
634 const auto it = shape_hints_onnx_.find(i.name());
636 it != shape_hints_onnx_.end(),
637 "Cannot find shape info for output ",
639 const auto& shape = it->second;
640 output_shape_hints.emplace(i.name(), shape);
// Graph inputs: collected inputs, with renaming looked up.
644 std::unordered_set<std::string> initialization_list;
645 std::vector<std::string> onnxifi_net_inputs;
650 &initialization_list,
651 &onnxifi_net_inputs);
652 for (
auto& i : onnxifi_net_inputs) {
653 const auto it = renaming_map.find(i);
654 if (it != renaming_map.end()) {
658 io_vec = convertToValueInfo(
661 std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
662 for (
const auto& i : io_vec) {
663 onnx_model.mutable_graph()->add_input()->CopyFrom(i);
// The ONNX model is serialized and handed to BuildOnnxifiOp as the model.
667 std::string model_str;
668 onnx_model.SerializeToString(&model_str);
669 auto onnxifi_op = BuildOnnxifiOp(
674 onnxifi_net_outputs);
675 NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
679 WriteProtoToTextFile(onnx_model,
"debug_onnxifi_net.onnx_txt");
680 WriteProtoToTextFile(net_opt,
"debug_optimized_net.pb_txt");
// Decides whether the backend `backend_id` can run `op` through the ONNX
// path. The op is skipped if its net position is blacklisted or if it has no
// ONNX schema; otherwise it is exported to a one-op ONNX model which is
// passed to onnxGetBackendCompatibility.
// NOTE(review): the `try {` opener and all `return` statements are not
// visible in this excerpt, so the exact return values are not documented.
685 bool OnnxifiTransformer::supportOpOnnx(
686 const caffe2::OperatorDef& op,
687 onnx::OnnxExporter* exporter,
688 const std::unordered_set<int>& blacklisted_ops,
689 onnxBackendID backend_id)
const {
// Net position of the op; -1 is the default when the argument is absent.
692 ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
693 if (blacklisted_ops.count(pos)) {
694 LOG(INFO) <<
"Skipping blacklisted op " << op.type() <<
" at pos " << pos;
697 const OpSchema* schema = OpSchemaRegistry::Schema(op.type());
700 if (!schema || schema->onnx_schema().empty()) {
701 LOG(INFO) <<
"Cannot export c2 op " << op.type()
702 <<
" to onnx as there is no corresponding ONNX schema.";
706 ::ONNX_NAMESPACE::ModelProto onnx_model;
707 fillModelInfo(&onnx_model);
708 auto results = exporter->Caffe2OpToOnnxNodes(op, shape_hints_onnx_);
709 std::unordered_set<std::string> used_inputs;
710 std::unordered_set<std::string> used_outputs;
711 std::vector<std::string> boundary_inputs;
712 std::vector<std::string> boundary_outputs;
713 std::unordered_set<std::string> reshape_info;
// Pass 1: add the nodes; an input is a boundary input if it is new and was
// not produced by an earlier node of this same op.
715 for (
const auto& n : results.first) {
716 onnx_model.mutable_graph()->add_node()->CopyFrom(n);
717 for (
const auto& i : n.input()) {
718 bool is_new = used_inputs.emplace(i).second;
721 if (is_new && !used_outputs.count(i)) {
722 boundary_inputs.emplace_back(i);
725 for (
const auto& o : n.output()) {
726 used_outputs.emplace(o);
// Remember the second input of every Reshape node; matching extra tensors
// are added as graph initializers below.
731 if (n.op_type() ==
"Reshape" && n.input_size() > 1) {
732 reshape_info.emplace(n.input(1));
// Pass 2: an output is a boundary output if no node consumes it.
737 used_outputs.clear();
738 for (
const auto& n : results.first) {
739 for (
const auto& o : n.output()) {
740 bool is_new = used_outputs.emplace(o).second;
741 if (is_new && !used_inputs.count(o)) {
742 boundary_outputs.emplace_back(o);
746 std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>
748 for (
const auto& t : results.second) {
749 extra_shape_hints.emplace(t.name(), onnx::ExtraTypeProto(t));
750 if (reshape_info.count(t.name())) {
751 onnx_model.mutable_graph()->add_initializer()->CopyFrom(t);
756 auto io_vec = convertToValueInfo(
757 boundary_inputs, shape_hints_onnx_, extra_shape_hints);
758 for (
const auto& i : io_vec) {
759 onnx_model.mutable_graph()->add_input()->CopyFrom(i);
761 io_vec = convertToValueInfo(
762 boundary_outputs, shape_hints_onnx_, extra_shape_hints);
763 for (
const auto& i : io_vec) {
764 onnx_model.mutable_graph()->add_output()->CopyFrom(i);
// Ask the backend whether it accepts the serialized one-op model.
767 std::string onnx_model_str;
768 onnx_model.SerializeToString(&onnx_model_str);
769 auto ret = lib_->onnxGetBackendCompatibility(
770 backend_id, onnx_model_str.size(), onnx_model_str.c_str());
771 if (ret != ONNXIFI_STATUS_SUCCESS) {
772 LOG(INFO) <<
"Don't support onnx for " << op.type() <<
" c2 op (" << ret
778 }
catch (
const std::exception& ex) {
// Conversion failures are logged rather than propagated.
779 LOG(ERROR) <<
"Caught exception when converting op " << op.type()
780 <<
", what: " << ex.what();
// Decides whether the backend `backend_id` can run `op` through the Caffe2
// proto path. A blacklisted op is skipped; otherwise the op is wrapped in a
// one-op NetDef carrying "input_shape_info" and "output_shape_info" args,
// which is serialized and passed to onnxGetBackendCompatibility.
// NOTE(review): the `try {` opener, the declaration of `net`, the bodies of
// the missing-shape branches and all `return` statements are not visible in
// this excerpt, so the exact return values are not documented.
785 bool OnnxifiTransformer::supportOpC2(
786 const caffe2::OperatorDef& op,
787 const ShapeInfoMap& shape_hints,
788 const std::unordered_set<int>& blacklisted_ops,
789 onnxBackendID backend_id)
const {
// Net position of the op; -1 is the default when the argument is absent.
792 ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
793 if (blacklisted_ops.count(pos)) {
794 LOG(INFO) <<
"Skipping blacklisted op " << op.type() <<
" at pos " << pos;
800 net.add_op()->CopyFrom(op);
801 for (
const auto& i : op.input()) {
802 net.add_external_input(i);
804 for (
const auto& o : op.output()) {
805 net.add_external_output(o);
// For a two-output Concat/Reshape the last external output (the second op
// output) is dropped again.
808 if ((op.type() ==
"Concat" || op.type() ==
"Reshape") &&
809 op.output_size() == 2) {
810 net.mutable_external_output()->RemoveLast();
814 auto* shape_arg = net.add_arg();
815 shape_arg->set_name(
"input_shape_info");
816 for (
const auto& i : op.input()) {
817 const auto it = shape_hints.find(i);
818 if (it == shape_hints.end()) {
821 shape_arg->mutable_tensors()->Add()->CopyFrom(
822 wrapShapeInfoIntoTensorProto(i, it->second));
824 shape_arg = net.add_arg();
825 shape_arg->set_name(
"output_shape_info");
826 for (
const auto& i : op.output()) {
827 const auto it = shape_hints.find(i);
828 if (it == shape_hints.end()) {
831 shape_arg->mutable_tensors()->Add()->CopyFrom(
832 wrapShapeInfoIntoTensorProto(i, it->second));
// Ask the backend whether it accepts the serialized one-op net.
835 std::string c2_model_str;
836 net.SerializeToString(&c2_model_str);
837 auto ret = lib_->onnxGetBackendCompatibility(
838 backend_id, c2_model_str.size(), c2_model_str.c_str());
839 if (ret != ONNXIFI_STATUS_SUCCESS) {
840 LOG(INFO) <<
"Don't support c2 op " << op.type() <<
" (" << ret <<
")";
845 }
catch (
const std::exception& ex) {
// Conversion failures are logged rather than propagated.
846 LOG(ERROR) <<
"Caught exception when converting op " << op.type()
847 <<
", what: " << ex.what();
// Extends `blacklisted_ops` so that a Gather op is not lowered when the
// SparseLengthsWeighted* op consuming its output is unsupported. The net
// position of each Gather output is recorded; for every op whose type starts
// with "SparseLengthsWeighted" that the backend rejects and that has more
// than one input, the producer of input(1) is looked up and its position is
// blacklisted.
// NOTE(review): the `net` parameter, the body of the not-found branch and the
// arguments of the final emplace are not visible in this excerpt.
852 void OnnxifiTransformer::tieGatherAndSparseLengthsWeightedSumOps(
854 const ShapeInfoMap& shape_hints,
855 std::unordered_set<int>* blacklisted_ops)
const {
// Maps an output blob of a Gather op to that op's net position.
856 std::unordered_map<std::string, int> output_pos;
857 onnx::OnnxExporter exporter(
nullptr);
858 onnxBackendID backend_id = backend_ids_[idx_];
860 for (
const auto& op : net.op()) {
861 if (op.type() ==
"Gather") {
863 ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
864 for (
const auto& output : op.output()) {
865 output_pos.emplace(output, pos);
867 }
else if (StartsWith(op.type(),
"SparseLengthsWeighted")) {
// Support is checked through whichever path opts_.use_onnx selects.
868 auto supported = opts_.use_onnx
869 ? supportOpOnnx(op, &exporter, *blacklisted_ops, backend_id)
870 : supportOpC2(op, shape_hints, *blacklisted_ops, backend_id);
871 if (!supported && op.input_size() > 1) {
872 const auto it = output_pos.find(op.input(1));
873 if (it == output_pos.end()) {
// Blacklist the Gather that produced input(1).
876 blacklisted_ops->emplace(it->second);
879 blacklisted_ops->emplace(
880 ArgumentHelper::GetSingleArgument<OperatorDef, int>(
// Applies the op-filtering rules that may add entries to `blacklisted_ops`.
// The only rule in the visible lines is
// tieGatherAndSparseLengthsWeightedSumOps.
// NOTE(review): the `net` parameter line is not visible in this excerpt.
887 void OnnxifiTransformer::applyFilteringRules(
889 const ShapeInfoMap& shape_hints,
890 std::unordered_set<int>* blacklisted_ops)
const {
891 tieGatherAndSparseLengthsWeightedSumOps(net, shape_hints, blacklisted_ops);
// Scans the known backends for one whose ONNXIFI_BACKEND_DEVICE info string
// contains "Caffe2", logging its index when found.
// NOTE(review): the body of the opts_.use_onnx branch and the statement that
// records the chosen index are not visible in this excerpt — presumably idx_
// is set; confirm against the full file.
894 void OnnxifiTransformer::getBackendId() {
897 if (opts_.use_onnx) {
902 char buf[kBufferSize];
903 for (
int i = 0; i < backend_ids_.size(); ++i) {
// `len` is reset for every query and passed by pointer to onnxGetBackendInfo.
904 size_t len = kBufferSize;
905 auto ret = lib_->onnxGetBackendInfo(
906 backend_ids_[i], ONNXIFI_BACKEND_DEVICE, buf, &len);
907 if (ret == ONNXIFI_STATUS_SUCCESS && strstr(buf,
"Caffe2")) {
908 LOG(INFO) <<
"Using backend with Caffe2 Proto, ID: " << i;
// Runs opt::OptimizeForBackend on *pred_net using the Caffe2 proto path:
// `c2_supports` answers per-op support through supportOpC2, and the converter
// lambda lowers each supported subnet through SubnetToOnnxifiOpViaC2.
// NOTE(review): the `pred_net` parameter line and the declaration line of
// the converter lambda (`c2_converter`) are not visible in this excerpt.
915 NetDef OnnxifiTransformer::TransformViaC2(
917 const std::unordered_set<std::string>& weights,
918 const std::unordered_set<int>& blacklisted_ops,
919 const ShapeInfoMap& shape_hints) {
920 onnxBackendID backend_id = backend_ids_[idx_];
// Both lambdas capture shape_hints (and blacklisted_ops / weights) by
// reference and are only used within this call.
922 auto c2_supports = [
this, &shape_hints, &blacklisted_ops, backend_id](
923 const caffe2::OperatorDef& op) {
924 return supportOpC2(op, shape_hints, blacklisted_ops, backend_id);
928 [
this, &weights, &shape_hints](
const caffe2::NetDef& net) {
929 return SubnetToOnnxifiOpViaC2(net, weights, shape_hints);
932 return opt::OptimizeForBackend(*pred_net, c2_supports, c2_converter);
// Runs opt::OptimizeForBackend on *pred_net using the ONNX path:
// `onnx_supports` answers per-op support through supportOpOnnx, and
// `onnx_converter` lowers each supported subnet through
// SubnetToOnnxifiOpViaOnnx. opts_.debug is forwarded to the optimizer.
// NOTE(review): the `ws` and `pred_net` parameter lines are not visible in
// this excerpt.
935 NetDef OnnxifiTransformer::TransformViaOnnx(
938 const std::unordered_set<std::string>& weights,
939 const std::unordered_set<int>& blacklisted_ops,
940 ShapeInfoMap* shape_hints) {
941 onnxBackendID backend_id = backend_ids_[idx_];
// Support checks and conversion each use their own OnnxExporter instance.
944 onnx::OnnxExporter exporter(
nullptr);
945 auto onnx_supports = [
this, &exporter, &blacklisted_ops, backend_id](
946 const caffe2::OperatorDef& op) {
947 return supportOpOnnx(op, &exporter, blacklisted_ops, backend_id);
953 onnx::OnnxExporter exporter2(
nullptr);
954 auto onnx_converter = [
this, ws, &weights, shape_hints, &exporter2](
955 const caffe2::NetDef& net)
mutable {
956 return SubnetToOnnxifiOpViaOnnx(net, weights, ws, &exporter2, shape_hints);
959 return opt::OptimizeForBackend(
960 *pred_net, onnx_supports, onnx_converter, opts_.debug);
// Entry point: rewrites *pred_net in place so that supported subgraphs are
// replaced by Onnxifi ops.
//   weight_names      - names of weight blobs.
//   input_shape_hints - caller-provided shapes for inputs.
//   blacklisted_ops   - net positions that must not be lowered.
// Visible steps: enforce pred_net is non-null, compute the model id, unroll
// "If" ops, SSA-rewrite and map names, infer shapes, apply filtering rules,
// transform through the ONNX or C2 path, copy the device option, and swap the
// result into *pred_net.
// NOTE(review): the `ws` and `pred_net` parameter lines and the debug guards
// around the WriteProtoToTextFile calls are not visible in this excerpt.
965 void OnnxifiTransformer::transform(
968 const std::vector<std::string>& weight_names,
969 const std::unordered_map<std::string, TensorShape>& input_shape_hints,
970 const std::unordered_set<int>& blacklisted_ops) {
972 CAFFE_ENFORCE(pred_net,
"Predict net cannot be nullptr");
975 model_id_ = getModelId(*pred_net);
979 unrollIfOps(pred_net);
981 std::unordered_set<std::string> weights(
982 weight_names.begin(), weight_names.end());
985 auto shape_hints_mapped =
986 ssaRewriteAndMapNames(ws, pred_net, input_shape_hints);
// Shape inference runs against a workspace built from `ws` and input_mapping_.
995 Workspace mapped_ws(ws, input_mapping_);
996 ShapeInfoMap shape_hints = inferShapes(
997 &mapped_ws, pred_net, shape_hints_mapped, opts_.bound_shape_spec);
// The ONNX path works on plain TensorShape hints.
998 if (opts_.use_onnx) {
999 shape_hints_onnx_ = stripShapeInfoMap(shape_hints);
// Dump of the SSA net with a "shape_info" arg; each tensor additionally
// carries its dim_type as an int32 entry.
1003 NetDef shape_net(*pred_net);
1004 auto* shape_arg = shape_net.add_arg();
1005 shape_arg->set_name(
"shape_info");
1006 for (
const auto& kv : shape_hints) {
1007 auto t = wrapShapeInfoIntoTensorProto(kv.first, kv.second);
1008 t.add_int32_data(static_cast<int32_t>(kv.second.dim_type));
1009 shape_arg->mutable_tensors()->Add()->CopyFrom(t);
1011 WriteProtoToTextFile(shape_net,
"debug_ssa_net.pb_txt");
// Work on a copy of the blacklist so the filtering rules can extend it.
1018 std::unordered_set<int> new_blacklisted_ops(
1019 blacklisted_ops.begin(), blacklisted_ops.end());
1020 applyFilteringRules(*pred_net, shape_hints, &new_blacklisted_ops);
1023 NetDef net_opt = opts_.use_onnx
1025 ws, pred_net, weights, new_blacklisted_ops, &shape_hints)
1026 : TransformViaC2(pred_net, weights, new_blacklisted_ops, shape_hints);
// Keep the original net's device option on the optimized net.
1029 net_opt.mutable_device_option()->CopyFrom(pred_net->device_option());
1032 WriteProtoToTextFile(net_opt,
"debug_full_opt_net.pb_txt");
1034 pred_net->Swap(&net_opt);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...