Caffe2 - C++ API
A deep learning, cross platform ML framework
1 #include "caffe2/opt/onnxifi_transformer.h"
3 #include <iostream>
4 #include <unordered_set>
6 #include "onnx/proto_utils.h"
8 #include "caffe2/core/context.h"
9 #include "caffe2/core/logging.h"
10 #include "caffe2/core/operator.h"
11 #include "caffe2/core/tensor.h"
12 #include "caffe2/onnx/onnx_exporter.h"
13 #include "caffe2/opt/backend_cutting.h"
14 #include "caffe2/utils/proto_utils.h"
15 #include "caffe2/utils/string_utils.h"
17 namespace caffe2 {
19 namespace {
// Prefix of the blob names that hold the real batch size.
const std::string kRealBatchSizeBlob = "real_batch_size";
// Size, in bytes, of the scratch buffer used for backend info queries.
constexpr size_t kBufferSize{64};
23 // Convert ShapeInfo map to TensorShape map
24 std::unordered_map<std::string, TensorShape> stripShapeInfoMap(
25  const ShapeInfoMap& info_map) {
26  std::unordered_map<std::string, TensorShape> shape_map;
27  for (const auto& kv : info_map) {
28  shape_map.emplace(kv.first, kv.second.shape);
29  }
30  return shape_map;
31 }
33 uint64_t onnxifiDataType(caffe2::TensorProto::DataType t) {
34 #define CAFFE2_TO_ONNXIFI_TYPE(x, y) \
35  case (caffe2::TensorProto::x): \
36  return y
37  switch (t) {
46  default:
47  LOG(WARNING) << "Unsupported Caffe2 tensor type: " << t
48  << ", fallback to FLOAT";
50  }
52 }
54 std::vector<::ONNX_NAMESPACE::ValueInfoProto> convertToValueInfo(
55  const std::vector<std::string>& names,
56  const std::unordered_map<std::string, TensorShape>& shape_hints,
57  const std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>&
58  extra_shape_hints) {
59  std::vector<::ONNX_NAMESPACE::ValueInfoProto> r;
60  for (const auto& s : names) {
61  r.emplace_back();
62  auto& value_info = r.back();
63  value_info.set_name(s);
64  const auto it = shape_hints.find(s);
65  if (it == shape_hints.end()) {
66  const auto eit = extra_shape_hints.find(s);
67  if (eit == extra_shape_hints.end()) {
68  LOG(WARNING) << "Cannot get shape of " << s;
69  } else {
70  value_info.mutable_type()->CopyFrom(eit->second);
71  }
72  } else {
73  auto* tensor_type = value_info.mutable_type()->mutable_tensor_type();
74  tensor_type->set_elem_type(
75  onnx::Caffe2TypeToOnnxType(it->second.data_type()));
76  auto* shape = tensor_type->mutable_shape();
77  for (int i = 0; i < it->second.dims().size(); ++i) {
78  shape->add_dim()->set_dim_value(it->second.dims(i));
79  }
80  }
81  }
82  return r;
83 }
85 // Given a net, with primiary inputs and outputs defined in its
86 // external_inputs/outputs, and given the set of weights and extra weights
87 // (created during conversion to ONNX if exists), we check whether some of the
88 // weights are used in the net, and if so, we put it in the initialize_list and
89 // add it to the external_inputs too.
90 // \param net [in] c2 net (cutoff from a bigger net)
91 // \param weights_in_ws [in] all the weights in the workspace
92 // \param extra_weights [in] extra weights possibly generated during ONNX
93 // conversion \param initialization_list [out] weights that needs to be offload
94 // to backend \param total_inputs_vec [out] total #inputs of the net that
95 // doesn't have a producer
96 void getWeightsAndInputs(
97  const NetDef& net,
98  const std::unordered_set<std::string>& weights_in_ws,
99  const std::vector<std::string>& extra_weights,
100  std::unordered_set<std::string>* initialization_list,
101  std::vector<std::string>* total_inputs_vec) {
102  std::unordered_set<std::string> total_inputs;
104  // extra weights is definitely extra weights/inputs
105  for (const auto& extra_weight : extra_weights) {
106  if (total_inputs.emplace(extra_weight).second) {
107  total_inputs_vec->emplace_back(extra_weight);
108  }
109  initialization_list->emplace(extra_weight);
110  }
112  // Boundary inputs that should not be weights
113  std::unordered_set<std::string> boundary_inputs;
114  for (const auto& i : net.external_input()) {
115  boundary_inputs.emplace(i);
116  }
118  for (const auto& op : net.op()) {
119  for (const auto& input : op.input()) {
120  bool not_seen = total_inputs.emplace(input).second;
121  if (!not_seen) {
122  continue;
123  }
124  if (weights_in_ws.count(input)) {
125  // We add weights as inputs too
126  total_inputs_vec->emplace_back(input);
127  initialization_list->emplace(input);
128  VLOG(2) << "Add weights: " << input;
129  } else if (boundary_inputs.count(input)) {
130  VLOG(2) << "Adding boundary input: " << input;
131  total_inputs_vec->emplace_back(input);
132  }
133  }
134  }
135 }
137 void unrollIfOps(NetDef* net) {
138  NetDef clone(*net);
139  clone.clear_op();
140  for (const auto& op : net->op()) {
141  if (op.type() == "If") {
142  ArgumentHelper helper(op);
143  if (helper.HasSingleArgumentOfType<NetDef>("then_net")) {
144  auto then_net = helper.GetSingleArgument<NetDef>("then_net", NetDef());
145  for (const auto& nested_op : then_net.op()) {
146  clone.add_op()->CopyFrom(nested_op);
147  }
148  }
149  if (helper.HasSingleArgumentOfType<NetDef>("else_net")) {
150  auto else_net = helper.GetSingleArgument<NetDef>("else_net", NetDef());
151  for (const auto& nested_op : else_net.op()) {
152  clone.add_op()->CopyFrom(nested_op);
153  }
154  }
155  } else {
156  clone.add_op()->CopyFrom(op);
157  }
158  }
159  net->Swap(&clone);
160 }
162 void fillModelInfo(::ONNX_NAMESPACE::ModelProto* model) {
163  model->set_ir_version(::ONNX_NAMESPACE::Version::IR_VERSION);
164  model->set_producer_name("caffe2");
165  auto* opset_id = model->add_opset_import();
166  opset_id->set_domain("");
167  opset_id->set_version(7);
168 }
// Returns `blob_name` with the "_real_seq_size" suffix appended.
std::string MakeSeqSizeBlob(const std::string& blob_name) {
  std::string seq_size_blob(blob_name);
  seq_size_blob.append("_real_seq_size");
  return seq_size_blob;
}
// Returns `input` with the "_post_adjust_batch" suffix appended.
std::string MakeOutputForAdjustBatchOp(const std::string& input) {
  std::string adjusted(input);
  adjusted.append("_post_adjust_batch");
  return adjusted;
}
// Returns `output` with the "_pre_adjust_batch" suffix appended.
std::string MakeInputForAdjustBatchOp(const std::string& output) {
  std::string unadjusted(output);
  unadjusted.append("_pre_adjust_batch");
  return unadjusted;
}
182 OperatorDef MakeAdjustBatchOp(
183  const std::string& input_blob,
184  const std::string& output_blob,
185  int max_batch_size,
186  const std::string& real_batch_size_blob,
187  bool adjust_to_max_batch_size) {
188  OperatorDef adjust_batch_op;
189  adjust_batch_op.set_type("AdjustBatch");
190  auto* arg = adjust_batch_op.add_arg();
191  arg->set_name("max_batch_size");
192  arg->set_i(max_batch_size);
193  adjust_batch_op.add_input(input_blob);
194  adjust_batch_op.add_output(output_blob);
195  if (adjust_to_max_batch_size) {
196  if (!real_batch_size_blob.empty()) {
197  adjust_batch_op.add_output(real_batch_size_blob);
198  }
199  } else {
200  adjust_batch_op.add_input(real_batch_size_blob);
201  }
202  return adjust_batch_op;
203 }
205 std::unordered_set<string> ToHashSet(
206  const ::google::protobuf::RepeatedPtrField<string>& strs) {
207  return std::unordered_set<string>(strs.begin(), strs.end());
208 }
210 int64_t GetBlob1stDimSize(
211  const ShapeInfo& shape_info,
212  const string& blob_name) {
213  if (shape_info.shape.dims_size() == 0) {
214  return 0;
215  } else {
216  return shape_info.shape.dims(0);
217  }
218 }
220 // Generates AdjustBatchOps for external inputs/outputs with type BATCH or
221 // SEQ and adds them to input_ops and output_ops.
222 // Meanwhile, modifies inputs/outputs of corresponding operators in the
223 // onnxifi_net to use the new inputs/outputs of AdjustBatchOps.
224 std::unordered_map<std::string, std::string> AddAdjustBatchOps(
225  const ShapeInfoMap& shape_hints,
226  NetDef* onnxifi_net,
227  vector<OperatorDef>* input_ops,
228  vector<OperatorDef>* output_ops) {
229  std::unordered_map<std::string, std::string> renaming_map;
230  const auto external_inputs = ToHashSet(onnxifi_net->external_input());
231  const auto external_outputs = ToHashSet(onnxifi_net->external_output());
232  std::unordered_set<std::string> real_batch_size_blobs;
233  std::unordered_set<std::string> post_adjust_inputs;
235  for (auto& op : *(onnxifi_net->mutable_op())) {
236  // Add AdjustBatchOp for all external inputs with type BATCH or SEQ.
237  // This will adjust the batch/seq size to the batch/seq size inferred by
238  // bound_shape_inference. Note that we only produce real batch size tensor
239  // once to avoid data race. In addition, for each input we only create one
240  // AdjustBatch op for the same reason.
241  for (auto& input_blob : *(op.mutable_input())) {
242  if (external_inputs.count(input_blob)) {
243  auto shape_info_it = shape_hints.find(input_blob);
244  if (shape_info_it == shape_hints.end()) {
245  LOG(WARNING) << "Cannot find shape_info for external input blob: "
246  << input_blob;
247  continue;
248  }
249  std::string real_batch_size_blob = "";
250  auto max_batch_size = 0;
251  if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
252  max_batch_size = GetBlob1stDimSize(shape_info_it->second, input_blob);
253  real_batch_size_blob =
254  kRealBatchSizeBlob + "_" + c10::to_string(max_batch_size);
255  } else if (shape_info_it->second.dim_type == ShapeInfo::DimType::SEQ) {
256  max_batch_size = GetBlob1stDimSize(shape_info_it->second, input_blob);
257  real_batch_size_blob = MakeSeqSizeBlob(input_blob);
258  } else {
259  continue;
260  }
262  auto output_blob = MakeOutputForAdjustBatchOp(input_blob);
263  auto ret = real_batch_size_blobs.emplace(real_batch_size_blob);
264  if (post_adjust_inputs.emplace(output_blob).second) {
265  input_ops->push_back(MakeAdjustBatchOp(
266  input_blob,
267  output_blob,
268  max_batch_size,
269  ret.second ? real_batch_size_blob : "",
270  true /* adjust_to_max_batch_size */));
271  }
272  renaming_map[input_blob] = output_blob;
273  input_blob = output_blob;
274  } else if (renaming_map.count(input_blob)) {
275  // It is possible that input of a certain op is the output of its
276  // predecessor op, which happens to be an external_output. In this case,
277  // the tensor would have been renamed to X_pre_batch_adjust. Therefore,
278  // we need to rename input X to X_pre_batch_adjust too.
279  input_blob = renaming_map[input_blob];
280  }
281  }
282  // Add AdjustBatchOp for all external outputs with type BATCH if the real
283  // batch size is presented. This will adjust the batch size to the
284  // original batch size.
285  for (auto& output_blob : *(op.mutable_output())) {
286  if (external_outputs.count(output_blob)) {
287  auto shape_info_it = shape_hints.find(output_blob);
289  shape_info_it != shape_hints.end(),
290  "Cannot find shape info for ",
291  output_blob,
292  " for AdjustBatchOp insertion");
293  if (shape_info_it->second.dim_type == ShapeInfo::DimType::BATCH) {
294  auto max_batch_size =
295  GetBlob1stDimSize(shape_info_it->second, output_blob);
296  std::string real_size_blob =
297  kRealBatchSizeBlob + "_" + c10::to_string(max_batch_size);
299  real_batch_size_blobs.count(real_size_blob),
300  output_blob,
301  ": Cannot find ",
302  real_size_blob,
303  " to make AdjustBatchOp");
304  auto input_blob = MakeInputForAdjustBatchOp(output_blob);
305  output_ops->push_back(MakeAdjustBatchOp(
306  input_blob,
307  output_blob,
308  max_batch_size,
309  real_size_blob,
310  false /* adjust_to_max_batch_size */));
311  renaming_map[output_blob] = input_blob;
312  output_blob = input_blob;
313  } else if (shape_info_it->second.dim_type == ShapeInfo::DimType::SEQ) {
314  LOG(WARNING) << "It's unusual that output tesnor " << output_blob
315  << " is of dim_type SEQ. "
316  << "AdjustBatchOp won't attached "
317  << "and it might degrade the performance";
318  }
319  }
320  }
321  }
323  return renaming_map;
324 }
326 NetDef ComposeResultNet(
327  const vector<OperatorDef>& input_ops,
328  const vector<OperatorDef>& output_ops,
329  const OperatorDef& onnxifi_op) {
330  NetDef net_opt;
331  for (const auto& op : input_ops) {
332  net_opt.add_op()->CopyFrom(op);
333  }
334  net_opt.add_op()->CopyFrom(onnxifi_op);
335  // Add AdjustBatch ops for output blobs to the net.
336  for (const auto& op : output_ops) {
337  net_opt.add_op()->CopyFrom(op);
338  }
339  return net_opt;
340 }
342 } // namespace
344 OnnxifiTransformer::OnnxifiTransformer(const OnnxifiTransformerOptions& opts)
345  : BackendTransformerBase(), opts_(opts) {
346  lib_ = onnx::initOnnxifiLibrary();
347  CAFFE_ENFORCE(lib_, "Cannot initialize ONNXIFI library");
349  lib_->onnxGetBackendIDs(nullptr, &num_backends_),
352  num_backends_, 0, "At least 1 onnxifi backend should be available");
353  backend_ids_.resize(num_backends_);
355  lib_->onnxGetBackendIDs(, &num_backends_),
357 }
359 OnnxifiTransformer::~OnnxifiTransformer() {
360  for (unsigned i = 0; i < num_backends_; ++i) {
361  if (lib_->onnxReleaseBackendID(backend_ids_[i]) != ONNXIFI_STATUS_SUCCESS) {
362  LOG(ERROR) << "Error when calling onnxReleaseBackendID";
363  }
364  }
365 }
367 OperatorDef OnnxifiTransformer::BuildOnnxifiOp(
368  const std::string& onnx_model_str,
369  const std::unordered_map<std::string, TensorShape>& output_shape_hints,
370  const std::unordered_set<std::string>& initialization_list,
371  const std::vector<std::string>& external_inputs,
372  const std::vector<std::string>& external_outputs) {
373  OperatorDef op;
374  op.set_type("Onnxifi");
375  auto* onnx_model_arg = op.add_arg();
376  onnx_model_arg->set_name("onnx_model");
377  onnx_model_arg->set_s(onnx_model_str);
379  // Add the names of the initializer blobs that we want to fetch from the
380  // workspace later
381  auto* initializers_arg = op.add_arg();
382  initializers_arg->set_name("initializers");
383  for (const auto& s : initialization_list) {
384  initializers_arg->add_strings(s);
385  }
387  // Add the input/output
388  auto* input_names = op.add_arg();
389  input_names->set_name("input_names");
390  for (const auto& input : external_inputs) {
391  if (!initialization_list.count(input)) {
392  op.add_input(input);
393  input_names->add_strings(input);
394  }
395  }
396  auto* output_names = op.add_arg();
397  output_names->set_name("output_names");
398  for (const auto& output : external_outputs) {
399  op.add_output(output);
400  output_names->add_strings(output);
401  }
403  // Add output size hints
404  for (int i = 0; i < op.output_size(); ++i) {
405  const auto& o = op.output(i);
406  const auto it = output_shape_hints.find(o);
407  if (it != output_shape_hints.end()) {
408  const auto& shape = it->second;
409  auto* output_shape_hint_arg = op.add_arg();
410  output_shape_hint_arg->set_name(c10::str("output_shape_hint_", i));
411  output_shape_hint_arg->add_ints(onnxifiDataType(shape.data_type()));
412  for (const auto& d : shape.dims()) {
413  output_shape_hint_arg->add_ints(d);
414  }
416  VLOG(2) << "Adding output hint: " << o;
417  }
418  }
420  // Tell Onnxifi op that the model is in onnx or c2 proto format
421  AddArgument("use_onnx", opts_.use_onnx ? 1 : 0, &op);
423  // Tell Onnxifi op which backend id to use
424  AddArgument("backend_id", idx_, &op);
426  // Add model_id and net_pos to the onnxifi model
427  AddArgument(kModelId, model_id_, &op);
428  AddArgument(kNetPos, c10::to_string(onnxifi_op_id_++), &op);
430  return op;
431 }
433 NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaC2(
434  const caffe2::NetDef& net,
435  const std::unordered_set<std::string>& weights_in_ws,
436  const ShapeInfoMap& shape_hints) {
437  int onnxifi_op_id = onnxifi_op_id_;
438  if (opts_.debug) {
439  WriteProtoToTextFile(
440  net, "debug_original_net_" + c10::to_string(onnxifi_op_id) + ".pb_txt");
441  }
442  if (opts_.min_ops > net.op_size()) {
443  return net;
444  }
445  // We already have all the ops and external inputs and outputs!
446  NetDef onnxifi_net(net);
448  // Remove the second output of Concat/Reshape from external_output. In
449  // addition, we remove those outputs from the Onnxifi op too.
450  // TODO: This approach is a bit hacky as we assume that the second output is
451  // never used. A more appropriate approach can be learned from the ONNX path,
452  // where we statically computes the split_info given input shape and insert a
453  // GivenTensorIntFill op
454  std::unordered_set<std::string> split_infos;
455  for (auto& op : *onnxifi_net.mutable_op()) {
456  if ((op.type() == "Concat" || op.type() == "Reshape") &&
457  op.output_size() == 2) {
458  split_infos.emplace(op.output(1));
459  }
460  }
461  onnxifi_net.clear_external_output();
462  for (const auto& o : net.external_output()) {
463  if (!split_infos.count(o)) {
464  onnxifi_net.add_external_output(o);
465  }
466  }
468  // Insert AdjustBatch ops, note that this step will possibly change the names
469  // of the input/output, so we need to create a mapping and use the renamed
470  // names for external_inputs/outputs and input_shape_info for the onnxifi_net.
471  vector<OperatorDef> input_ops;
472  vector<OperatorDef> output_ops;
473  std::unordered_map<std::string, std::string> renaming_map;
474  if (opts_.add_adjust_batch_ops) {
475  renaming_map =
476  AddAdjustBatchOps(shape_hints, &onnxifi_net, &input_ops, &output_ops);
477  }
479  // Figure out weights and add it to external_inputs too
480  std::unordered_set<std::string> initialization_list;
481  std::vector<std::string> total_inputs_vec;
482  getWeightsAndInputs(
483  net,
484  weights_in_ws,
485  std::vector<std::string>(),
486  &initialization_list,
487  &total_inputs_vec);
488  auto* shape_arg = onnxifi_net.add_arg();
489  shape_arg->set_name("input_shape_info");
490  onnxifi_net.clear_external_input();
491  for (const auto& i : total_inputs_vec) {
492  auto input = i;
493  const auto it = renaming_map.find(i);
494  if (it != renaming_map.end()) {
495  input = it->second;
496  }
497  onnxifi_net.add_external_input(input);
498  shape_arg->mutable_tensors()->Add()->CopyFrom(
499  wrapShapeInfoIntoTensorProto(input,;
500  }
502  // Compute output shape hints
503  std::unordered_map<std::string, TensorShape> output_shape_hints;
504  for (auto& o : *onnxifi_net.mutable_external_output()) {
505  auto output = o;
506  const auto rit = renaming_map.find(o);
507  if (rit != renaming_map.end()) {
508  output = rit->second;
509  }
510  const auto it = shape_hints.find(o);
512  it != shape_hints.end(), "Cannot find shape info for output ", o);
513  const auto& shape = it->second.shape;
514  output_shape_hints.emplace(output, shape);
515  o = output;
516  }
518  // Build ONNXIFI Op
519  std::vector<std::string> onnxifi_net_inputs(
520  onnxifi_net.external_input().begin(), onnxifi_net.external_input().end());
521  std::vector<std::string> onnxifi_net_outputs(
522  onnxifi_net.external_output().begin(),
523  onnxifi_net.external_output().end());
524  std::string model_str;
525  onnxifi_net.SerializeToString(&model_str);
526  auto onnxifi_op = BuildOnnxifiOp(
527  model_str,
528  output_shape_hints,
529  initialization_list,
530  onnxifi_net_inputs,
531  onnxifi_net_outputs);
532  NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
534  // Debugging stuff
535  if (opts_.debug) {
536  WriteProtoToTextFile(
537  onnxifi_net,
538  "debug_onnxifi_net_" + c10::to_string(onnxifi_op_id) + ".pb_txt");
539  WriteProtoToTextFile(
540  net_opt,
541  "debug_optimized_net_" + c10::to_string(onnxifi_op_id) + ".pb_txt");
542  }
543  return net_opt;
544 }
546 NetDef OnnxifiTransformer::SubnetToOnnxifiOpViaOnnx(
547  const caffe2::NetDef& net,
548  const std::unordered_set<std::string>& weights_in_ws,
549  Workspace* ws,
550  onnx::OnnxExporter* exporter,
551  ShapeInfoMap* shape_hints) {
552  if (opts_.min_ops > net.op_size()) {
553  return net;
554  }
555  ::ONNX_NAMESPACE::ModelProto onnx_model;
556  fillModelInfo(&onnx_model);
558  caffe2::NetDef onnxifi_net(net);
559  vector<OperatorDef> input_ops;
560  vector<OperatorDef> output_ops;
561  auto renaming_map =
562  AddAdjustBatchOps(*shape_hints, &onnxifi_net, &input_ops, &output_ops);
563  for (const auto& kv : renaming_map) {
564  shape_hints_onnx_.emplace(kv.second,;
565  }
567  // Convert c2 ops to onnx ops, add const weights if there are any
568  DeviceOption option;
569  CPUContext context(option);
570  context.SwitchToDevice();
571  std::vector<std::string> extra_weights;
572  for (const auto& op : onnxifi_net.op()) {
573  const auto results = exporter->Caffe2OpToOnnxNodes(op, shape_hints_onnx_);
574  for (const auto& n : results.first) {
575  onnx_model.mutable_graph()->add_node()->CopyFrom(n);
576  }
577  for (const auto& t : results.second) {
578  VLOG(2) << "Adding extra init tensor: " <<;
579  TensorShape shape;
580  shape.mutable_dims()->CopyFrom(t.dims());
581  auto ret = shape_hints_onnx_.emplace(, std::move(shape));
582  shape_hints->emplace(
583  std::piecewise_construct,
584  std::forward_as_tuple(ret.first->first),
585  std::forward_as_tuple(
586  ShapeInfo::DimType::CONSTANT, ret.first->second));
588  // Feed into workspace as CPU Tensors
589  auto* blob = ws->CreateBlob(;
590  auto* cpu_tensor = BlobGetMutableTensor(blob, CPU);
591  std::vector<int64_t> dims;
592  for(const auto& d : t.dims()) {
593  dims.push_back(d);
594  }
595  cpu_tensor->Resize(dims);
596  if (t.data_type() == ::ONNX_NAMESPACE::TensorProto::FLOAT) {
597  context.CopyBytesSameDevice(
598  cpu_tensor->numel() * sizeof(float),
599  static_cast<const void*>(t.raw_data().data()),
600  cpu_tensor->raw_mutable_data(TypeMeta::Make<float>()));
601  } else if (t.data_type() == ::ONNX_NAMESPACE::TensorProto::INT64) {
602  context.CopyBytesSameDevice(
603  cpu_tensor->numel() * sizeof(int64_t),
604  static_cast<const void*>(t.raw_data().data()),
605  cpu_tensor->raw_mutable_data(TypeMeta::Make<int64_t>()));
606  } else {
608  "Unsupported tensor data type for conversion: ", t.data_type());
609  }
610  context.FinishDeviceComputation();
612  // Add mappings
613  extra_weights.emplace_back(;
614  }
615  }
617  // Convert outputs and compute output shape hints
618  std::vector<std::string> onnxifi_net_outputs;
619  for (const auto& o : net.external_output()) {
620  auto output = o;
621  const auto it = renaming_map.find(o);
622  if (it != renaming_map.end()) {
623  output = it->second;
624  }
625  onnxifi_net_outputs.emplace_back(output);
626  }
627  auto io_vec = convertToValueInfo(
628  onnxifi_net_outputs,
629  shape_hints_onnx_,
630  std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
631  std::unordered_map<std::string, TensorShape> output_shape_hints;
632  for (const auto& i : io_vec) {
633  onnx_model.mutable_graph()->add_output()->CopyFrom(i);
634  const auto it = shape_hints_onnx_.find(;
636  it != shape_hints_onnx_.end(),
637  "Cannot find shape info for output ",
639  const auto& shape = it->second;
640  output_shape_hints.emplace(, shape);
641  }
643  // Convert inputs and figure out weights
644  std::unordered_set<std::string> initialization_list;
645  std::vector<std::string> onnxifi_net_inputs;
646  getWeightsAndInputs(
647  net,
648  weights_in_ws,
649  extra_weights,
650  &initialization_list,
651  &onnxifi_net_inputs);
652  for (auto& i : onnxifi_net_inputs) {
653  const auto it = renaming_map.find(i);
654  if (it != renaming_map.end()) {
655  i = it->second;
656  }
657  }
658  io_vec = convertToValueInfo(
659  onnxifi_net_inputs,
660  shape_hints_onnx_,
661  std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>());
662  for (const auto& i : io_vec) {
663  onnx_model.mutable_graph()->add_input()->CopyFrom(i);
664  }
666  // Onnx model is ready. Build ONNXIFI Op
667  std::string model_str;
668  onnx_model.SerializeToString(&model_str);
669  auto onnxifi_op = BuildOnnxifiOp(
670  model_str,
671  output_shape_hints,
672  initialization_list,
673  onnxifi_net_inputs,
674  onnxifi_net_outputs);
675  NetDef net_opt = ComposeResultNet(input_ops, output_ops, onnxifi_op);
677  // Debugging stuff
678  if (opts_.debug) {
679  WriteProtoToTextFile(onnx_model, "debug_onnxifi_net.onnx_txt");
680  WriteProtoToTextFile(net_opt, "debug_optimized_net.pb_txt");
681  }
682  return net_opt;
683 }
685 bool OnnxifiTransformer::supportOpOnnx(
686  const caffe2::OperatorDef& op,
687  onnx::OnnxExporter* exporter,
688  const std::unordered_set<int>& blacklisted_ops,
689  onnxBackendID backend_id) const {
690  try {
691  int pos =
692  ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
693  if (blacklisted_ops.count(pos)) {
694  LOG(INFO) << "Skipping blacklisted op " << op.type() << " at pos " << pos;
695  return false;
696  }
697  const OpSchema* schema = OpSchemaRegistry::Schema(op.type());
698  // NB: this might not be a hard constraint as we can just export C2
699  // domain specific ops to ONNX
700  if (!schema || schema->onnx_schema().empty()) {
701  LOG(INFO) << "Cannot export c2 op " << op.type()
702  << " to onnx as there is no corresponding ONNX schema.";
703  return false;
704  }
706  ::ONNX_NAMESPACE::ModelProto onnx_model;
707  fillModelInfo(&onnx_model);
708  auto results = exporter->Caffe2OpToOnnxNodes(op, shape_hints_onnx_);
709  std::unordered_set<std::string> used_inputs;
710  std::unordered_set<std::string> used_outputs;
711  std::vector<std::string> boundary_inputs;
712  std::vector<std::string> boundary_outputs;
713  std::unordered_set<std::string> reshape_info;
714  // nodes are in topological order, so we just need to iterate
715  for (const auto& n : results.first) {
716  onnx_model.mutable_graph()->add_node()->CopyFrom(n);
717  for (const auto& i : n.input()) {
718  bool is_new = used_inputs.emplace(i).second;
719  // The input is not seen and it's not referred by any nodes before as
720  // output, we count it as an boudary input
721  if (is_new && !used_outputs.count(i)) {
722  boundary_inputs.emplace_back(i);
723  }
724  }
725  for (const auto& o : n.output()) {
726  used_outputs.emplace(o);
727  }
729  // For reshape node, if it has more than 1 inputs, we need to feed the
730  // second input which contains the shape info
731  if (n.op_type() == "Reshape" && n.input_size() > 1) {
732  reshape_info.emplace(n.input(1));
733  }
734  }
735  // Second iteration to account all the boundary outputs, which is a newly
736  // seen output and is not referred as input before
737  used_outputs.clear();
738  for (const auto& n : results.first) {
739  for (const auto& o : n.output()) {
740  bool is_new = used_outputs.emplace(o).second;
741  if (is_new && !used_inputs.count(o)) {
742  boundary_outputs.emplace_back(o);
743  }
744  }
745  }
746  std::unordered_map<std::string, ::ONNX_NAMESPACE::TypeProto>
747  extra_shape_hints;
748  for (const auto& t : results.second) {
749  extra_shape_hints.emplace(, onnx::ExtraTypeProto(t));
750  if (reshape_info.count( {
751  onnx_model.mutable_graph()->add_initializer()->CopyFrom(t);
752  }
753  }
755  // Add input/output shape info
756  auto io_vec = convertToValueInfo(
757  boundary_inputs, shape_hints_onnx_, extra_shape_hints);
758  for (const auto& i : io_vec) {
759  onnx_model.mutable_graph()->add_input()->CopyFrom(i);
760  }
761  io_vec = convertToValueInfo(
762  boundary_outputs, shape_hints_onnx_, extra_shape_hints);
763  for (const auto& i : io_vec) {
764  onnx_model.mutable_graph()->add_output()->CopyFrom(i);
765  }
767  std::string onnx_model_str;
768  onnx_model.SerializeToString(&onnx_model_str);
769  auto ret = lib_->onnxGetBackendCompatibility(
770  backend_id, onnx_model_str.size(), onnx_model_str.c_str());
771  if (ret != ONNXIFI_STATUS_SUCCESS) {
772  LOG(INFO) << "Don't support onnx for " << op.type() << " c2 op (" << ret
773  << ")";
774  return false;
775  } else {
776  return true;
777  }
778  } catch (const std::exception& ex) {
779  LOG(ERROR) << "Caught exception when converting op " << op.type()
780  << ", what: " << ex.what();
781  return false;
782  }
783 }
785 bool OnnxifiTransformer::supportOpC2(
786  const caffe2::OperatorDef& op,
787  const ShapeInfoMap& shape_hints,
788  const std::unordered_set<int>& blacklisted_ops,
789  onnxBackendID backend_id) const {
790  try {
791  int pos =
792  ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
793  if (blacklisted_ops.count(pos)) {
794  LOG(INFO) << "Skipping blacklisted op " << op.type() << " at pos " << pos;
795  return false;
796  }
798  // Build a c2 net with one op
799  NetDef net;
800  net.add_op()->CopyFrom(op);
801  for (const auto& i : op.input()) {
802  net.add_external_input(i);
803  }
804  for (const auto& o : op.output()) {
805  net.add_external_output(o);
806  }
807  // Remove the second output of Concat/Reshape from the external_output
808  if ((op.type() == "Concat" || op.type() == "Reshape") &&
809  op.output_size() == 2) {
810  net.mutable_external_output()->RemoveLast();
811  }
813  // Encode the input/output shapes to an argument
814  auto* shape_arg = net.add_arg();
815  shape_arg->set_name("input_shape_info");
816  for (const auto& i : op.input()) {
817  const auto it = shape_hints.find(i);
818  if (it == shape_hints.end()) {
819  return false;
820  }
821  shape_arg->mutable_tensors()->Add()->CopyFrom(
822  wrapShapeInfoIntoTensorProto(i, it->second));
823  }
824  shape_arg = net.add_arg();
825  shape_arg->set_name("output_shape_info");
826  for (const auto& i : op.output()) {
827  const auto it = shape_hints.find(i);
828  if (it == shape_hints.end()) {
829  return false;
830  }
831  shape_arg->mutable_tensors()->Add()->CopyFrom(
832  wrapShapeInfoIntoTensorProto(i, it->second));
833  }
835  std::string c2_model_str;
836  net.SerializeToString(&c2_model_str);
837  auto ret = lib_->onnxGetBackendCompatibility(
838  backend_id, c2_model_str.size(), c2_model_str.c_str());
839  if (ret != ONNXIFI_STATUS_SUCCESS) {
840  LOG(INFO) << "Don't support c2 op " << op.type() << " (" << ret << ")";
841  return false;
842  } else {
843  return true;
844  }
845  } catch (const std::exception& ex) {
846  LOG(ERROR) << "Caught exception when converting op " << op.type()
847  << ", what: " << ex.what();
848  return false;
849  }
850 }
852 void OnnxifiTransformer::tieGatherAndSparseLengthsWeightedSumOps(
853  const NetDef& net,
854  const ShapeInfoMap& shape_hints,
855  std::unordered_set<int>* blacklisted_ops) const {
856  std::unordered_map<std::string, int> output_pos;
857  onnx::OnnxExporter exporter(nullptr);
858  onnxBackendID backend_id = backend_ids_[idx_];
860  for (const auto& op : net.op()) {
861  if (op.type() == "Gather") {
862  int pos =
863  ArgumentHelper::GetSingleArgument<OperatorDef, int>(op, kNetPos, -1);
864  for (const auto& output : op.output()) {
865  output_pos.emplace(output, pos);
866  }
867  } else if (StartsWith(op.type(), "SparseLengthsWeighted")) {
868  auto supported = opts_.use_onnx
869  ? supportOpOnnx(op, &exporter, *blacklisted_ops, backend_id)
870  : supportOpC2(op, shape_hints, *blacklisted_ops, backend_id);
871  if (!supported && op.input_size() > 1) {
872  const auto it = output_pos.find(op.input(1));
873  if (it == output_pos.end()) {
874  continue;
875  }
876  blacklisted_ops->emplace(it->second);
877  // We know that current op is not going to be supported. Might as well
878  // blacklist it too
879  blacklisted_ops->emplace(
880  ArgumentHelper::GetSingleArgument<OperatorDef, int>(
881  op, kNetPos, -1));
882  }
883  }
884  }
885 }
887 void OnnxifiTransformer::applyFilteringRules(
888  const NetDef& net,
889  const ShapeInfoMap& shape_hints,
890  std::unordered_set<int>* blacklisted_ops) const {
891  tieGatherAndSparseLengthsWeightedSumOps(net, shape_hints, blacklisted_ops);
892 }
894 void OnnxifiTransformer::getBackendId() {
895  idx_ = 0;
897  if (opts_.use_onnx) {
898  return;
899  }
900  // Try to find a backend that support Caffe2 proto. Note that this is quite
901  // opportunistic as we don't offcially support Caffe2 proto.
902  char buf[kBufferSize];
903  for (int i = 0; i < backend_ids_.size(); ++i) {
904  size_t len = kBufferSize;
905  auto ret = lib_->onnxGetBackendInfo(
906  backend_ids_[i], ONNXIFI_BACKEND_DEVICE, buf, &len);
907  if (ret == ONNXIFI_STATUS_SUCCESS && strstr(buf, "Caffe2")) {
908  LOG(INFO) << "Using backend with Caffe2 Proto, ID: " << i;
909  idx_ = i;
910  break;
911  }
912  }
913 }
915 NetDef OnnxifiTransformer::TransformViaC2(
916  NetDef* pred_net,
917  const std::unordered_set<std::string>& weights,
918  const std::unordered_set<int>& blacklisted_ops,
919  const ShapeInfoMap& shape_hints) {
920  onnxBackendID backend_id = backend_ids_[idx_];
922  auto c2_supports = [this, &shape_hints, &blacklisted_ops, backend_id](
923  const caffe2::OperatorDef& op) {
924  return supportOpC2(op, shape_hints, blacklisted_ops, backend_id);
925  };
927  auto c2_converter =
928  [this, &weights, &shape_hints](const caffe2::NetDef& net) {
929  return SubnetToOnnxifiOpViaC2(net, weights, shape_hints);
930  };
932  return opt::OptimizeForBackend(*pred_net, c2_supports, c2_converter);
933 }
935 NetDef OnnxifiTransformer::TransformViaOnnx(
936  Workspace* ws,
937  NetDef* pred_net,
938  const std::unordered_set<std::string>& weights,
939  const std::unordered_set<int>& blacklisted_ops,
940  ShapeInfoMap* shape_hints) {
941  onnxBackendID backend_id = backend_ids_[idx_];
943  // function to tell whether the ONNXIFI backend supports a given C2 op or not
944  onnx::OnnxExporter exporter(nullptr);
945  auto onnx_supports = [this, &exporter, &blacklisted_ops, backend_id](
946  const caffe2::OperatorDef& op) {
947  return supportOpOnnx(op, &exporter, blacklisted_ops, backend_id);
948  };
950  // function to convert runnable subgraph into an onnxifi op. We need to keep
951  // the same exporter throughout the process to avoid duplicated dummy name
952  // generation
953  onnx::OnnxExporter exporter2(nullptr);
954  auto onnx_converter = [this, ws, &weights, shape_hints, &exporter2](
955  const caffe2::NetDef& net) mutable {
956  return SubnetToOnnxifiOpViaOnnx(net, weights, ws, &exporter2, shape_hints);
957  };
959  return opt::OptimizeForBackend(
960  *pred_net, onnx_supports, onnx_converter, opts_.debug);
961 }
963 // Cutting off the runnable part and replace with ONNXIFI ops. Asssume the nets
964 // were topologically sorted
965 void OnnxifiTransformer::transform(
966  Workspace* ws,
967  NetDef* pred_net,
968  const std::vector<std::string>& weight_names,
969  const std::unordered_map<std::string, TensorShape>& input_shape_hints,
970  const std::unordered_set<int>& blacklisted_ops) {
972  CAFFE_ENFORCE(pred_net, "Predict net cannot be nullptr");
974  // Get model id and reset Onnxifi op id to 0
975  model_id_ = getModelId(*pred_net);
976  onnxifi_op_id_ = 0;
978  // Unroll If ops
979  unrollIfOps(pred_net);
981  std::unordered_set<std::string> weights(
982  weight_names.begin(), weight_names.end());
984  // SSA Rewrite the net
985  auto shape_hints_mapped =
986  ssaRewriteAndMapNames(ws, pred_net, input_shape_hints);
988  // Populate shape info
989  // TODO(yingz): We should not need to create mapped_ws since we did not change
990  // any input mappings during ssarewrite. However this is here for the
991  // following reason: BlackBoxPredictor calls RunNetOnce before onnxifi to
992  // populate dimension info. However during this, it was observed, that new
993  // blob for output is created. This causes problem if inferShape uses original
994  // ws since it does not expect the output blob to be present.
995  Workspace mapped_ws(ws, input_mapping_);
996  ShapeInfoMap shape_hints = inferShapes(
997  &mapped_ws, pred_net, shape_hints_mapped, opts_.bound_shape_spec);
998  if (opts_.use_onnx) {
999  shape_hints_onnx_ = stripShapeInfoMap(shape_hints);
1000  }
1002  if (opts_.debug) {
1003  NetDef shape_net(*pred_net);
1004  auto* shape_arg = shape_net.add_arg();
1005  shape_arg->set_name("shape_info");
1006  for (const auto& kv : shape_hints) {
1007  auto t = wrapShapeInfoIntoTensorProto(kv.first, kv.second);
1008  t.add_int32_data(static_cast<int32_t>(kv.second.dim_type));
1009  shape_arg->mutable_tensors()->Add()->CopyFrom(t);
1010  }
1011  WriteProtoToTextFile(shape_net, "debug_ssa_net.pb_txt");
1012  }
1014  // Get backend id
1015  getBackendId();
1017  // Apply some filtering rules
1018  std::unordered_set<int> new_blacklisted_ops(
1019  blacklisted_ops.begin(), blacklisted_ops.end());
1020  applyFilteringRules(*pred_net, shape_hints, &new_blacklisted_ops);
1022  // Transform the net
1023  NetDef net_opt = opts_.use_onnx
1024  ? TransformViaOnnx(
1025  ws, pred_net, weights, new_blacklisted_ops, &shape_hints)
1026  : TransformViaC2(pred_net, weights, new_blacklisted_ops, shape_hints);
1028  // Need to figure out a proper place to handle device option
1029  net_opt.mutable_device_option()->CopyFrom(pred_net->device_option());
1031  if (opts_.debug) {
1032  WriteProtoToTextFile(net_opt, "debug_full_opt_net.pb_txt");
1033  }
1034  pred_net->Swap(&net_opt);
1035 }
1037 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13