Caffe2 - C++ API
A deep learning, cross-platform ML framework
nnapi.cc
17 #include "caffe2/core/operator.h"
18 #include "caffe2/core/tensor.h"
19 #include "caffe2/core/types.h"
20 #include "caffe2/utils/proto_utils.h"
21 
22 #include "nnapi.h"
23 
24 namespace {
25 // Bug: ANEURALNETWORKS_UNMAPPABLE and ANEURALNETWORKS_OP_FAILED share the same
26 // enum value
27 void reportError(int result_code) {
28  switch (result_code) {
29  case ANEURALNETWORKS_NO_ERROR:
30  break;
31  case ANEURALNETWORKS_OUT_OF_MEMORY:
32  CAFFE_THROW("out of memory");
33  case ANEURALNETWORKS_INCOMPLETE:
34  CAFFE_THROW("incomplete");
35  case ANEURALNETWORKS_UNEXPECTED_NULL:
36  CAFFE_THROW("unexpected null");
37  case ANEURALNETWORKS_BAD_DATA:
38  CAFFE_THROW("bad data");
39  case ANEURALNETWORKS_OP_FAILED:
40  CAFFE_THROW("op failed or unmappable");
41  case ANEURALNETWORKS_BAD_STATE:
42  CAFFE_THROW("bad state");
43  default:
44  CAFFE_THROW("unknown error");
45  }
46 }
47 } // namespace
48 
49 namespace caffe2 {
50 
51 bool NNApi::loadNNApiLibrary() {
52  return dlnnapi_load(&libnnapi_, DLNNAPI_FLAG_VERSION_27);
53 }
54 
55 NNApi::~NNApi() {
56  if (run_end_) {
57  libnnapi_.ANeuralNetworksEvent_free(run_end_);
58  }
59  if (run_) {
60  libnnapi_.ANeuralNetworksExecution_free(run_);
61  }
62  if (compilation_) {
63  libnnapi_.ANeuralNetworksCompilation_free(compilation_);
64  }
65  if (model_) {
66  libnnapi_.ANeuralNetworksModel_free(model_);
67  }
68 }
69 
70 bool NNApi::run(const TensorVector& inputs, TensorVector* outputs) {
71  CAFFE_ENFORCE(inputs.size() <= run_net_.external_input_size());
72  try {
73  init(inputs, outputs);
74  } catch (const std::exception& e) {
75  LOG(ERROR) << "Error during model initialization: " << e.what();
76  return false;
77  }
78 
79  try {
80  VLOG(1) << "Start compute";
81  int result_code =
82  libnnapi_.ANeuralNetworksExecution_startCompute(run_, &run_end_);
83  if (result_code != ANEURALNETWORKS_NO_ERROR) {
84  reportError(result_code);
85  }
86  result_code = libnnapi_.ANeuralNetworksEvent_wait(run_end_);
87  if (result_code != ANEURALNETWORKS_NO_ERROR) {
88  reportError(result_code);
89  }
90  VLOG(1) << "Finish compute";
91  } catch (const std::exception& e) {
92  LOG(ERROR) << "Error during model run: " << e.what();
93  return false;
94  }
95  return true;
96 }
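Note the asynchronous execution model here: ANeuralNetworksExecution_startCompute only launches the work and returns immediately with an event handle, and it is ANeuralNetworksEvent_wait that blocks until the driver finishes, so both result codes have to be checked before run() can report success.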
97 
98 void NNApi::getConvPoolArgs(const ArgumentHelper& helper, ConvPoolArgs& args) {
99  std::vector<int> kernel(helper.GetRepeatedArgument<int>("kernels"));
100  std::vector<int> stride(helper.GetRepeatedArgument<int>("strides"));
101  std::vector<int> pads(helper.GetRepeatedArgument<int>("pads"));
102 
103  // Get old arguments values
104  if (helper.HasArgument("kernel")) {
105  kernel.resize(2, helper.GetSingleArgument<int>("kernel", 0));
106  } else if (helper.HasArgument("kernelh") && helper.HasArgument("kernelw")) {
107  kernel.push_back(helper.GetSingleArgument<int>("kernelh", 0));
108  kernel.push_back(helper.GetSingleArgument<int>("kernelw", 0));
109  }
110 
111  if (helper.HasArgument("stride")) {
112  stride.resize(2, helper.GetSingleArgument<int>("stride", 0));
113  } else if (helper.HasArgument("stride_h") && helper.HasArgument("stride_w")) {
114  stride.push_back(helper.GetSingleArgument<int>("stride_h", 0));
115  stride.push_back(helper.GetSingleArgument<int>("stride_w", 0));
116  }
117 
118  if (helper.HasArgument("pad")) {
119  pads.resize(4, helper.GetSingleArgument<int>("pad", 0));
120  } else if (
121  helper.HasArgument("pad_t") && helper.HasArgument("pad_l") &&
122  helper.HasArgument("pad_b") && helper.HasArgument("pad_r")) {
123  pads.push_back(helper.GetSingleArgument<int>("pad_t", 0));
124  pads.push_back(helper.GetSingleArgument<int>("pad_l", 0));
125  pads.push_back(helper.GetSingleArgument<int>("pad_b", 0));
126  pads.push_back(helper.GetSingleArgument<int>("pad_r", 0));
127  }
128 
129  // Commit values
130  args.kernel_h = kernel.size() > 0 ? kernel[0] : 1;
131  args.kernel_w = kernel.size() > 1 ? kernel[1] : args.kernel_h;
132  args.stride_x = stride.size() > 0 ? stride[0] : 1;
133  args.stride_y = stride.size() > 1 ? stride[1] : 1;
134  args.pad_t = pads.size() > 0 ? pads[0] : 0;
135  args.pad_l = pads.size() > 1 ? pads[1] : 0;
136  args.pad_b = pads.size() > 2 ? pads[2] : 0;
137  args.pad_r = pads.size() > 3 ? pads[3] : 0;
138 }
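A small illustration of the legacy-argument expansion (not part of nnapi.cc; it assumes caffe2::MakeArgument from the already-included proto_utils.h, and the comments state what getConvPoolArgs would compute for such an op):

  // Illustration only: a legacy-style pooling op with single-value args.
  caffe2::OperatorDef op;
  op.set_type("MaxPool");
  op.add_arg()->CopyFrom(caffe2::MakeArgument<int>("kernel", 2));
  op.add_arg()->CopyFrom(caffe2::MakeArgument<int>("stride", 2));
  op.add_arg()->CopyFrom(caffe2::MakeArgument<int>("pad", 0));
  caffe2::ArgumentHelper helper(op);
  // For this helper, getConvPoolArgs fills kernel_h = kernel_w = 2,
  // stride_x = stride_y = 2 and pad_t = pad_l = pad_b = pad_r = 0;
  // with no kernel/stride arguments at all it falls back to a 1x1
  // kernel and stride 1.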
139 
140 void NNApi::addPooling(
141  const OperatorDef& op,
142  OperationCode op_code,
143  bool fuse_relu)
144 // clang-format off
145 {
146  // clang-format on
147  VLOG(1) << "Add Pooling to NN model";
148  CAFFE_ENFORCE_EQ(op.input_size(), 1);
149  CAFFE_ENFORCE_EQ(op.output_size(), 1);
150  ArgumentHelper helper(op);
151  StorageOrder order = StringToStorageOrder(
152  helper.GetSingleArgument<std::string>("order", "NCHW"));
153  if (order == NCHW) {
154  CAFFE_THROW("NN API supports NHWC only");
155  }
156 
157  ConvPoolArgs args;
158  getConvPoolArgs(helper, args);
159  CAFFE_ENFORCE_EQ(
160  args.stride_x,
161  args.stride_y,
162  "NN API only supports stride_x == stride_y");
163 
164  // add input operands to model
165  const uint32_t input_indices_count = 10;
166  const uint32_t output_indices_count = 1;
167  uint32_t input_indices[input_indices_count];
168  uint32_t output_indices[output_indices_count];
169 
170  uint32_t idx = 0;
171  // input
172  const std::string& input = op.input(0);
173  const std::vector<uint32_t>& input_dims = tensor_dims_[input];
174  input_indices[idx++] = operand_map_[input];
175 
176  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
177  uint32_t batches = input_dims[0];
178  uint32_t input_height = input_dims[1];
179  uint32_t input_width = input_dims[2];
180  uint32_t channel = input_dims[3];
181 
182  // pads in the order of left, right, top, bottom
183  input_indices[idx++] = addScalarOperand(args.pad_l);
184  input_indices[idx++] = addScalarOperand(args.pad_r);
185  input_indices[idx++] = addScalarOperand(args.pad_t);
186  input_indices[idx++] = addScalarOperand(args.pad_b);
187 
188  // strides
189  input_indices[idx++] = addScalarOperand(args.stride_x);
190  input_indices[idx++] = addScalarOperand(args.stride_y);
191 
192  // kernel size
193  input_indices[idx++] = addScalarOperand(args.kernel_h);
194  input_indices[idx++] = addScalarOperand(args.kernel_w);
195 
196  // fuse relu
197  FuseCode fuse =
198  fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
199  input_indices[idx] = addScalarOperand(fuse);
200 
201  // output
202  uint32_t output_height =
203  (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
204  1;
205  uint32_t output_width =
206  (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
207  1;
208 
209  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
210  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
211 
212  std::vector<uint32_t> dims({batches, output_height, output_width, channel});
213  output_indices[0] = addTensorOperand(
214  op.output(0), tensor_type_, dims, output_scale, output_zero_point);
215 
216  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
217  model_, op_code, input_indices_count, input_indices, 1, output_indices);
218  if (result_code != ANEURALNETWORKS_NO_ERROR) {
219  reportError(result_code);
220  }
221 }
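As a concrete check of the output-size arithmetic above: a {1, 28, 28, 8} NHWC input pooled with a 2x2 kernel, stride 2 and no padding gives output_height = output_width = (28 - 2 + 0 + 0) / 2 + 1 = 14, so the operand added for op.output(0) has dimensions {1, 14, 14, 8}.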
222 
223 void NNApi::addConv(const OperatorDef& op, bool fuse_relu) {
224  VLOG(1) << "Add Conv to NN model";
225  CAFFE_ENFORCE_EQ(op.input_size(), 3);
226  CAFFE_ENFORCE_EQ(op.output_size(), 1);
227 
228  ArgumentHelper helper(op);
229  StorageOrder order = StringToStorageOrder(
230  helper.GetSingleArgument<std::string>("order", "NCHW"));
231  CAFFE_ENFORCE_EQ(order, NHWC, "NN API supports NHWC only");
232 
233  // input
234  const std::string& input = op.input(0);
235  const std::vector<uint32_t>& input_dims = tensor_dims_[input];
236 
237  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
238  uint32_t batches = input_dims[0];
239  uint32_t input_height = input_dims[1];
240  uint32_t input_width = input_dims[2];
241  uint32_t input_channel = input_dims[3];
242 
243  uint32_t group = helper.GetSingleArgument<int>("group", 1);
244 
245  bool run_depthwise = false;
246  if (group > 1) {
247  CAFFE_ENFORCE_EQ(
248  group,
249  input_channel,
250  "NN API doesn't support non-depthwise convolution with groups");
251  run_depthwise = true;
252  }
253 
254  ConvPoolArgs args;
255  getConvPoolArgs(helper, args);
256 
257  CAFFE_ENFORCE_EQ(
258  args.stride_x,
259  args.stride_y,
260  "NN API only supports stride_x == stride_y");
261 
262  vector<int> dilation(helper.GetRepeatedArgument<int>("dilations"));
263  if (helper.HasArgument("dilation")) {
264  dilation.resize(2, helper.GetSingleArgument<int>("dilation", 0));
265  } else if (
266  helper.HasArgument("dilation_h") && helper.HasArgument("dilation_w")) {
267  dilation.push_back(helper.GetSingleArgument<int>("dilation_h", 0));
268  dilation.push_back(helper.GetSingleArgument<int>("dilation_w", 0));
269  }
270 
271  for (auto d : dilation) {
272  CAFFE_ENFORCE_EQ(d, 1, "NN API only supports dilation == 1");
273  }
274 
275  // add input operands to model
276  const uint32_t input_indices_count = run_depthwise ? 11 : 10;
277  const uint32_t output_indices_count = 1;
278  uint32_t input_indices[input_indices_count];
279  uint32_t output_indices[output_indices_count];
280 
281  uint32_t idx = 0;
282  // input
283  input_indices[idx++] = operand_map_[input];
284 
285  // weight
286  const std::string& weight_name = op.input(1);
287  const auto& weight = ws_.GetBlob(weight_name)->Get<TensorCPU>();
288  std::vector<uint32_t> weight_dims;
289  for (auto dim : weight.dims()) {
290  weight_dims.push_back(dim);
291  }
292  CAFFE_ENFORCE_EQ(weight_dims.size(), 4);
293  uint32_t num_kernels = weight_dims[0];
294  uint32_t kernel_h = weight_dims[1];
295  uint32_t kernel_w = weight_dims[2];
296  uint32_t kernel_depth = weight_dims[3];
297  CAFFE_ENFORCE_EQ(input_channel, kernel_depth);
298  if (run_depthwise) {
299  CAFFE_ENFORCE_EQ(num_kernels, 1);
300  }
301 
302  float weight_scale = helper.GetSingleArgument<float>("weight_scale", 1.0);
303  int weight_zero_point = helper.GetSingleArgument<int>("weight_zero_point", 0);
304 
305  uint32_t weight_idx = addTensorOperand(
306  weight_name, tensor_type_, weight_dims, weight_scale, weight_zero_point);
307 
308  int result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
309  model_, weight_idx, weight.raw_data(), weight.nbytes());
310  if (result_code != ANEURALNETWORKS_NO_ERROR) {
311  reportError(result_code);
312  }
313  input_indices[idx++] = weight_idx;
314 
315  // bias
316  const std::string& bias_name = op.input(2);
317  const auto& bias = ws_.GetBlob(bias_name)->Get<TensorCPU>();
318  std::vector<uint32_t> bias_dims;
319  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
320  uint32_t bias_size = bias.dim(0);
321  if (!run_depthwise) {
322  CAFFE_ENFORCE_EQ(num_kernels, bias_size);
323  } else {
324  CAFFE_ENFORCE_EQ(kernel_depth, bias_size);
325  }
326  bias_dims.push_back(bias_size);
327 
328  OperandCode bias_type = tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32
329  ? ANEURALNETWORKS_TENSOR_FLOAT32
330  : ANEURALNETWORKS_TENSOR_INT32;
331  if (bias_type == ANEURALNETWORKS_TENSOR_FLOAT32) {
332  CAFFE_ENFORCE(bias.IsType<float>());
333  } else if (bias_type == ANEURALNETWORKS_TENSOR_INT32) {
334  CAFFE_ENFORCE(bias.IsType<int>());
335  }
336  uint32_t bias_idx = addTensorOperand(bias_name, bias_type, bias_dims);
337 
338  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
339  model_, bias_idx, bias.raw_data(), bias.nbytes());
340  if (result_code != ANEURALNETWORKS_NO_ERROR) {
341  reportError(result_code);
342  }
343  input_indices[idx++] = bias_idx;
344 
345  // pads in the order of left, right, top, bottom
346  input_indices[idx++] = addScalarOperand(args.pad_l);
347  input_indices[idx++] = addScalarOperand(args.pad_r);
348  input_indices[idx++] = addScalarOperand(args.pad_t);
349  input_indices[idx++] = addScalarOperand(args.pad_b);
350 
351  // strides
352  input_indices[idx++] = addScalarOperand(args.stride_x);
353  input_indices[idx++] = addScalarOperand(args.stride_y);
354 
355  // depth_wise
356  if (run_depthwise) {
357  // depthwise multiplier == 1
358  input_indices[idx++] = addScalarOperand(1);
359  }
360 
361  // fuse relu
362  FuseCode fuse =
363  fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
364  input_indices[idx] = addScalarOperand(fuse);
365 
366  // output
367  uint32_t output_channel = run_depthwise ? kernel_depth : num_kernels;
368  uint32_t output_height =
369  (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
370  1;
371  uint32_t output_width =
372  (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
373  1;
374 
375  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
376  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
377 
378  std::vector<uint32_t> dims(
379  {batches, output_height, output_width, output_channel});
380  output_indices[0] = addTensorOperand(
381  op.output(0), tensor_type_, dims, output_scale, output_zero_point);
382  if (run_depthwise) {
383  CAFFE_ENFORCE_EQ(input_indices_count, 11);
384  result_code = libnnapi_.ANeuralNetworksModel_addOperation(
385  model_,
386  ANEURALNETWORKS_DEPTHWISE_CONV_2D,
387  input_indices_count,
388  input_indices,
389  output_indices_count,
390  output_indices);
391  if (result_code != ANEURALNETWORKS_NO_ERROR) {
392  reportError(result_code);
393  }
394  } else {
395  CAFFE_ENFORCE_EQ(input_indices_count, 10);
396  result_code = libnnapi_.ANeuralNetworksModel_addOperation(
397  model_,
398  ANEURALNETWORKS_CONV_2D,
399  input_indices_count,
400  input_indices,
401  output_indices_count,
402  output_indices);
403  if (result_code != ANEURALNETWORKS_NO_ERROR) {
404  reportError(result_code);
405  }
406  }
407 }
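For a concrete depthwise case: a 3x3 convolution over a 32-channel NHWC input arrives with group == 32 == input_channel and a weight blob of shape {1, 3, 3, 32}, so run_depthwise is set, the depthwise-multiplier operand is fixed at 1, and the operation is added as ANEURALNETWORKS_DEPTHWISE_CONV_2D with 11 inputs; a group == 1 convolution takes the 10-input ANEURALNETWORKS_CONV_2D path instead.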
408 
409 void NNApi::addRelu(const OperatorDef& op) {
410  VLOG(1) << "Add Relu to NN model";
411  CAFFE_ENFORCE_EQ(op.input_size(), 1);
412  CAFFE_ENFORCE_EQ(op.output_size(), 1);
413  const std::string& input = op.input(0);
414  uint32_t input_idx = operand_map_[input];
415 
416  ArgumentHelper helper(op);
417  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
418  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
419 
420  uint32_t output_idx = addTensorOperand(
421  op.output(0),
422  tensor_type_,
423  tensor_dims_[input],
424  output_scale,
425  output_zero_point);
426 
427  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
428  model_, ANEURALNETWORKS_RELU, 1, &input_idx, 1, &output_idx);
429  if (result_code != ANEURALNETWORKS_NO_ERROR) {
430  reportError(result_code);
431  }
432 }
433 
434 void NNApi::addSoftmax(const OperatorDef& op) {
435  VLOG(1) << "Add Softmax to NN model";
436  ArgumentHelper helper(op);
437  CAFFE_ENFORCE_EQ(
438  helper.GetSingleArgument<int>("axis", 1),
439  1,
440  "NN API only supports axis == 1");
441 
442  uint32_t input_indices[2];
443  const std::string& input = op.input(0);
444  input_indices[0] = operand_map_[input];
445  const auto& input_dims = tensor_dims_[input];
446  CAFFE_ENFORCE(
447  input_dims.size() == 2 || input_dims.size() == 4,
448  "Supported tensor rank: 2 or 4");
449 
450  // the positive scaling factor for the exponent, beta
451  const float scale = 1.0;
452  input_indices[1] = addFloatOperand(scale);
453 
454  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
455  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
456  if (tensor_type_ == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
457  CAFFE_ENFORCE_EQ(output_scale, 1.f / 256);
458  CAFFE_ENFORCE_EQ(output_zero_point, 0);
459  }
460  uint32_t output_idx = addTensorOperand(
461  op.output(0),
462  tensor_type_,
463  tensor_dims_[input],
464  output_scale,
465  output_zero_point);
466 
467  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
468  model_, ANEURALNETWORKS_SOFTMAX, 2, input_indices, 1, &output_idx);
469  if (result_code != ANEURALNETWORKS_NO_ERROR) {
470  reportError(result_code);
471  }
472 }
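The scale and zero-point check above reflects an NN API constraint: a SOFTMAX with an ANEURALNETWORKS_TENSOR_QUANT8_ASYMM output must use scale 1/256 and zeroPoint 0, so the 8-bit quantized output maps exactly onto the [0, 1) probability range.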
473 
474 // int32_t
475 uint32_t NNApi::addScalarOperand(int32_t val) {
476  ANeuralNetworksOperandType scalar;
477  scalar.type = ANEURALNETWORKS_INT32;
478  scalar.scale = 0;
479  scalar.zeroPoint = 0;
480  scalar.dimensionCount = 0;
481  scalar.dimensions = NULL;
482  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
483  if (result_code != ANEURALNETWORKS_NO_ERROR) {
484  reportError(result_code);
485  }
486 
487  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
488  model_, operand_idx, &val, sizeof(val));
489  if (result_code != ANEURALNETWORKS_NO_ERROR) {
490  reportError(result_code);
491  }
492 
493  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
494  return operand_idx++;
495 }
496 
497 // float32
498 uint32_t NNApi::addFloatOperand(float val) {
499  ANeuralNetworksOperandType scalar;
500  scalar.type = ANEURALNETWORKS_FLOAT32;
501  scalar.scale = 0;
502  scalar.zeroPoint = 0;
503  scalar.dimensionCount = 0;
504  scalar.dimensions = NULL;
505  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
506  if (result_code != ANEURALNETWORKS_NO_ERROR) {
507  reportError(result_code);
508  }
509 
510  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
511  model_, operand_idx, &val, sizeof(val));
512  if (result_code != ANEURALNETWORKS_NO_ERROR) {
513  reportError(result_code);
514  }
515 
516  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
517  return operand_idx++;
518 }
519 
520 uint32_t NNApi::addTensorOperand(
521  const std::string& blob,
522  OperandCode type,
523  std::vector<uint32_t>& dims,
524  float scale,
525  int32_t zero_point)
526 // clang-format off
527 {
528  // clang-format on
529  auto found = operand_map_.find(blob);
530  if (found == operand_map_.end()) {
531  ANeuralNetworksOperandType tensor;
532  tensor.type = type;
533  tensor.scale = scale;
534  tensor.zeroPoint = zero_point;
535  tensor.dimensionCount = dims.size();
536  tensor.dimensions = dims.data();
537 
538  int result_code =
539  libnnapi_.ANeuralNetworksModel_addOperand(model_, &tensor);
540  if (result_code != ANEURALNETWORKS_NO_ERROR) {
541  reportError(result_code);
542  }
543 
544  operand_map_[blob] = operand_idx++;
545  tensor_dims_[blob] = dims;
546  VLOG(1) << "Added operand, " << blob << ", at " << operand_map_[blob];
547  }
548  return operand_map_[blob];
549 }
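Note that addTensorOperand memoizes by blob name: the first call for a blob creates the NN API operand and records its index and dimensions in operand_map_ and tensor_dims_, and subsequent calls return the existing index. That is how consecutive operators chain together, since the operand created for one op's output is looked up again when a later op names that blob as its input.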
550 
551 void NNApi::init(const TensorVector& inputs, TensorVector* outputs) {
552  // model
553  if (!model_) {
554  int result_code = libnnapi_.ANeuralNetworksModel_create(&model_);
555  if (result_code != ANEURALNETWORKS_NO_ERROR) {
556  reportError(result_code);
557  }
558  if (!model_) {
559  CAFFE_THROW("Failed to create NN model");
560  } else {
561  LOG(INFO) << "Created NN model";
562  }
563 
564  ArgumentHelper helper(run_net_);
565  float scale = helper.GetSingleArgument<float>("scale", 1.0);
566  int zero_point = helper.GetSingleArgument<int>("zero_point", 0);
567 
568  // add external input dimension
569  for (int i = 0; i < inputs.size(); i++) {
570  if (inputs[i]->IsType<float>()) {
571  tensor_type_ = ANEURALNETWORKS_TENSOR_FLOAT32;
572  } else if (inputs[i]->IsType<uint8_t>()) {
573  tensor_type_ = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
574  } else {
575  CAFFE_THROW("Unsupported tensor type");
576  }
577  const std::string& input_blob = run_net_.external_input(i);
578  std::vector<uint32_t> dims;
579  for (auto dim : inputs[i]->dims()) {
580  dims.push_back(dim);
581  }
582  addTensorOperand(input_blob, tensor_type_, dims, scale, zero_point);
583  }
584 
585  // add operands and operations
586  for (const auto& op : run_net_.op()) {
587  if (operator_map_.count(op.type()) == 0) {
588  CAFFE_THROW("Unsupported operator");
589  }
590  switch (operator_map_[op.type()]) {
591  case AVERAGEPOOL:
592  addPooling(op, ANEURALNETWORKS_AVERAGE_POOL_2D);
593  break;
594  case CONV:
595  addConv(op);
596  break;
597  case MAXPOOL:
598  addPooling(op, ANEURALNETWORKS_MAX_POOL_2D);
599  break;
600  case RELU:
601  addRelu(op);
602  break;
603  case SOFTMAX:
604  addSoftmax(op);
605  break;
606  default:
607  CAFFE_THROW("Unsupported operator");
608  break;
609  }
610  }
611 
612  // model inputs and outputs
613  int output_size = run_net_.external_output_size();
614  std::vector<uint32_t> input_indices(inputs.size());
615  std::vector<uint32_t> output_indices(output_size);
616  for (int i = 0; i < inputs.size(); i++) {
617  input_indices[i] = operand_map_[run_net_.external_input(i)];
618  }
619  for (int i = 0; i < output_size; i++) {
620  output_indices[i] = operand_map_[run_net_.external_output(i)];
621  }
622 
623  result_code = libnnapi_.ANeuralNetworksModel_identifyInputsAndOutputs(
624  model_,
625  inputs.size(),
626  input_indices.data(),
627  output_size,
628  output_indices.data());
629  if (result_code != ANEURALNETWORKS_NO_ERROR) {
630  reportError(result_code);
631  }
632 
633  result_code = libnnapi_.ANeuralNetworksModel_finish(model_);
634  if (result_code != ANEURALNETWORKS_NO_ERROR) {
635  reportError(result_code);
636  }
637 
638  LOG(INFO) << "Finish creating model";
639 
640  // compile
641  if (!compilation_) {
642  result_code =
643  libnnapi_.ANeuralNetworksCompilation_create(model_, &compilation_);
644  if (result_code != ANEURALNETWORKS_NO_ERROR) {
645  reportError(result_code);
646  }
647 
648  result_code = libnnapi_.ANeuralNetworksCompilation_setPreference(
649  compilation_, preference_);
650  if (result_code != ANEURALNETWORKS_NO_ERROR) {
651  reportError(result_code);
652  }
653 
654  result_code = libnnapi_.ANeuralNetworksCompilation_finish(compilation_);
655  if (result_code != ANEURALNETWORKS_NO_ERROR) {
656  reportError(result_code);
657  }
658 
659  LOG(INFO) << "Finish compilation";
660  }
661 
662  // pre-execution
663  if (!run_) {
664  result_code =
665  libnnapi_.ANeuralNetworksExecution_create(compilation_, &run_);
666  if (result_code != ANEURALNETWORKS_NO_ERROR) {
667  reportError(result_code);
668  }
669  LOG(INFO) << "Created model execution";
670  }
671 
672  // set external input and output
673  for (int i = 0; i < inputs.size(); i++) {
674  result_code = libnnapi_.ANeuralNetworksExecution_setInput(
675  run_, i, NULL, inputs[i]->raw_data(), inputs[i]->size());
676  if (result_code != ANEURALNETWORKS_NO_ERROR) {
677  reportError(result_code);
678  }
679 
680  VLOG(1) << "Set external input " << i << " at " << inputs[i]->raw_data()
681  << ", size = " << inputs[i]->size();
682  }
683  // allocate memory for outputs
684  for (int i = 0; i < output_size; i++) {
685  const std::string& blob = run_net_.external_output(i);
686  if (operand_map_.find(blob) == operand_map_.end()) {
687  CAFFE_THROW("Unknown external output, ", blob);
688  }
689  uint32_t idx = operand_map_[blob];
690  if (tensor_dims_.find(blob) == tensor_dims_.end()) {
691  CAFFE_THROW("Operand dimension unknown");
692  }
693  std::vector<int> output_dims;
694  for (auto dim : tensor_dims_[blob]) {
695  output_dims.push_back(dim);
696  }
697 
698  auto* tensor = ws_.CreateBlob(blob)->GetMutable<TensorCPU>();
699  tensor->Resize(output_dims);
700  outputs->push_back(tensor);
701 
702  if (tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32) {
703  result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
704  run_,
705  i,
706  NULL,
707  (void*)tensor->template mutable_data<float>(),
708  tensor->size());
709  if (result_code != ANEURALNETWORKS_NO_ERROR) {
710  reportError(result_code);
711  }
712 
713  } else {
714  result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
715  run_,
716  i,
717  NULL,
718  (void*)tensor->template mutable_data<uint8_t>(),
719  tensor->size());
720  if (result_code != ANEURALNETWORKS_NO_ERROR) {
721  reportError(result_code);
722  }
723  }
724 
725  VLOG(1) << "Set external output " << i << " at " << tensor->raw_data()
726  << ", size = " << tensor->size();
727  }
728  }
729 }
730 
731 } // namespace caffe2
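Finally, a minimal sketch of how this backend might be driven from application code. This is an illustration only: the include path, the NNApi constructor signature (init NetDef, run NetDef, Workspace*), and TensorVector being a std::vector of TensorCPU pointers are assumptions about nnapi.h, which is not shown on this page.

#include <vector>

#include "caffe2/core/logging.h"
#include "caffe2/core/workspace.h"
#include "nnapi.h"  // as in nnapi.cc; the actual path depends on the build

namespace caffe2 {

bool RunNetOnNNApi(const NetDef& init_net, const NetDef& run_net) {
  Workspace ws;
  // Assumed constructor shape; check nnapi.h for the real signature.
  NNApi nnapi(init_net, run_net, &ws);
  if (!nnapi.loadNNApiLibrary()) {
    LOG(ERROR) << "NNAPI (Android 8.1 / API level 27) is not available";
    return false;
  }

  // One NHWC float input; float data selects ANEURALNETWORKS_TENSOR_FLOAT32
  // in NNApi::init(), while uint8_t data would select the quantized type.
  TensorCPU input;
  input.Resize(1, 224, 224, 3);
  input.mutable_data<float>();  // allocate; fill with real image data

  std::vector<TensorCPU*> inputs{&input};  // assumed TensorVector layout
  std::vector<TensorCPU*> outputs;         // populated by run() from external_output blobs
  return nnapi.run(inputs, &outputs);
}

} // namespace caffe2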