Caffe2 - C++ API
A deep learning, cross-platform ML framework
nnapi.cc
1 #include "caffe2/core/operator.h"
2 #include "caffe2/core/tensor.h"
3 #include "caffe2/core/types.h"
4 #include "caffe2/utils/proto_utils.h"
5 
6 #include "nnapi.h"
7 
8 namespace {
9 // Bug: ANEURALNETWORKS_UNMAPPABLE and ANEURALNETWORKS_OP_FAILED share the same
10 // enum value
11 void reportError(int result_code) {
12  switch (result_code) {
13  case ANEURALNETWORKS_NO_ERROR:
14  break;
15  case ANEURALNETWORKS_OUT_OF_MEMORY:
16  CAFFE_THROW("out of memory");
17  case ANEURALNETWORKS_INCOMPLETE:
18  CAFFE_THROW("incomplete");
19  case ANEURALNETWORKS_UNEXPECTED_NULL:
20  CAFFE_THROW("unexpected null");
21  case ANEURALNETWORKS_BAD_DATA:
22  CAFFE_THROW("bad data");
23  case ANEURALNETWORKS_OP_FAILED:
24  CAFFE_THROW("op failed or unmappable");
25  case ANEURALNETWORKS_BAD_STATE:
26  CAFFE_THROW("bad state");
27  default:
28  CAFFE_THROW("unknown error");
29  }
30 }
31 } // namespace
32 
33 namespace caffe2 {
34 
35 bool NNApi::loadNNApiLibrary() {
36  return dlnnapi_load(&libnnapi_, DLNNAPI_FLAG_VERSION_27);
37 }
38 
39 NNApi::~NNApi() {
40  if (run_end_) {
41  libnnapi_.ANeuralNetworksEvent_free(run_end_);
42  }
43  if (run_) {
44  libnnapi_.ANeuralNetworksExecution_free(run_);
45  }
46  if (compilation_) {
47  libnnapi_.ANeuralNetworksCompilation_free(compilation_);
48  }
49  if (model_) {
50  libnnapi_.ANeuralNetworksModel_free(model_);
51  }
52 }
53 
54 bool NNApi::run(const TensorVector& inputs, TensorVector* outputs) {
55  CAFFE_ENFORCE(inputs.size() <= run_net_.external_input_size());
56  try {
57  init(inputs, outputs);
58  } catch (const std::exception& e) {
59  LOG(ERROR) << "Error during model initialization: " << e.what();
60  return false;
61  }
62 
63  try {
64  VLOG(1) << "Start compute";
65  int result_code =
66  libnnapi_.ANeuralNetworksExecution_startCompute(run_, &run_end_);
67  if (result_code != ANEURALNETWORKS_NO_ERROR) {
68  reportError(result_code);
69  }
70  result_code = libnnapi_.ANeuralNetworksEvent_wait(run_end_);
71  if (result_code != ANEURALNETWORKS_NO_ERROR) {
72  reportError(result_code);
73  }
74  VLOG(1) << "Finish compute";
75  } catch (const std::exception& e) {
76  LOG(ERROR) << "Error during model run: " << e.what();
77  return false;
78  }
79  return true;
80 }
81 
82 void NNApi::getConvPoolArgs(const ArgumentHelper& helper, ConvPoolArgs& args) {
83  std::vector<int> kernel(helper.GetRepeatedArgument<int>("kernels"));
84  std::vector<int> stride(helper.GetRepeatedArgument<int>("strides"));
85  std::vector<int> pads(helper.GetRepeatedArgument<int>("pads"));
86 
87  // Get old arguments values
88  if (helper.HasArgument("kernel")) {
89  kernel.resize(2, helper.GetSingleArgument<int>("kernel", 0));
90  } else if (helper.HasArgument("kernelh") && helper.HasArgument("kernelw")) {
91  kernel.push_back(helper.GetSingleArgument<int>("kernelh", 0));
92  kernel.push_back(helper.GetSingleArgument<int>("kernelw", 0));
93  }
94 
95  if (helper.HasArgument("stride")) {
96  stride.resize(2, helper.GetSingleArgument<int>("stride", 0));
97  } else if (helper.HasArgument("stride_h") && helper.HasArgument("stride_w")) {
98  stride.push_back(helper.GetSingleArgument<int>("stride_h", 0));
99  stride.push_back(helper.GetSingleArgument<int>("stride_w", 0));
100  }
101 
102  if (helper.HasArgument("pad")) {
103  pads.resize(4, helper.GetSingleArgument<int>("pad", 0));
104  } else if (
105  helper.HasArgument("pad_t") && helper.HasArgument("pad_l") &&
106  helper.HasArgument("pad_b") && helper.HasArgument("pad_r")) {
107  pads.push_back(helper.GetSingleArgument<int>("pad_t", 0));
108  pads.push_back(helper.GetSingleArgument<int>("pad_l", 0));
109  pads.push_back(helper.GetSingleArgument<int>("pad_b", 0));
110  pads.push_back(helper.GetSingleArgument<int>("pad_r", 0));
111  }
112 
113  // Commit values
114  args.kernel_h = kernel.size() > 0 ? kernel[0] : 1;
115  args.kernel_w = kernel.size() > 1 ? kernel[1] : args.kernel_h;
116  args.stride_x = stride.size() > 0 ? stride[0] : 1;
117  args.stride_y = stride.size() > 1 ? stride[1] : 1;
118  args.pad_t = pads.size() > 0 ? pads[0] : 0;
119  args.pad_l = pads.size() > 1 ? pads[1] : 0;
120  args.pad_b = pads.size() > 2 ? pads[2] : 0;
121  args.pad_r = pads.size() > 3 ? pads[3] : 0;
122 }
123 
124 void NNApi::addPooling(
125  const OperatorDef& op,
126  OperationCode op_code,
127  bool fuse_relu)
128 // clang-format off
129 {
130  // clang-format on
131  VLOG(1) << "Add Pooling to NN model";
132  CAFFE_ENFORCE_EQ(op.input_size(), 1);
133  CAFFE_ENFORCE_EQ(op.output_size(), 1);
134  ArgumentHelper helper(op);
135  StorageOrder order = StringToStorageOrder(
136  helper.GetSingleArgument<std::string>("order", "NCHW"));
137  if (order == NCHW) {
138  CAFFE_THROW("NN API supports NHWC only");
139  }
140 
141  ConvPoolArgs args;
142  getConvPoolArgs(helper, args);
143  CAFFE_ENFORCE_EQ(
144  args.stride_x,
145  args.stride_y,
146  "NN API only supports stride_x == stride_y");
147 
148  // add input operands to model
149  const uint32_t input_indices_count = 10;
150  const uint32_t output_indices_count = 1;
151  uint32_t input_indices[input_indices_count];
152  uint32_t output_indices[output_indices_count];
153 
154  uint32_t idx = 0;
155  // input
156  const std::string& input = op.input(0);
157  const std::vector<uint32_t>& input_dims = tensor_dims_[input];
158  input_indices[idx++] = operand_map_[input];
159 
160  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
161  uint32_t batches = input_dims[0];
162  uint32_t input_height = input_dims[1];
163  uint32_t input_width = input_dims[2];
164  uint32_t channel = input_dims[3];
165 
166  // pads in the order of left, right, top, bottom
167  input_indices[idx++] = addScalarOperand(args.pad_l);
168  input_indices[idx++] = addScalarOperand(args.pad_r);
169  input_indices[idx++] = addScalarOperand(args.pad_t);
170  input_indices[idx++] = addScalarOperand(args.pad_b);
171 
172  // strides
173  input_indices[idx++] = addScalarOperand(args.stride_x);
174  input_indices[idx++] = addScalarOperand(args.stride_y);
175 
176  // kernel size
177  input_indices[idx++] = addScalarOperand(args.kernel_h);
178  input_indices[idx++] = addScalarOperand(args.kernel_w);
179 
180  // fuse relu
181  int32_t fuse =
182  fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
183  input_indices[idx] = addScalarOperand(fuse);
184 
185  // output
186  uint32_t output_height =
187  (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
188  1;
189  uint32_t output_width =
190  (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
191  1;
192 
193  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
194  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
195 
196  std::vector<uint32_t> dims({batches, output_height, output_width, channel});
197  output_indices[0] = addTensorOperand(
198  op.output(0), tensor_type_, dims, output_scale, output_zero_point);
199 
200  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
201  model_, op_code, input_indices_count, input_indices, 1, output_indices);
202  if (result_code != ANEURALNETWORKS_NO_ERROR) {
203  reportError(result_code);
204  }
205 }
206 
207 void NNApi::addConv(const OperatorDef& op, bool fuse_relu) {
208  VLOG(1) << "Add Conv to NN model";
209  CAFFE_ENFORCE_EQ(op.input_size(), 3);
210  CAFFE_ENFORCE_EQ(op.output_size(), 1);
211 
212  ArgumentHelper helper(op);
213  StorageOrder order = StringToStorageOrder(
214  helper.GetSingleArgument<std::string>("order", "NCHW"));
215  CAFFE_ENFORCE_EQ(order, NHWC, "NN API supports NHWC only");
216 
217  // input
218  const std::string& input = op.input(0);
219  const std::vector<uint32_t>& input_dims = tensor_dims_[input];
220 
221  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
222  uint32_t batches = input_dims[0];
223  uint32_t input_height = input_dims[1];
224  uint32_t input_width = input_dims[2];
225  uint32_t input_channel = input_dims[3];
226 
227  uint32_t group = helper.GetSingleArgument<int>("group", 1);
228 
229  bool run_depthwise = false;
230  if (group > 1) {
231  CAFFE_ENFORCE_EQ(
232  group,
233  input_channel,
234  "NN API doesn't support non-depthwise convolution with groups");
235  run_depthwise = true;
236  }
237 
238  ConvPoolArgs args;
239  getConvPoolArgs(helper, args);
240 
241  CAFFE_ENFORCE_EQ(
242  args.stride_x,
243  args.stride_y,
244  "NN API only supports stride_x == stride_y");
245 
246  vector<int> dilation(helper.GetRepeatedArgument<int>("dilations"));
247  if (helper.HasArgument("dilation")) {
248  dilation.resize(2, helper.GetSingleArgument<int>("dilation", 0));
249  } else if (
250  helper.HasArgument("dilationh") && helper.HasArgument("dilationw")) {
251  dilation.push_back(helper.GetSingleArgument<int>("dilation_h", 0));
252  dilation.push_back(helper.GetSingleArgument<int>("dilation_w", 0));
253  }
254 
255  for (auto d : dilation) {
256  CAFFE_ENFORCE_EQ(d, 1, "NN API only supports dilation == 1");
257  }
258 
259  // add input operands to model
260  const uint32_t input_indices_count = run_depthwise ? 11 : 10;
261  const uint32_t output_indices_count = 1;
262  uint32_t input_indices[input_indices_count];
263  uint32_t output_indices[output_indices_count];
264 
265  uint32_t idx = 0;
266  // input
267  input_indices[idx++] = operand_map_[input];
268 
269  // weight
270  const std::string& weight_name = op.input(1);
271  const auto& weight = ws_.GetBlob(weight_name)->Get<TensorCPU>();
272  std::vector<uint32_t> weight_dims;
273  for (auto dim : weight.sizes()) {
274  weight_dims.push_back(dim);
275  }
276  CAFFE_ENFORCE_EQ(weight_dims.size(), 4);
277  uint32_t num_kernels = weight_dims[0];
278  uint32_t kernel_h = weight_dims[1];
279  uint32_t kernel_w = weight_dims[2];
280  uint32_t kernel_depth = weight_dims[3];
281  CAFFE_ENFORCE_EQ(input_channel, kernel_depth);
282  if (run_depthwise) {
283  CAFFE_ENFORCE_EQ(num_kernels, 1);
284  }
285 
286  float weight_scale = helper.GetSingleArgument<float>("weight_scale", 1.0);
287  int weight_zero_point = helper.GetSingleArgument<int>("weight_zero_point", 0);
288 
289  uint32_t weight_idx = addTensorOperand(
290  weight_name, tensor_type_, weight_dims, weight_scale, weight_zero_point);
291 
292  int result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
293  model_, weight_idx, weight.raw_data(), weight.nbytes());
294  if (result_code != ANEURALNETWORKS_NO_ERROR) {
295  reportError(result_code);
296  }
297  input_indices[idx++] = weight_idx;
298 
299  // bias
300  const std::string& bias_name = op.input(2);
301  const auto& bias = ws_.GetBlob(bias_name)->Get<TensorCPU>();
302  std::vector<uint32_t> bias_dims;
303  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
304  uint32_t bias_size = bias.dim(0);
305  if (!run_depthwise) {
306  CAFFE_ENFORCE_EQ(num_kernels, bias_size);
307  } else {
308  CAFFE_ENFORCE_EQ(kernel_depth, bias_size);
309  }
310  bias_dims.push_back(bias_size);
311 
312  OperandCode bias_type = tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32
313  ? ANEURALNETWORKS_TENSOR_FLOAT32
314  : ANEURALNETWORKS_TENSOR_INT32;
315  if (bias_type == ANEURALNETWORKS_TENSOR_FLOAT32) {
316  CAFFE_ENFORCE(bias.IsType<float>());
317  } else if (bias_type == ANEURALNETWORKS_TENSOR_INT32) {
318  CAFFE_ENFORCE(bias.IsType<int>());
319  }
320  uint32_t bias_idx = addTensorOperand(bias_name, bias_type, bias_dims);
321 
322  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
323  model_, bias_idx, bias.raw_data(), bias.nbytes());
324  if (result_code != ANEURALNETWORKS_NO_ERROR) {
325  reportError(result_code);
326  }
327  input_indices[idx++] = bias_idx;
328 
329  // pads in the order of left, right, top, bottom
330  input_indices[idx++] = addScalarOperand(args.pad_l);
331  input_indices[idx++] = addScalarOperand(args.pad_r);
332  input_indices[idx++] = addScalarOperand(args.pad_t);
333  input_indices[idx++] = addScalarOperand(args.pad_b);
334 
335  // strides
336  input_indices[idx++] = addScalarOperand(args.stride_x);
337  input_indices[idx++] = addScalarOperand(args.stride_y);
338 
339  // depth_wise
340  if (run_depthwise) {
341  // depthwise multiplier == 1
342  input_indices[idx++] = addScalarOperand(1);
343  }
344 
345  // fuse relu
346  int32_t fuse =
347  fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
348  input_indices[idx] = addScalarOperand(fuse);
349 
350  // output
351  uint32_t output_channel = run_depthwise ? kernel_depth : num_kernels;
352  uint32_t output_height =
353  (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
354  1;
355  uint32_t output_width =
356  (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
357  1;
358 
359  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
360  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
361 
362  std::vector<uint32_t> dims(
363  {batches, output_height, output_width, output_channel});
364  output_indices[0] = addTensorOperand(
365  op.output(0), tensor_type_, dims, output_scale, output_zero_point);
366  if (run_depthwise) {
367  CAFFE_ENFORCE_EQ(input_indices_count, 11);
368  result_code = libnnapi_.ANeuralNetworksModel_addOperation(
369  model_,
370  ANEURALNETWORKS_DEPTHWISE_CONV_2D,
371  input_indices_count,
372  input_indices,
373  output_indices_count,
374  output_indices);
375  if (result_code != ANEURALNETWORKS_NO_ERROR) {
376  reportError(result_code);
377  }
378  } else {
379  CAFFE_ENFORCE_EQ(input_indices_count, 10);
380  result_code = libnnapi_.ANeuralNetworksModel_addOperation(
381  model_,
382  ANEURALNETWORKS_CONV_2D,
383  input_indices_count,
384  input_indices,
385  output_indices_count,
386  output_indices);
387  if (result_code != ANEURALNETWORKS_NO_ERROR) {
388  reportError(result_code);
389  }
390  }
391 }
392 
393 void NNApi::addRelu(const OperatorDef& op) {
394  VLOG(1) << "Add Relu to NN model";
395  CAFFE_ENFORCE_EQ(op.input_size(), 1);
396  CAFFE_ENFORCE_EQ(op.output_size(), 1);
397  const std::string& input = op.input(0);
398  uint32_t input_idx = operand_map_[input];
399 
400  ArgumentHelper helper(op);
401  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
402  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
403 
404  uint32_t output_idx = addTensorOperand(
405  op.output(0),
406  tensor_type_,
407  tensor_dims_[input],
408  output_scale,
409  output_zero_point);
410 
411  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
412  model_, ANEURALNETWORKS_RELU, 1, &input_idx, 1, &output_idx);
413  if (result_code != ANEURALNETWORKS_NO_ERROR) {
414  reportError(result_code);
415  }
416 }
417 
418 void NNApi::addSoftmax(const OperatorDef& op) {
419  VLOG(1) << "Add Softmax to NN model";
420  ArgumentHelper helper(op);
421  CAFFE_ENFORCE_EQ(
422  helper.GetSingleArgument<int>("axis", 1),
423  1,
424  "NN API only supports axis == 1");
425 
426  uint32_t input_indices[2];
427  const std::string& input = op.input(0);
428  input_indices[0] = operand_map_[input];
429  const auto& input_dims = tensor_dims_[input];
430  CAFFE_ENFORCE(
431  input_dims.size() == 2 || input_dims.size() == 4,
432  "Supported tensor rank: 2 or 4");
433 
434  // the positive scaling factor for the exponent, beta
435  const float scale = 1.0;
436  input_indices[1] = addFloatOperand(scale);
437 
438  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
439  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
440  if (tensor_type_ == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
441  CAFFE_ENFORCE_EQ(output_scale, 1.f / 256);
442  CAFFE_ENFORCE_EQ(output_zero_point, 0);
443  }
444  uint32_t output_idx = addTensorOperand(
445  op.output(0),
446  tensor_type_,
447  tensor_dims_[input],
448  output_scale,
449  output_zero_point);
450 
451  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
452  model_, ANEURALNETWORKS_SOFTMAX, 2, input_indices, 1, &output_idx);
453  if (result_code != ANEURALNETWORKS_NO_ERROR) {
454  reportError(result_code);
455  }
456 }
457 
458 // int32_t
459 uint32_t NNApi::addScalarOperand(int32_t val) {
460  ANeuralNetworksOperandType scalar;
461  scalar.type = ANEURALNETWORKS_INT32;
462  scalar.scale = 0;
463  scalar.zeroPoint = 0;
464  scalar.dimensionCount = 0;
465  scalar.dimensions = NULL;
466  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
467  if (result_code != ANEURALNETWORKS_NO_ERROR) {
468  reportError(result_code);
469  }
470 
471  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
472  model_, operand_idx, &val, sizeof(val));
473  if (result_code != ANEURALNETWORKS_NO_ERROR) {
474  reportError(result_code);
475  }
476 
477  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
478  return operand_idx++;
479 }
480 
481 // float32
482 uint32_t NNApi::addFloatOperand(float val) {
483  ANeuralNetworksOperandType scalar;
484  scalar.type = ANEURALNETWORKS_FLOAT32;
485  scalar.scale = 0;
486  scalar.zeroPoint = 0;
487  scalar.dimensionCount = 0;
488  scalar.dimensions = NULL;
489  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
490  if (result_code != ANEURALNETWORKS_NO_ERROR) {
491  reportError(result_code);
492  }
493 
494  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
495  model_, operand_idx, &val, sizeof(val));
496  if (result_code != ANEURALNETWORKS_NO_ERROR) {
497  reportError(result_code);
498  }
499 
500  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
501  return operand_idx++;
502 }
503 
504 uint32_t NNApi::addTensorOperand(
505  const std::string& blob,
506  OperandCode type,
507  std::vector<uint32_t>& dims,
508  float scale,
509  int32_t zero_point)
510 // clang-format off
511 {
512  // clang-format on
513  auto found = operand_map_.find(blob);
514  if (found == operand_map_.end()) {
515  ANeuralNetworksOperandType tensor;
516  tensor.type = type;
517  tensor.scale = scale;
518  tensor.zeroPoint = zero_point;
519  tensor.dimensionCount = dims.size();
520  tensor.dimensions = dims.data();
521 
522  int result_code =
523  libnnapi_.ANeuralNetworksModel_addOperand(model_, &tensor);
524  if (result_code != ANEURALNETWORKS_NO_ERROR) {
525  reportError(result_code);
526  }
527 
528  operand_map_[blob] = operand_idx++;
529  tensor_dims_[blob] = dims;
530  VLOG(1) << "Added operand, " << blob << ", at " << operand_map_[blob];
531  }
532  return operand_map_[blob];
533 }
534 
535 void NNApi::init(const TensorVector& inputs, TensorVector* outputs) {
536  // model
537  if (!model_) {
538  int result_code = libnnapi_.ANeuralNetworksModel_create(&model_);
539  if (result_code != ANEURALNETWORKS_NO_ERROR) {
540  reportError(result_code);
541  }
542  if (!model_) {
543  CAFFE_THROW("Failed to create NN model");
544  } else {
545  LOG(INFO) << "Created NN model";
546  }
547 
548  ArgumentHelper helper(run_net_);
549  float scale = helper.GetSingleArgument<float>("scale", 1.0);
550  int zero_point = helper.GetSingleArgument<int>("zero_point", 0);
551 
552  // add external input dimension
553  for (int i = 0; i < inputs.size(); i++) {
554  if (inputs[i]->IsType<float>()) {
555  tensor_type_ = ANEURALNETWORKS_TENSOR_FLOAT32;
556  } else if (inputs[i]->IsType<uint8_t>()) {
557  tensor_type_ = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
558  } else {
559  CAFFE_THROW("Unsupported tensor type");
560  }
561  const std::string& input_blob = run_net_.external_input(i);
562  std::vector<uint32_t> dims;
563  for (auto dim : inputs[i]->sizes()) {
564  dims.push_back(dim);
565  }
566  addTensorOperand(input_blob, tensor_type_, dims, scale, zero_point);
567  }
568 
569  // add operands and operations
570  for (const auto& op : run_net_.op()) {
571  if (operator_map_.count(op.type()) == 0) {
572  CAFFE_THROW("Unsupported operator");
573  }
574  switch (operator_map_[op.type()]) {
575  case AVERAGEPOOL:
576  addPooling(op, ANEURALNETWORKS_AVERAGE_POOL_2D);
577  break;
578  case CONV:
579  addConv(op);
580  break;
581  case MAXPOOL:
582  addPooling(op, ANEURALNETWORKS_MAX_POOL_2D);
583  break;
584  case RELU:
585  addRelu(op);
586  break;
587  case SOFTMAX:
588  addSoftmax(op);
589  break;
590  default:
591  CAFFE_THROW("Unsupported operator");
592  break;
593  }
594  }
595 
596  // model inputs and outputs
597  int output_size = run_net_.external_output_size();
598  std::vector<uint32_t> input_indices(inputs.size());
599  std::vector<uint32_t> output_indices(output_size);
600  for (int i = 0; i < inputs.size(); i++) {
601  input_indices[i] = operand_map_[run_net_.external_input(i)];
602  }
603  for (int i = 0; i < output_size; i++) {
604  output_indices[i] = operand_map_[run_net_.external_output(i)];
605  }
606 
607  result_code = libnnapi_.ANeuralNetworksModel_identifyInputsAndOutputs(
608  model_,
609  inputs.size(),
610  input_indices.data(),
611  output_size,
612  output_indices.data());
613  if (result_code != ANEURALNETWORKS_NO_ERROR) {
614  reportError(result_code);
615  }
616 
617  result_code = libnnapi_.ANeuralNetworksModel_finish(model_);
618  if (result_code != ANEURALNETWORKS_NO_ERROR) {
619  reportError(result_code);
620  }
621 
622  LOG(INFO) << "Finish creating model";
623 
624  // compile
625  if (!compilation_) {
626  result_code =
627  libnnapi_.ANeuralNetworksCompilation_create(model_, &compilation_);
628  if (result_code != ANEURALNETWORKS_NO_ERROR) {
629  reportError(result_code);
630  }
631 
632  result_code = libnnapi_.ANeuralNetworksCompilation_setPreference(
633  compilation_, preference_);
634  if (result_code != ANEURALNETWORKS_NO_ERROR) {
635  reportError(result_code);
636  }
637 
638  result_code = libnnapi_.ANeuralNetworksCompilation_finish(compilation_);
639  if (result_code != ANEURALNETWORKS_NO_ERROR) {
640  reportError(result_code);
641  }
642 
643  LOG(INFO) << "Finish compilation";
644  }
645 
646  // pre-execution
647  if (!run_) {
648  result_code =
649  libnnapi_.ANeuralNetworksExecution_create(compilation_, &run_);
650  if (result_code != ANEURALNETWORKS_NO_ERROR) {
651  reportError(result_code);
652  }
653  LOG(INFO) << "Created model execution";
654  }
655 
656  // set external input and output
657  for (int i = 0; i < inputs.size(); i++) {
658  result_code = libnnapi_.ANeuralNetworksExecution_setInput(
659  run_, i, NULL, inputs[i]->raw_data(), inputs[i]->nbytes());
660  if (result_code != ANEURALNETWORKS_NO_ERROR) {
661  reportError(result_code);
662  }
663 
664  VLOG(1) << "Set external input " << i << " at " << inputs[i]->raw_data()
665  << ", size = " << inputs[i]->size();
666  }
667  // allocate memory for outputs
668  for (int i = 0; i < output_size; i++) {
669  const std::string& blob = run_net_.external_output(i);
670  if (operand_map_.find(blob) == operand_map_.end()) {
671  CAFFE_THROW("Unknown external output, ", blob);
672  }
673  uint32_t idx = operand_map_[blob];
674  if (tensor_dims_.find(blob) == tensor_dims_.end()) {
675  CAFFE_THROW("Operand dimension unknown");
676  }
677  std::vector<int> output_dims;
678  for (auto dim : tensor_dims_[blob]) {
679  output_dims.push_back(dim);
680  }
681 
682  auto* tensor = BlobGetMutableTensor(ws_.CreateBlob(blob), CPU);
683  tensor->Resize(output_dims);
684  outputs->push_back(tensor);
685 
686  if (tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32) {
687  result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
688  run_,
689  i,
690  NULL,
691  (void*)tensor->template mutable_data<float>(),
692  tensor->nbytes());
693  if (result_code != ANEURALNETWORKS_NO_ERROR) {
694  reportError(result_code);
695  }
696 
697  } else {
698  result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
699  run_,
700  i,
701  NULL,
702  (void*)tensor->template mutable_data<uint8_t>(),
703  tensor->nbytes());
704  if (result_code != ANEURALNETWORKS_NO_ERROR) {
705  reportError(result_code);
706  }
707  }
708 
709  VLOG(1) << "Set external output " << i << " at " << tensor->raw_data()
710  << ", size = " << tensor->size();
711  }
712  }
713 }
714 
715 } // namespace caffe2
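
Usage sketch (not part of nnapi.cc): one way application code might drive the NNApi wrapper above. It assumes the constructor declared in nnapi.h takes the init and predict NetDefs plus a Workspace pointer (matching the run_net_ and ws_ members used above) and that TensorVector is std::vector<TensorCPU*>; the blob name "data_nhwc", the file paths, and the 1x224x224x3 shape are placeholders.

#include <algorithm>
#include "caffe2/core/workspace.h"
#include "caffe2/utils/proto_utils.h"
#include "nnapi.h"

bool RunWithNNApi(const std::string& init_path, const std::string& predict_path) {
  caffe2::NetDef init_net, predict_net;
  CAFFE_ENFORCE(caffe2::ReadProtoFromFile(init_path, &init_net));
  CAFFE_ENFORCE(caffe2::ReadProtoFromFile(predict_path, &predict_net));

  // Weights and biases must live in the workspace; addConv() fetches them
  // through ws_.GetBlob().
  caffe2::Workspace ws;
  CAFFE_ENFORCE(ws.RunNetOnce(init_net));

  // NHWC float input; shape and blob name are placeholders.
  auto* input =
      caffe2::BlobGetMutableTensor(ws.CreateBlob("data_nhwc"), caffe2::CPU);
  input->Resize(1, 224, 224, 3);
  std::fill(input->mutable_data<float>(),
            input->mutable_data<float>() + input->size(), 0.0f);

  // Assumed constructor signature; see nnapi.h.
  caffe2::NNApi nnapi(init_net, predict_net, &ws);
  std::vector<caffe2::TensorCPU*> outputs;  // assumed to match TensorVector
  if (!nnapi.run({input}, &outputs)) {
    LOG(ERROR) << "NNAPI execution failed";
    return false;
  }
  LOG(INFO) << "Output size: " << outputs[0]->size();
  return true;
}

Quantized models follow the same path: init() switches tensor_type_ to ANEURALNETWORKS_TENSOR_QUANT8_ASYMM when the input tensor holds uint8_t data, and addConv()/addSoftmax() then use the weight_scale, weight_zero_point, output_scale and output_zero_point arguments shown above.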