1 #include "caffe2/core/operator.h" 2 #include "caffe2/core/tensor.h" 3 #include "caffe2/core/types.h" 4 #include "caffe2/utils/proto_utils.h" 11 void reportError(
int result_code) {
12 switch (result_code) {
13 case ANEURALNETWORKS_NO_ERROR:
15 case ANEURALNETWORKS_OUT_OF_MEMORY:
16 CAFFE_THROW(
"out of memory");
17 case ANEURALNETWORKS_INCOMPLETE:
18 CAFFE_THROW(
"incomplete");
19 case ANEURALNETWORKS_UNEXPECTED_NULL:
20 CAFFE_THROW(
"unexpected null");
21 case ANEURALNETWORKS_BAD_DATA:
22 CAFFE_THROW(
"bad data");
23 case ANEURALNETWORKS_OP_FAILED:
24 CAFFE_THROW(
"op failed or unmappable");
25 case ANEURALNETWORKS_BAD_STATE:
26 CAFFE_THROW(
"bad state");
28 CAFFE_THROW(
"unknown error");
bool NNApi::loadNNApiLibrary() {
  return dlnnapi_load(&libnnapi_, DLNNAPI_FLAG_VERSION_27);
}

NNApi::~NNApi() {
  if (run_end_) {
    libnnapi_.ANeuralNetworksEvent_free(run_end_);
  }
  if (run_) {
    libnnapi_.ANeuralNetworksExecution_free(run_);
  }
  if (compilation_) {
    libnnapi_.ANeuralNetworksCompilation_free(compilation_);
  }
  if (model_) {
    libnnapi_.ANeuralNetworksModel_free(model_);
  }
}
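
// run() builds and compiles the NNAPI model via init(), then launches the
// asynchronous computation and blocks on its completion event.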
bool NNApi::run(const TensorVector& inputs, TensorVector* outputs) {
  CAFFE_ENFORCE(inputs.size() <= run_net_.external_input_size());
  try {
    init(inputs, outputs);
  } catch (const std::exception& e) {
    LOG(ERROR) << "Error during model initialization: " << e.what();
    return false;
  }

  try {
    VLOG(1) << "Start compute";
    int result_code =
        libnnapi_.ANeuralNetworksExecution_startCompute(run_, &run_end_);
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }
    result_code = libnnapi_.ANeuralNetworksEvent_wait(run_end_);
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }
    VLOG(1) << "Finish compute";
  } catch (const std::exception& e) {
    LOG(ERROR) << "Error during model run: " << e.what();
    return false;
  }
  return true;
}
void NNApi::getConvPoolArgs(const ArgumentHelper& helper, ConvPoolArgs& args) {
  std::vector<int> kernel(helper.GetRepeatedArgument<int>("kernels"));
  std::vector<int> stride(helper.GetRepeatedArgument<int>("strides"));
  std::vector<int> pads(helper.GetRepeatedArgument<int>("pads"));

  if (helper.HasArgument("kernel")) {
    kernel.resize(2, helper.GetSingleArgument<int>("kernel", 0));
  } else if (helper.HasArgument("kernelh") && helper.HasArgument("kernelw")) {
    kernel.push_back(helper.GetSingleArgument<int>("kernelh", 0));
    kernel.push_back(helper.GetSingleArgument<int>("kernelw", 0));
  }

  if (helper.HasArgument("stride")) {
    stride.resize(2, helper.GetSingleArgument<int>("stride", 0));
  } else if (helper.HasArgument("stride_h") && helper.HasArgument("stride_w")) {
    stride.push_back(helper.GetSingleArgument<int>("stride_h", 0));
    stride.push_back(helper.GetSingleArgument<int>("stride_w", 0));
  }

  if (helper.HasArgument("pad")) {
    pads.resize(4, helper.GetSingleArgument<int>("pad", 0));
  } else if (
      helper.HasArgument("pad_t") && helper.HasArgument("pad_l") &&
      helper.HasArgument("pad_b") && helper.HasArgument("pad_r")) {
    pads.push_back(helper.GetSingleArgument<int>("pad_t", 0));
    pads.push_back(helper.GetSingleArgument<int>("pad_l", 0));
    pads.push_back(helper.GetSingleArgument<int>("pad_b", 0));
    pads.push_back(helper.GetSingleArgument<int>("pad_r", 0));
  }

  // defaults
  args.kernel_h = kernel.size() > 0 ? kernel[0] : 1;
  args.kernel_w = kernel.size() > 1 ? kernel[1] : args.kernel_h;
  args.stride_x = stride.size() > 0 ? stride[0] : 1;
  args.stride_y = stride.size() > 1 ? stride[1] : 1;
  args.pad_t = pads.size() > 0 ? pads[0] : 0;
  args.pad_l = pads.size() > 1 ? pads[1] : 0;
  args.pad_b = pads.size() > 2 ? pads[2] : 0;
  args.pad_r = pads.size() > 3 ? pads[3] : 0;
}
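
// Maps a Caffe2 pooling op to an NNAPI pooling operation. Besides the input
// tensor, NNAPI pooling takes nine scalar operands (four paddings, two
// strides, kernel height/width, and a fused-activation code), which is why
// input_indices_count is 10.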
void NNApi::addPooling(
    const OperatorDef& op,
    OperationCode op_code,
    bool fuse_relu) {
  VLOG(1) << "Add AveragePool to NN model";
  CAFFE_ENFORCE_EQ(op.input_size(), 1);
  CAFFE_ENFORCE_EQ(op.output_size(), 1);

  ArgumentHelper helper(op);
  StorageOrder order = StringToStorageOrder(
      helper.GetSingleArgument<std::string>("order", "NCHW"));
  if (order != NHWC) {
    CAFFE_THROW("NN API supports NHWC only");
  }

  ConvPoolArgs args;
  getConvPoolArgs(helper, args);
  CAFFE_ENFORCE_EQ(
      args.stride_x, args.stride_y, "NN API only supports stride_x == stride_y");

  const uint32_t input_indices_count = 10;
  const uint32_t output_indices_count = 1;
  uint32_t input_indices[input_indices_count];
  uint32_t output_indices[output_indices_count];
  uint32_t idx = 0;

  // input
  const std::string& input = op.input(0);
  const std::vector<uint32_t>& input_dims = tensor_dims_[input];
  input_indices[idx++] = operand_map_[input];

  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
  uint32_t batches = input_dims[0];
  uint32_t input_height = input_dims[1];
  uint32_t input_width = input_dims[2];
  uint32_t channel = input_dims[3];

  // paddings
  input_indices[idx++] = addScalarOperand(args.pad_l);
  input_indices[idx++] = addScalarOperand(args.pad_r);
  input_indices[idx++] = addScalarOperand(args.pad_t);
  input_indices[idx++] = addScalarOperand(args.pad_b);

  // strides
  input_indices[idx++] = addScalarOperand(args.stride_x);
  input_indices[idx++] = addScalarOperand(args.stride_y);

  // kernel size
  input_indices[idx++] = addScalarOperand(args.kernel_h);
  input_indices[idx++] = addScalarOperand(args.kernel_w);

  // fused activation
  FuseCode fuse =
      fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
  input_indices[idx] = addScalarOperand(fuse);

  // output dimensions
  uint32_t output_height =
      (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
      1;
  uint32_t output_width =
      (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
      1;

  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);

  std::vector<uint32_t> dims({batches, output_height, output_width, channel});
  output_indices[0] = addTensorOperand(
      op.output(0), tensor_type_, dims, output_scale, output_zero_point);

  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
      model_, op_code, input_indices_count, input_indices, 1, output_indices);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
}
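
// Maps Conv (optionally with a fused ReLU) onto ANEURALNETWORKS_CONV_2D, or
// onto ANEURALNETWORKS_DEPTHWISE_CONV_2D when the convolution is grouped.
// The depthwise form takes an extra depth-multiplier operand, so it has 11
// inputs instead of 10.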
void NNApi::addConv(const OperatorDef& op, bool fuse_relu) {
  VLOG(1) << "Add Conv to NN model";
  CAFFE_ENFORCE_EQ(op.input_size(), 3);
  CAFFE_ENFORCE_EQ(op.output_size(), 1);

  ArgumentHelper helper(op);
  StorageOrder order = StringToStorageOrder(
      helper.GetSingleArgument<std::string>("order", "NCHW"));
  CAFFE_ENFORCE_EQ(order, NHWC, "NN API supports NHWC only");

  // input
  const std::string& input = op.input(0);
  const std::vector<uint32_t>& input_dims = tensor_dims_[input];

  CAFFE_ENFORCE_EQ(input_dims.size(), 4);
  uint32_t batches = input_dims[0];
  uint32_t input_height = input_dims[1];
  uint32_t input_width = input_dims[2];
  uint32_t input_channel = input_dims[3];

  uint32_t group = helper.GetSingleArgument<int>("group", 1);

  bool run_depthwise = false;
  if (group > 1) {
    CAFFE_ENFORCE_EQ(
        group,
        input_channel,
        "NN API doesn't support non-depthwise convolution with groups");
    run_depthwise = true;
  }

  ConvPoolArgs args;
  getConvPoolArgs(helper, args);
  CAFFE_ENFORCE_EQ(
      args.stride_x, args.stride_y, "NN API only supports stride_x == stride_y");

  std::vector<int> dilation(helper.GetRepeatedArgument<int>("dilations"));
  if (helper.HasArgument("dilation")) {
    dilation.resize(2, helper.GetSingleArgument<int>("dilation", 0));
  } else if (
      helper.HasArgument("dilationh") && helper.HasArgument("dilationw")) {
    dilation.push_back(helper.GetSingleArgument<int>("dilation_h", 0));
    dilation.push_back(helper.GetSingleArgument<int>("dilation_w", 0));
  }

  for (auto d : dilation) {
    CAFFE_ENFORCE_EQ(d, 1, "NN API only supports dilation == 1");
  }

  const uint32_t input_indices_count = run_depthwise ? 11 : 10;
  const uint32_t output_indices_count = 1;
  uint32_t input_indices[input_indices_count];
  uint32_t output_indices[output_indices_count];
  uint32_t idx = 0;

  // input
  input_indices[idx++] = operand_map_[input];

  // weight
  const std::string& weight_name = op.input(1);
  const auto& weight = ws_.GetBlob(weight_name)->Get<TensorCPU>();
  std::vector<uint32_t> weight_dims;
  for (auto dim : weight.sizes()) {
    weight_dims.push_back(dim);
  }
  CAFFE_ENFORCE_EQ(weight_dims.size(), 4);
  uint32_t num_kernels = weight_dims[0];
  uint32_t kernel_h = weight_dims[1];
  uint32_t kernel_w = weight_dims[2];
  uint32_t kernel_depth = weight_dims[3];
  CAFFE_ENFORCE_EQ(input_channel, kernel_depth);
  if (run_depthwise) {
    CAFFE_ENFORCE_EQ(num_kernels, 1);
  }

  float weight_scale = helper.GetSingleArgument<float>("weight_scale", 1.0);
  int weight_zero_point = helper.GetSingleArgument<int>("weight_zero_point", 0);

  uint32_t weight_idx = addTensorOperand(
      weight_name, tensor_type_, weight_dims, weight_scale, weight_zero_point);

  int result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
      model_, weight_idx, weight.raw_data(), weight.nbytes());
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  input_indices[idx++] = weight_idx;

  // bias: float32 for a float model, int32 for a quantized model
  const std::string& bias_name = op.input(2);
  const auto& bias = ws_.GetBlob(bias_name)->Get<TensorCPU>();
  std::vector<uint32_t> bias_dims;
  CAFFE_ENFORCE_EQ(bias.ndim(), 1);
  uint32_t bias_size = bias.dim(0);
  if (!run_depthwise) {
    CAFFE_ENFORCE_EQ(num_kernels, bias_size);
  } else {
    CAFFE_ENFORCE_EQ(kernel_depth, bias_size);
  }
  bias_dims.push_back(bias_size);

  OperandCode bias_type = tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32
      ? ANEURALNETWORKS_TENSOR_FLOAT32
      : ANEURALNETWORKS_TENSOR_INT32;
  if (bias_type == ANEURALNETWORKS_TENSOR_FLOAT32) {
    CAFFE_ENFORCE(bias.IsType<float>());
  } else {
    CAFFE_ENFORCE(bias.IsType<int>());
  }
  uint32_t bias_idx = addTensorOperand(bias_name, bias_type, bias_dims);

  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
      model_, bias_idx, bias.raw_data(), bias.nbytes());
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  input_indices[idx++] = bias_idx;

  // paddings
  input_indices[idx++] = addScalarOperand(args.pad_l);
  input_indices[idx++] = addScalarOperand(args.pad_r);
  input_indices[idx++] = addScalarOperand(args.pad_t);
  input_indices[idx++] = addScalarOperand(args.pad_b);

  // strides
  input_indices[idx++] = addScalarOperand(args.stride_x);
  input_indices[idx++] = addScalarOperand(args.stride_y);

  if (run_depthwise) {
    // depth multiplier
    input_indices[idx++] = addScalarOperand(1);
  }

  // fused activation
  FuseCode fuse =
      fuse_relu ? ANEURALNETWORKS_FUSED_RELU : ANEURALNETWORKS_FUSED_NONE;
  input_indices[idx] = addScalarOperand(fuse);

  // output dimensions
  uint32_t output_channel = run_depthwise ? kernel_depth : num_kernels;
  uint32_t output_height =
      (input_height - args.kernel_h + args.pad_t + args.pad_b) / args.stride_y +
      1;
  uint32_t output_width =
      (input_width - args.kernel_w + args.pad_l + args.pad_r) / args.stride_x +
      1;

  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);

  std::vector<uint32_t> dims(
      {batches, output_height, output_width, output_channel});
  output_indices[0] = addTensorOperand(
      op.output(0), tensor_type_, dims, output_scale, output_zero_point);

  if (run_depthwise) {
    CAFFE_ENFORCE_EQ(input_indices_count, 11);
    result_code = libnnapi_.ANeuralNetworksModel_addOperation(
        model_,
        ANEURALNETWORKS_DEPTHWISE_CONV_2D,
        input_indices_count,
        input_indices,
        output_indices_count,
        output_indices);
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }
  } else {
    CAFFE_ENFORCE_EQ(input_indices_count, 10);
    result_code = libnnapi_.ANeuralNetworksModel_addOperation(
        model_,
        ANEURALNETWORKS_CONV_2D,
        input_indices_count,
        input_indices,
        output_indices_count,
        output_indices);
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }
  }
}
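
// Maps Relu to the NNAPI RELU operation with one input and one output operand;
// the output operand takes its scale and zero point from the op's
// "output_scale"/"output_zero_point" arguments.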
void NNApi::addRelu(const OperatorDef& op) {
  VLOG(1) << "Add Relu to NN model";
  CAFFE_ENFORCE_EQ(op.input_size(), 1);
  CAFFE_ENFORCE_EQ(op.output_size(), 1);
  const std::string& input = op.input(0);
  uint32_t input_idx = operand_map_[input];

  ArgumentHelper helper(op);
  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);

  uint32_t output_idx = addTensorOperand(
      op.output(0),
      tensor_type_,
      tensor_dims_[input],
      output_scale,
      output_zero_point);

  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
      model_, ANEURALNETWORKS_RELU, 1, &input_idx, 1, &output_idx);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
}
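
// Maps Softmax (axis == 1 only) to ANEURALNETWORKS_SOFTMAX, which takes the
// input tensor plus a float beta operand. For quantized output, NNAPI requires
// scale == 1/256 and zero point == 0.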
void NNApi::addSoftmax(const OperatorDef& op) {
  VLOG(1) << "Add Softmax to NN model";
  ArgumentHelper helper(op);
  CAFFE_ENFORCE_EQ(
      helper.GetSingleArgument<int>("axis", 1),
      1,
      "NN API only supports axis == 1");

  uint32_t input_indices[2];
  const std::string& input = op.input(0);
  input_indices[0] = operand_map_[input];
  const auto& input_dims = tensor_dims_[input];
  CAFFE_ENFORCE(
      input_dims.size() == 2 || input_dims.size() == 4,
      "Supported tensor rank: 2 or 4");

  // beta
  const float scale = 1.0;
  input_indices[1] = addFloatOperand(scale);

  float output_scale = helper.GetSingleArgument<float>("output_scale", 1.0);
  int output_zero_point = helper.GetSingleArgument<int>("output_zero_point", 0);
  if (tensor_type_ == ANEURALNETWORKS_TENSOR_QUANT8_ASYMM) {
    CAFFE_ENFORCE_EQ(output_scale, 1.f / 256);
    CAFFE_ENFORCE_EQ(output_zero_point, 0);
  }

  uint32_t output_idx = addTensorOperand(
      op.output(0),
      tensor_type_,
      tensor_dims_[input],
      output_scale,
      output_zero_point);

  int result_code = libnnapi_.ANeuralNetworksModel_addOperation(
      model_, ANEURALNETWORKS_SOFTMAX, 2, input_indices, 1, &output_idx);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
}
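
// Adds a rank-0 ANEURALNETWORKS_INT32 operand, sets its value immediately, and
// returns its index. operand_idx is the running operand counter shared with
// the tensor operands below.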
uint32_t NNApi::addScalarOperand(int32_t val) {
  ANeuralNetworksOperandType scalar;
  scalar.type = ANEURALNETWORKS_INT32;
  scalar.scale = 0;
  scalar.zeroPoint = 0;
  scalar.dimensionCount = 0;
  scalar.dimensions = NULL;

  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
      model_, operand_idx, &val, sizeof(val));
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
  return operand_idx++;
}

uint32_t NNApi::addFloatOperand(float val) {
  ANeuralNetworksOperandType scalar;
  scalar.type = ANEURALNETWORKS_FLOAT32;
  scalar.scale = 0;
  scalar.zeroPoint = 0;
  scalar.dimensionCount = 0;
  scalar.dimensions = NULL;

  int result_code = libnnapi_.ANeuralNetworksModel_addOperand(model_, &scalar);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  result_code = libnnapi_.ANeuralNetworksModel_setOperandValue(
      model_, operand_idx, &val, sizeof(val));
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  VLOG(1) << "Added scalar, " << val << ", at " << operand_idx;
  return operand_idx++;
}
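
// Registers a tensor operand for a blob the first time the blob is seen,
// recording its index in operand_map_ and its shape in tensor_dims_; later
// calls return the cached index.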
uint32_t NNApi::addTensorOperand(
    const std::string& blob,
    OperandCode type,
    std::vector<uint32_t>& dims,
    float scale,
    int32_t zero_point) {
  auto found = operand_map_.find(blob);
  if (found == operand_map_.end()) {
    ANeuralNetworksOperandType tensor;
    tensor.type = type;
    tensor.scale = scale;
    tensor.zeroPoint = zero_point;
    tensor.dimensionCount = dims.size();
    tensor.dimensions = dims.data();

    int result_code =
        libnnapi_.ANeuralNetworksModel_addOperand(model_, &tensor);
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }

    operand_map_[blob] = operand_idx++;
    tensor_dims_[blob] = dims;
    VLOG(1) << "Added operand, " << blob << ", at " << operand_map_[blob];
  }
  return operand_map_[blob];
}
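
// End-to-end setup: create the model, add operands for the external inputs
// (deriving the tensor type from the input dtype), translate each operator in
// run_net_, identify the model's inputs and outputs, finish and compile the
// model with preference_, create an execution, and bind input/output buffers.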
void NNApi::init(const TensorVector& inputs, TensorVector* outputs) {
  // model
  int result_code = libnnapi_.ANeuralNetworksModel_create(&model_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  if (!model_) {
    CAFFE_THROW("Failed to create NN model");
  }
  LOG(INFO) << "Created NN model";

  // scale and zero point used for quantized external inputs
  ArgumentHelper helper(run_net_);
  float scale = helper.GetSingleArgument<float>("scale", 1.0);
  int zero_point = helper.GetSingleArgument<int>("zero_point", 0);

  // add external inputs as tensor operands
  for (int i = 0; i < inputs.size(); i++) {
    if (inputs[i]->IsType<float>()) {
      tensor_type_ = ANEURALNETWORKS_TENSOR_FLOAT32;
    } else if (inputs[i]->IsType<uint8_t>()) {
      tensor_type_ = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
    } else {
      CAFFE_THROW("Unsupported tensor type");
    }
    const std::string& input_blob = run_net_.external_input(i);
    std::vector<uint32_t> dims;
    for (auto dim : inputs[i]->sizes()) {
      dims.push_back(dim);
    }
    addTensorOperand(input_blob, tensor_type_, dims, scale, zero_point);
  }

  // translate each Caffe2 operator into an NNAPI operation
  for (const auto& op : run_net_.op()) {
    if (operator_map_.count(op.type()) == 0) {
      CAFFE_THROW("Unsupported operator");
    }
    switch (operator_map_[op.type()]) {
      // per-operator cases dispatching to addPooling/addConv/addRelu/
      // addSoftmax (elided here)
      default:
        CAFFE_THROW("Unsupported operator");
    }
  }

  // model inputs and outputs
  int output_size = run_net_.external_output_size();
  std::vector<uint32_t> input_indices(inputs.size());
  std::vector<uint32_t> output_indices(output_size);
  for (int i = 0; i < inputs.size(); i++) {
    input_indices[i] = operand_map_[run_net_.external_input(i)];
  }
  for (int i = 0; i < output_size; i++) {
    output_indices[i] = operand_map_[run_net_.external_output(i)];
  }

  result_code = libnnapi_.ANeuralNetworksModel_identifyInputsAndOutputs(
      model_,
      inputs.size(),
      input_indices.data(),
      output_size,
      output_indices.data());
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  result_code = libnnapi_.ANeuralNetworksModel_finish(model_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  LOG(INFO) << "Finish creating model";

  // compilation
  result_code =
      libnnapi_.ANeuralNetworksCompilation_create(model_, &compilation_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  result_code = libnnapi_.ANeuralNetworksCompilation_setPreference(
      compilation_, preference_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }

  result_code = libnnapi_.ANeuralNetworksCompilation_finish(compilation_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  LOG(INFO) << "Finish compilation";

  // execution
  result_code =
      libnnapi_.ANeuralNetworksExecution_create(compilation_, &run_);
  if (result_code != ANEURALNETWORKS_NO_ERROR) {
    reportError(result_code);
  }
  LOG(INFO) << "Created model execution";

  // set external inputs
  for (int i = 0; i < inputs.size(); i++) {
    result_code = libnnapi_.ANeuralNetworksExecution_setInput(
        run_, i, NULL, inputs[i]->raw_data(), inputs[i]->size());
    if (result_code != ANEURALNETWORKS_NO_ERROR) {
      reportError(result_code);
    }
    VLOG(1) << "Set external input " << i << " at " << inputs[i]->raw_data()
            << ", size = " << inputs[i]->size();
  }

  // create output blobs and set external outputs
  for (int i = 0; i < output_size; i++) {
    const std::string& blob = run_net_.external_output(i);
    if (operand_map_.find(blob) == operand_map_.end()) {
      CAFFE_THROW("Unknown external output, ", blob);
    }
    uint32_t idx = operand_map_[blob];
    if (tensor_dims_.find(blob) == tensor_dims_.end()) {
      CAFFE_THROW("Operand dimension unknown");
    }
    std::vector<int> output_dims;
    for (auto dim : tensor_dims_[blob]) {
      output_dims.push_back(dim);
    }

    auto* tensor = BlobGetMutableTensor(ws_.CreateBlob(blob), CPU);
    tensor->Resize(output_dims);
    outputs->push_back(tensor);

    if (tensor_type_ == ANEURALNETWORKS_TENSOR_FLOAT32) {
      result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
          run_,
          i,
          NULL,
          (void*)tensor->template mutable_data<float>(),
          tensor->nbytes());
      if (result_code != ANEURALNETWORKS_NO_ERROR) {
        reportError(result_code);
      }
    } else {
      result_code = libnnapi_.ANeuralNetworksExecution_setOutput(
          run_,
          i,
          NULL,
          (void*)tensor->template mutable_data<uint8_t>(),
          tensor->nbytes());
      if (result_code != ANEURALNETWORKS_NO_ERROR) {
        reportError(result_code);
      }
    }

    VLOG(1) << "Set external output " << i << " at " << tensor->raw_data()
            << ", size = " << tensor->size();
  }
}