Caffe2 - C++ API
A deep learning, cross-platform ML framework
snpe_ffi.cc
1 #include "snpe_ffi.h"
2 
3 #include "DiagLog/IDiagLog.hpp"
4 #include "zdl/DlContainer/IDlContainer.hpp"
5 #include "zdl/DlSystem/ITensorFactory.hpp"
6 #include "zdl/DlSystem/DlError.hpp"
7 #include "zdl/SNPE/SNPE.hpp"
8 #include "zdl/SNPE/SNPEBuilder.hpp"
9 #include "zdl/SNPE/SNPEFactory.hpp"
10 
// Two-step stringification: S(x) lets macro arguments such as __LINE__ expand
// first, then S_(x) turns the expanded text into a string literal.
#define S_(x) #x
#define S(x) S_(x)

// Throws std::runtime_error when `condition` evaluates to false. The message
// has the form "Exception in SNPE: <file>:<line>: <text>", where <text> is
// whatever zdl::DlSystem::getLastErrorString() returns.
//
// The do { ... } while (0) wrapper deliberately has NO trailing semicolon: the
// caller's own `;` terminates the statement, so `SNPE_ENFORCE(x);` is exactly
// one statement and remains valid inside an unbraced if/else.
#define SNPE_ENFORCE(condition) \
  do { \
    if (!(condition)) { \
      throw std::runtime_error( \
          std::string("Exception in SNPE: ") + std::string(__FILE__) + \
          std::string(":") + std::string(S(__LINE__)) + std::string(": ") + \
          zdl::DlSystem::getLastErrorString()); \
    } \
  } while (0)
23 
24 struct SNPEContext {
25  public:
26  SNPEContext(const std::vector<uint8_t>& buffer, const char* input_name, bool enable_logging=false) {
27  container_ = zdl::DlContainer::IDlContainer::open(buffer);
28  SNPE_ENFORCE(container_);
29 
30  zdl::SNPE::SNPEBuilder snpeBuilder(container_.get());
31  SNPE_ENFORCE(zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::GPU));
32 
33  dnn_ = snpeBuilder.setOutputLayers({}) // Just the last one is fine.
34  .setRuntimeProcessor(zdl::DlSystem::Runtime_t::GPU)
35  .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
36  .build();
37 
38  if (enable_logging) {
39  auto logger_opt = dnn_->getDiagLogInterface();
40  if (!logger_opt) throw std::runtime_error("SNPE failed to obtain logging interface");
41  auto logger = *logger_opt;
42  auto opts = logger->getOptions();
43  opts.LogFileDirectory = "/data/local/tmp/";
44  SNPE_ENFORCE(logger->setOptions(opts));
45  SNPE_ENFORCE(logger->start());
46  }
47 
48  SNPE_ENFORCE(dnn_);
49 
50  inputDims_ = dnn_->getInputDimensions(input_name);
51 
52  inputTensor_ = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputDims_);
53 
54  SNPE_ENFORCE(dnn_->getOutputLayerNames() && (*dnn_->getOutputLayerNames()).size() >= 1);
55  }
56 
57  const zdl::DlSystem::Optional<zdl::DlSystem::TensorShape>& getInputDims() const { return inputDims_; };
58 
59  const std::vector<std::vector<size_t>>& run(const float* inputData, size_t count) {
60  SNPE_ENFORCE(inputData);
61 
62  // Copy input data.
63  memcpy(inputTensor_->begin().dataPointer(), inputData, (count * sizeof(float)));
64  SNPE_ENFORCE(inputTensor_.get());
65 
66  // Execute graph in the SNPE runtime.
67  SNPE_ENFORCE(dnn_->execute(inputTensor_.get(), outputTensors_));
68 
69  SNPE_ENFORCE(outputTensors_.size() >= 1);
70  for (auto name : outputTensors_.getTensorNames()) {
71  const auto& outputTensor = outputTensors_.getTensor(name);
72  auto dims = outputTensor->getShape().getDimensions();
73  outputDims_.push_back(std::vector<size_t>(dims, dims + outputTensor->getShape().rank()));
74  }
75 
76  return outputDims_;
77  }
78 
79  void copyOutputTo(float* outputData) {
80  const auto& outputTensor = outputTensors_.getTensor(*outputTensors_.getTensorNames().begin());
81  SNPE_ENFORCE(outputTensor);
82  memcpy(outputData, outputTensor->begin().dataPointer(), (outputTensor->getSize() * sizeof(float)));
83  }
84 
85  private:
86  std::shared_ptr<zdl::DlContainer::IDlContainer> container_;
87  std::shared_ptr<zdl::SNPE::SNPE> dnn_;
88  zdl::DlSystem::Optional<zdl::DlSystem::TensorShape> inputDims_;
89  std::vector<std::vector<size_t>> outputDims_;
90  std::shared_ptr<zdl::DlSystem::ITensor> inputTensor_;
91  zdl::DlSystem::TensorMap outputTensors_;
92 };
93 
94 extern "C" {
95 
96 bool snpe_has_gpu() {
97  return zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::GPU);
98 }
99 
100 void* snpe_create(const uint8_t* container, size_t size, const char* input_name) {
101  std::vector<uint8_t> buffer(container, container + size);
102  return new SNPEContext(buffer, input_name);
103 }
104 
105 void snpe_destroy(void* ctx) { delete ((SNPEContext*)ctx); }
106 
107 void snpe_get_input_dims(void* ctx, size_t const** dims, size_t* size) {
108  const auto& inputDims = ((SNPEContext*)ctx)->getInputDims();
109  *dims = (*inputDims).getDimensions();
110  *size = (*inputDims).rank();
111 }
112 
113 void snpe_run(void* ctx,
114  const float* inputData,
115  size_t inputSize,
116  size_t const** outputDims,
117  size_t* outputSize) {
118 
119  const auto& outputDims_ = ((SNPEContext*)ctx)->run(inputData, inputSize);
120  SNPE_ENFORCE(outputDims_.size() >= 1);
121 
122  *outputDims = outputDims_[0].data();
123  *outputSize = outputDims_[0].size();
124 }
125 
126 void snpe_copy_output_to(void* ctx, float* outputData) {
127  ((SNPEContext*)ctx)->copyOutputTo(outputData);
128 }
129 
130 } // extern "C"
131