Caffe2 - C++ API
A deep learning, cross-platform ML framework
snpe_ffi.cc
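// FFI wrapper around the Qualcomm Snapdragon Neural Processing Engine (SNPE)
// SDK: builds a network from a serialized model container and exposes a small
// C interface for creating a context, querying input dimensions, running
// inference, and copying out results.
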
#include "snpe_ffi.h"

#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include "zdl/DiagLog/IDiagLog.hpp"
#include "zdl/DlContainer/IDlContainer.hpp"
#include "zdl/DlSystem/ITensorFactory.hpp"
#include "zdl/DlSystem/DlError.hpp"
#include "zdl/SNPE/SNPE.hpp"
#include "zdl/SNPE/SNPEBuilder.hpp"
#include "zdl/SNPE/SNPEFactory.hpp"

// Stringify input.
#define S_(x) #x
#define S(x) S_(x)

#define SNPE_ENFORCE(condition)                                            \
  do {                                                                     \
    if (!(condition)) {                                                    \
      throw std::runtime_error(                                            \
          std::string("Exception in SNPE: ") + std::string(__FILE__) +     \
          std::string(":") + std::string(S(__LINE__)) +                    \
          zdl::DlSystem::getLastErrorString());                            \
    }                                                                      \
  } while (0)

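// SNPEContext owns a fully built SNPE network: it opens the serialized
// container, builds the network for the GPU runtime, pre-allocates the input
// tensor for the named input layer, and keeps the output tensor map produced
// by the most recent execution.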
struct SNPEContext {
 public:
  SNPEContext(const std::vector<uint8_t>& buffer, const char* input_name, bool enable_logging = false) {
    container_ = zdl::DlContainer::IDlContainer::open(buffer);
    SNPE_ENFORCE(container_);

    zdl::SNPE::SNPEBuilder snpeBuilder(container_.get());
    SNPE_ENFORCE(zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::GPU));

    dnn_ = snpeBuilder.setOutputLayers({}) // Just the last one is fine.
               .setRuntimeProcessor(zdl::DlSystem::Runtime_t::GPU)
               .setPerformanceProfile(zdl::DlSystem::PerformanceProfile_t::HIGH_PERFORMANCE)
               .build();
    SNPE_ENFORCE(dnn_);

    if (enable_logging) {
      auto logger_opt = dnn_->getDiagLogInterface();
      if (!logger_opt) throw std::runtime_error("SNPE failed to obtain logging interface");
      auto logger = *logger_opt;
      auto opts = logger->getOptions();
      opts.LogFileDirectory = "/data/local/tmp/";
      SNPE_ENFORCE(logger->setOptions(opts));
      SNPE_ENFORCE(logger->start());
    }

    inputDims_ = dnn_->getInputDimensions(input_name);
    inputTensor_ = zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(inputDims_);

    SNPE_ENFORCE(dnn_->getOutputLayerNames() && (*dnn_->getOutputLayerNames()).size() >= 1);
  }

  const zdl::DlSystem::Optional<zdl::DlSystem::TensorShape>& getInputDims() const { return inputDims_; }

  const std::vector<std::vector<size_t>>& run(const float* inputData, size_t count) {
    SNPE_ENFORCE(inputData);
    SNPE_ENFORCE(inputTensor_.get());

    // Copy input data into the pre-allocated input tensor.
    memcpy(inputTensor_->begin().dataPointer(), inputData, count * sizeof(float));

    // Execute graph in the SNPE runtime.
    SNPE_ENFORCE(dnn_->execute(inputTensor_.get(), outputTensors_));
    SNPE_ENFORCE(outputTensors_.size() >= 1);

    // Record the shape of every output tensor; clear first so repeated runs
    // do not accumulate stale dimensions.
    outputDims_.clear();
    for (auto name : outputTensors_.getTensorNames()) {
      const auto& outputTensor = outputTensors_.getTensor(name);
      auto dims = outputTensor->getShape().getDimensions();
      outputDims_.push_back(std::vector<size_t>(dims, dims + outputTensor->getShape().rank()));
    }

    return outputDims_;
  }

  void copyOutputTo(float* outputData) {
    const auto& outputTensor = outputTensors_.getTensor(*outputTensors_.getTensorNames().begin());
    SNPE_ENFORCE(outputTensor);
    memcpy(outputData, outputTensor->begin().dataPointer(), outputTensor->getSize() * sizeof(float));
  }

 private:
  std::shared_ptr<zdl::DlContainer::IDlContainer> container_;
  std::shared_ptr<zdl::SNPE::SNPE> dnn_;
  zdl::DlSystem::Optional<zdl::DlSystem::TensorShape> inputDims_;
  std::vector<std::vector<size_t>> outputDims_;
  std::shared_ptr<zdl::DlSystem::ITensor> inputTensor_;
  zdl::DlSystem::TensorMap outputTensors_;
};

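// C linkage entry points. Callers hold the SNPEContext as an opaque void*
// handle: create it from a serialized container, query input dimensions, run
// inference, copy out the first output tensor, then destroy it.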
extern "C" {

bool snpe_has_gpu() {
  return zdl::SNPE::SNPEFactory::isRuntimeAvailable(zdl::DlSystem::Runtime_t::GPU);
}

void* snpe_create(const uint8_t* container, size_t size, const char* input_name) {
  std::vector<uint8_t> buffer(container, container + size);
  return new SNPEContext(buffer, input_name);
}

void snpe_destroy(void* ctx) { delete ((SNPEContext*)ctx); }

void snpe_get_input_dims(void* ctx, size_t const** dims, size_t* size) {
  const auto& inputDims = ((SNPEContext*)ctx)->getInputDims();
  *dims = (*inputDims).getDimensions();
  *size = (*inputDims).rank();
}

void snpe_run(void* ctx,
              const float* inputData,
              size_t inputSize,
              size_t const** outputDims,
              size_t* outputSize) {
  const auto& outputDims_ = ((SNPEContext*)ctx)->run(inputData, inputSize);
  SNPE_ENFORCE(outputDims_.size() >= 1);

  *outputDims = outputDims_[0].data();
  *outputSize = outputDims_[0].size();
}

void snpe_copy_output_to(void* ctx, float* outputData) {
  ((SNPEContext*)ctx)->copyOutputTo(outputData);
}

} // extern "C"
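
// Usage sketch (illustrative, not part of the original file): a hypothetical
// caller of the C interface above, assuming the serialized model has already
// been read into `model_buf` and the network's input layer is named "data".
//
//   std::vector<uint8_t> model_buf = /* contents of the model container */;
//   void* ctx = snpe_create(model_buf.data(), model_buf.size(), "data");
//
//   size_t const* in_dims = nullptr;
//   size_t in_rank = 0;
//   snpe_get_input_dims(ctx, &in_dims, &in_rank);
//   size_t in_count = 1;
//   for (size_t i = 0; i < in_rank; ++i) in_count *= in_dims[i];
//
//   std::vector<float> input(in_count);               // fill with real data
//   size_t const* out_dims = nullptr;
//   size_t out_rank = 0;
//   snpe_run(ctx, input.data(), input.size(), &out_dims, &out_rank);
//
//   size_t out_count = 1;
//   for (size_t i = 0; i < out_rank; ++i) out_count *= out_dims[i];
//   std::vector<float> output(out_count);
//   snpe_copy_output_to(ctx, output.data());
//   snpe_destroy(ctx);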