Caffe2 - C++ API
A deep learning, cross-platform ML framework
int8_softmax_op.h
1 #ifndef CAFFE2_OPERATORS_INT8_SOFTMAX_OP_H_
2 #define CAFFE2_OPERATORS_INT8_SOFTMAX_OP_H_
3 
4 #include <qnnpack.h>
5 
6 #include "caffe2/core/context.h"
7 #include "caffe2/core/operator.h"
8 #include "caffe2/core/tensor_int8.h"
9 #include "caffe2/operators/quantized/int8_utils.h"
10 
11 namespace caffe2 {
12 
13 namespace int8 {
14 
15 class Int8SoftmaxOp final : public Operator<CPUContext> {
16  public:
17  explicit Int8SoftmaxOp(const OperatorDef& operator_def, Workspace* ws)
18  : Operator<CPUContext>(operator_def, ws), ws_(ws) {}
19 
20  ~Int8SoftmaxOp() {
21  if (this->qnnpackOperator_ != nullptr) {
22  qnnp_delete_operator(this->qnnpackOperator_);
23  this->qnnpackOperator_ = nullptr;
24  }
25  }
26 
27 
28  bool RunOnDevice() override {
29  const auto& X = Inputs()[0]->template Get<Int8TensorCPU>();
30  auto* Y = Outputs()[0]->template GetMutable<Int8TensorCPU>();
31  const int32_t Y_zero_point =
32  this->template GetSingleArgument<int>("Y_zero_point", 0);
33  const float Y_scale =
34  this->template GetSingleArgument<float>("Y_scale", 1);
35  CHECK_EQ(Y_zero_point, 0);
36  CHECK_EQ(Y_scale, 1.0f / 256.0f);
37 
38  /*
39  * Record quantization parameters for the input, because if the op is
40  * in-place, we may overwrite these parameters later, when we set
41  * quantization parameters for output tensor.
42  */
43  const uint8_t X_zero_point = X.zero_point;
44  const float X_scale = X.scale;
45 
46  Y->scale = Y_scale;
47  Y->zero_point = Y_zero_point;
48  Y->t.ResizeLike(X.t);
49 
50  initQNNPACK();
51 
52  if (this->qnnpackOperator_ == nullptr) {
53  const qnnp_status createStatus = qnnp_create_softargmax_nc_q8(
54  X.t.numel() / X.t.size(0) /* channels */,
55  X_scale,
56  static_cast<uint8_t>(Y_zero_point), Y_scale,
57  0 /* flags */,
58  &qnnpackOperator_);
59  CAFFE_ENFORCE(
60  createStatus == qnnp_status_success,
61  "failed to create QNNPACK SoftArgMax operator");
62  CAFFE_ENFORCE(this->qnnpackOperator_ != nullptr);
63  }
64 
65  const qnnp_status setupStatus = qnnp_setup_softargmax_nc_q8(
66  this->qnnpackOperator_,
67  X.t.size(0) /* batch size */,
68  X.t.template data<uint8_t>(),
69  X.t.numel() / X.t.size(0) /* X stride */,
70  Y->t.template mutable_data<uint8_t>(),
71  X.t.numel() / X.t.size(0) /* Y stride */);
72  CAFFE_ENFORCE(
73  setupStatus == qnnp_status_success,
74  "failed to setup QNNPACK SoftArgMax operator");
75 
76 #ifdef FBCODE_CAFFE2
77  const qnnp_status runStatus =
78  qnnp_run_operator(this->qnnpackOperator_, nullptr /* thread pool */);
79 #else
80  pthreadpool_t threadpool =
81  reinterpret_cast<pthreadpool_t>(ws_->GetThreadPool());
82  const qnnp_status runStatus =
83  qnnp_run_operator(this->qnnpackOperator_, threadpool);
84 #endif
85  CAFFE_ENFORCE(
86  runStatus == qnnp_status_success,
87  "failed to run QNNPACK SoftArgMax operator");
88 
89  return true;
90  }
91 
92  private:
93  Workspace* ws_;
94  // QNNPACK SoftArgMax operator
95  qnnp_operator_t qnnpackOperator_{nullptr};
96 };
97 
98 } // namespace int8
99 
100 } // namespace caffe2
101 
102 #endif // CAFFE2_OPERATORS_INT8_SOFTMAX_OP_H_
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13