Caffe2 - C++ API
A deep learning, cross-platform ML framework
int8_fc_op.h
1 #ifndef CAFFE2_OPERATORS_INT8_FC_OP_H_
2 #define CAFFE2_OPERATORS_INT8_FC_OP_H_
3 
4 #include <qnnpack.h>
5 
6 #include "caffe2/core/context.h"
7 #include "caffe2/core/operator.h"
8 #include "caffe2/core/tensor_int8.h"
9 #include "caffe2/operators/conv_op_shared.h"
10 #include "caffe2/operators/quantized/int8_utils.h"
11 
12 namespace caffe2 {
13 
14 namespace int8 {
15 
16 class Int8FCOp final : public Operator<CPUContext> {
17  public:
18  explicit Int8FCOp(const OperatorDef& operator_def, Workspace* ws)
19  : Operator<CPUContext>(operator_def, ws), ws_(ws) {
20  createSharedBuffer<CPUContext>(ws_);
21  }
22 
23  ~Int8FCOp() {
24  if (this->qnnpackObject_ != nullptr) {
25  qnnp_delete_operator(this->qnnpackObject_);
26  this->qnnpackObject_ = nullptr;
27  }
28  }
29 
30  bool RunOnDevice() override {
31  const auto& X = Inputs()[0]->Get<Int8TensorCPU>();
32  const auto& W = Inputs()[1]->Get<Int8TensorCPU>();
33  const auto& B = Inputs()[2]->Get<Int8TensorCPU>();
34  auto* Y = Outputs()[0]->GetMutable<Int8TensorCPU>();
35  int32_t Y_offset = this->template GetSingleArgument<int>("Y_zero_point", 0);
36  auto Y_scale = this->template GetSingleArgument<float>("Y_scale", 1);
37  Y->scale = Y_scale;
38  Y->zero_point = Y_offset;
39  // (NxHxW)xC == MxK x (NxK) -> MxN
40  const auto K = X.t.size_from_dim(1);
41  const auto N = W.t.size(0);
42  CHECK_EQ(K, W.t.size(1));
43  CHECK_EQ(N, B.t.numel());
44  const auto M = X.t.numel() / K;
45  ReinitializeTensor(&Y->t, {M, N}, at::dtype<uint8_t>().device(CPU));
46 
47  runWithSharedBuffer<CPUContext>(ws_, [&](Tensor* buffer) {
48  initQNNPACK();
49 
50  pthreadpool_t threadpool =
51  reinterpret_cast<pthreadpool_t>(ws_->GetThreadPool());
52 
53  if (this->qnnpackObject_ == nullptr) {
54  const qnnp_status createStatus = qnnp_create_fully_connected_nc_q8(
55  K,
56  N,
57  X.zero_point,
58  X.scale,
59  W.zero_point,
60  W.scale,
61 #ifndef _MSC_VER
62  W.t.template data<uint8_t>(),
63  B.t.template data<int32_t>(),
64 #else
65  W.t.data<uint8_t>(),
66  B.t.data<int32_t>(),
67 #endif
68 
69  Y->zero_point,
70  Y->scale,
71  std::numeric_limits<uint8_t>::min(),
72  std::numeric_limits<uint8_t>::max(),
73  0 /* flags */,
74  &this->qnnpackObject_);
75  CAFFE_ENFORCE(
76  createStatus == qnnp_status_success,
77  "failed to create QNNPACK fully connected operator");
78  CAFFE_ENFORCE(this->qnnpackObject_ != nullptr);
79  }
80 
81  uint8_t* inputPtr = X.t.template mutable_data<uint8_t>();
82  if (K < 8) {
83  buffer->Resize(std::vector<int64_t>{X.t.numel() + 8});
84  inputPtr = buffer->template mutable_data<uint8_t>() + 8;
85  memcpy(inputPtr, X.t.template data<uint8_t>(), X.t.numel());
86  }
87 
88  if (lastBatchSize_ != static_cast<size_t>(M) ||
89  lastInputPointer_ != inputPtr ||
90  lastOutputPointer_ != Y->t.template mutable_data<uint8_t>()) {
91  const qnnp_status setupStatus = qnnp_setup_fully_connected_nc_q8(
92  this->qnnpackObject_,
93  M,
94  inputPtr,
95  K /* input stride */,
96  Y->t.template mutable_data<uint8_t>(),
97  N /* output stride */);
98  CAFFE_ENFORCE(
99  setupStatus == qnnp_status_success,
100  "failed to setup QNNPACK fully connected operator");
101 
102  lastBatchSize_ = static_cast<size_t>(M);
103  lastInputPointer_ = inputPtr;
104  lastOutputPointer_ = Y->t.template mutable_data<uint8_t>();
105  }
106 
107 #ifdef FBCODE_CAFFE2
108  const qnnp_status runStatus =
109  qnnp_run_operator(this->qnnpackObject_, nullptr /* thread pool */);
110 #else
111  const qnnp_status runStatus =
112  qnnp_run_operator(this->qnnpackObject_, threadpool);
113 #endif
114  CAFFE_ENFORCE(
115  runStatus == qnnp_status_success, "failed to run QNNPACK operator");
116  });
117  return true;
118  }
119 
120  private:
121  Workspace* ws_;
122  // QNNPACK convolution object
123  qnnp_operator_t qnnpackObject_{nullptr};
124  // batch size in the previous call to RunOnDeviceWithOrderNHWC
125  size_t lastBatchSize_{0};
126  // input pointer in the previous call to RunOnDeviceWithOrderNHWC
127  const void* lastInputPointer_{nullptr};
128  // output pointer in the previous call to RunOnDeviceWithOrderNHWC
129  void* lastOutputPointer_{nullptr};
130 };
131 
132 } // namespace int8
133 
134 } // namespace caffe2
135 
136 #endif // CAFFE2_OPERATORS_INT8_FC_OP_H_
Definition: any.cpp:108
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to given dims and options if necessary, note that this will not do anything if ...
Definition: tensor.cc:127
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
Definition: workspace.h:47
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:58