1 #ifndef CAFFE2_OPERATORS_INT8_FC_OP_H_ 2 #define CAFFE2_OPERATORS_INT8_FC_OP_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/operator.h" 8 #include "caffe2/core/tensor_int8.h" 9 #include "caffe2/operators/conv_op_shared.h" 10 #include "caffe2/operators/quantized/int8_utils.h" 20 createSharedBuffer<CPUContext>(ws_);
// Release the QNNPACK operator if one has been created, then reset the handle
// to null.
// NOTE(review): the header and closing braces of the enclosing function are
// not visible in this listing (the embedded line numbers skip) — presumably
// this is the operator's destructor; confirm against the full file.
24 if (this->qnnpackObject_ !=
nullptr) {
25 qnnp_delete_operator(this->qnnpackObject_);
26 this->qnnpackObject_ =
nullptr;
// Runs the fully connected computation through QNNPACK: creates the QNNPACK
// operator on first use, calls setup again whenever the batch size or the
// input/output pointers differ from the cached values, then runs the operator.
// NOTE(review): this listing skips source lines (the embedded line numbers are
// not contiguous). The declarations of X, W, B and Y, several call arguments,
// the openers of the enforce statements, the closing braces, and any
// preprocessor or `if` guards are not visible here.
30 bool RunOnDevice()
override {
// Output quantization parameters are read from the operator arguments,
// defaulting to a zero point of 0 and a scale of 1.
35 int32_t Y_offset = this->
template GetSingleArgument<int>(
"Y_zero_point", 0);
36 auto Y_scale = this->
template GetSingleArgument<float>(
"Y_scale", 1);
38 Y->zero_point = Y_offset;
// K = X.t.size_from_dim(1) and N = W.t.size(0). W's dim 1 must equal K and B
// must hold N elements. M = numel(X) / K is used below as the batch size.
40 const auto K = X.t.size_from_dim(1);
41 const auto N = W.t.size(0);
42 CHECK_EQ(K, W.t.size(1));
43 CHECK_EQ(N, B.t.numel());
44 const auto M = X.t.numel() / K;
// The lambda is handed a Tensor (`buffer`) by runWithSharedBuffer; it is used
// below as scratch space for a copy of the input.
47 runWithSharedBuffer<CPUContext>(ws_, [&](
Tensor* buffer) {
// Thread pool taken from the workspace, cast to the pthreadpool handle type.
50 pthreadpool_t threadpool =
51 reinterpret_cast<pthreadpool_t
>(ws_->GetThreadPool());
// First use: create the QNNPACK operator. The weight and bias data pointers
// are passed at creation time, along with the uint8_t minimum and maximum
// (likely the output clamp bounds — confirm against the QNNPACK signature;
// the remaining arguments are elided from this listing).
53 if (this->qnnpackObject_ ==
nullptr) {
54 const qnnp_status createStatus = qnnp_create_fully_connected_nc_q8(
62 W.t.template data<uint8_t>(),
63 B.t.template data<int32_t>(),
71 std::numeric_limits<uint8_t>::min(),
72 std::numeric_limits<uint8_t>::max(),
74 &this->qnnpackObject_);
// Arguments of a status check whose opening line is elided.
76 createStatus == qnnp_status_success,
77 "failed to create QNNPACK fully connected operator");
78 CAFFE_ENFORCE(this->qnnpackObject_ !=
nullptr);
// NOTE(review): as listed, X's bytes are copied into the shared buffer at an
// 8-byte offset and inputPtr is redirected to that copy. Source lines 82 and
// 86 are missing from this listing, so a condition guarding the copy may have
// been elided — confirm against the full file.
81 uint8_t* inputPtr = X.t.template mutable_data<uint8_t>();
83 buffer->Resize(std::vector<int64_t>{X.t.numel() + 8});
84 inputPtr = buffer->template mutable_data<uint8_t>() + 8;
85 memcpy(inputPtr, X.t.template data<uint8_t>(), X.t.numel());
// Call setup only when the batch size, input pointer, or output pointer
// differs from the values cached after the previous setup.
88 if (lastBatchSize_ != static_cast<size_t>(M) ||
89 lastInputPointer_ != inputPtr ||
90 lastOutputPointer_ != Y->t.template mutable_data<uint8_t>()) {
91 const qnnp_status setupStatus = qnnp_setup_fully_connected_nc_q8(
96 Y->t.template mutable_data<uint8_t>(),
99 setupStatus == qnnp_status_success,
100 "failed to setup QNNPACK fully connected operator");
// Cache the values this setup was configured with.
102 lastBatchSize_ =
static_cast<size_t>(M);
103 lastInputPointer_ = inputPtr;
104 lastOutputPointer_ = Y->t.template mutable_data<uint8_t>();
// NOTE(review): two definitions of runStatus appear back to back, one passing
// nullptr and one passing threadpool. The lines between them are elided, so
// they are presumably alternatives selected by a preprocessor conditional —
// confirm against the full file.
108 const qnnp_status runStatus =
109 qnnp_run_operator(this->qnnpackObject_,
nullptr );
111 const qnnp_status runStatus =
112 qnnp_run_operator(this->qnnpackObject_, threadpool);
// Arguments of a status check whose opening line is elided.
115 runStatus == qnnp_status_success,
"failed to run QNNPACK operator");
// Handle to the QNNPACK operator. Null until RunOnDevice creates it; released
// with qnnp_delete_operator.
123 qnnp_operator_t qnnpackObject_{
nullptr};
// Batch size recorded after the most recent setup call; RunOnDevice compares
// against it to decide whether setup must be called again.
125 size_t lastBatchSize_{0};
// Input pointer recorded after the most recent setup call.
127 const void* lastInputPointer_{
nullptr};
// Output pointer recorded after the most recent setup call.
129 void* lastOutputPointer_{
nullptr};
136 #endif // CAFFE2_OPERATORS_INT8_FC_OP_H_
void ReinitializeTensor(Tensor *tensor, at::IntArrayRef dims, at::TensorOptions options)
Reinitialize a Tensor to the given dims and options if necessary. Note that this will not do anything if ...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...