3 #include <fbgemm/Fbgemm.h> 4 #include "caffe2/operators/fully_connected_op.h" 5 #include "caffe2/quantization/server/dnnlowp_op.h" 11 :
public DNNLowPOp<T, FullyConnectedOp<CPUContext>> {
// Declared with `override`, so this replaces a virtual RunOnDevice() inherited
// from a base class; only the declaration is visible here.
// NOTE(review): presumably returns true on success — confirm against the
// definition, which is not part of this listing.
14 bool RunOnDevice()
override;
// Helper taking no arguments and returning bool; declaration only.
// NOTE(review): by its name this presumably selects/updates the quantization
// parameters held in the members below — confirm against the definition.
20 bool GetQuantizationParameters_();
// Initialised to 1 in-class.
// NOTE(review): presumably the axis at which the weight tensor is flattened
// into a 2-D matrix — confirm against the constructor/argument parsing.
23 std::size_t axis_w_{1};
// NOTE(review): name suggests a cached output (Y) shape — confirm where it is
// filled and reused.
24 vector<std::int64_t> Y_shape_cache_;
// Collection of dnnlowp::RequantizationParams.
// NOTE(review): presumably one entry per quantization group — confirm.
26 std::vector<dnnlowp::RequantizationParams> requantization_params_;
// Starts out false.
// NOTE(review): presumably flipped once requantization parameters have been
// chosen so the selection is not repeated — confirm.
27 bool requantization_param_selected_{
false};
// Signed counterpart of the element type T, obtained via std::make_signed;
// used as the element type of W_quantized_.
31 using T_signed =
typename std::make_signed<T>::type;
// Shared handle to an fbgemm::PackBMatrix over int8 data.
// NOTE(review): presumably the pre-packed quantized weight matrix — confirm.
34 std::shared_ptr<fbgemm::PackBMatrix<std::int8_t>> Wq_packed_;
// Byte buffer (uint8).
// NOTE(review): presumably scratch space for packing the input X — confirm.
35 std::vector<std::uint8_t> X_pack_buf_;
// 32-bit integer buffer.
// NOTE(review): presumably holds the integer output before requantization — confirm.
37 std::vector<std::int32_t> Y_int32_;
// Weight ("filter") quantization parameters, plus separate vectors of float
// scales and int32 zero points.
// NOTE(review): presumably the scales/zero points mirror filter_qparams_ in
// flat form — confirm.
38 std::vector<dnnlowp::TensorQuantizationParams> filter_qparams_;
39 std::vector<float> filter_scales_;
40 std::vector<std::int32_t> filter_zero_points_;
// NOTE(review): presumably the float multipliers applied when requantizing
// the int32 result — confirm.
42 std::vector<float> requantization_multipliers_;
// No in-class initializer, unlike the other bool members visible here.
// NOTE(review): confirm the constructor always sets it before it is read.
43 bool quantize_channelwise_;
// Quantized weights stored as the signed counterpart of T.
// NOTE(review): presumably used when the packed fbgemm path is not — confirm.
46 std::vector<T_signed> W_quantized_;
// Shared int32 vector plus a raw pointer that starts as nullptr.
// NOTE(review): presumably the quantized bias and a pointer to whichever
// buffer currently holds it — confirm ownership/lifetime.
49 std::shared_ptr<std::vector<std::int32_t>> b_quantized_;
50 const std::int32_t* b_quantized_data_{
nullptr};
// int32 offset buffers; the column offsets are held through a shared_ptr.
// NOTE(review): presumably row/column sums used for zero-point correction — confirm.
51 std::vector<std::int32_t> row_offsets_;
52 std::shared_ptr<std::vector<std::int32_t>> column_offsets_;
// Float vector plus a raw pointer that starts as nullptr.
// NOTE(review): presumably the dequantized bias and a pointer to it — confirm
// when this is populated.
56 std::vector<float> b_dequantized_;
57 const float* b_dequantized_data_{
nullptr};
// Defaults to true.
// NOTE(review): presumably indicates the weight can be quantized/packed once
// and reused across runs — confirm.
59 bool is_weight_constant_{
true};
// Both start at 0.
// NOTE(review): presumably the scale and zero point of input 0 seen on the
// previous run, kept to detect changes — confirm.
61 float in_qparams0_scale_old_ = 0;
62 std::int32_t in_qparams0_zero_point_old_ = 0;
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
A convenient base class for C2 operators with DNNLOWP engine.