3 #include "caffe2/quantization/server/conv_dnnlowp_op.h" 4 #include "fbgemm/Fbgemm.h" 12 template <
bool ReluFused = false>
// Using-declarations that make the listed BaseType members nameable without
// qualification in the enclosing scope.
// NOTE(review): the declaration of BaseType is on a line missing from this
// listing; presumably it is a template-dependent base class, which is why
// these names must be re-introduced explicitly — confirm.
20 using BaseType::col_buffer_;
21 using BaseType::FILTER;
22 using BaseType::in_qparams_;
23 using BaseType::INPUT;
24 using BaseType::InputTensorCPU_;
25 using BaseType::out_qparams_;
26 using BaseType::OutputTensorCPU_;
27 using BaseType::row_offsets_;
28 using BaseType::W_quantized_;
29 using BaseType::X_pack_buf_;
30 using BaseType::Y_int32_;
// Declaration only: overrides a virtual member of a base class and returns
// bool. The definition is not part of this listing.
// NOTE(review): judging by the name this is the entry point for NCHW-ordered
// tensors — confirm against the definition.
33 bool RunOnDeviceWithOrderNCHW()
override;
// Declaration only: overrides a virtual member of a base class and returns
// bool. The definition is not part of this listing.
// NOTE(review): judging by the name this is the entry point for NHWC-ordered
// tensors — confirm against the definition.
34 bool RunOnDeviceWithOrderNHWC()
override;
// Declaration only: non-virtual here (no `override`), takes no arguments and
// returns bool.
// NOTE(review): presumably prepares the quantization parameters used by the
// RunOnDevice* paths and reports success — confirm against the definition.
36 bool GetQuantizationParameters_();
38 template <fbgemm::QuantizationGranularity Q_GRAN>
40 fbgemm::PackAWithRowOffset<std::uint8_t, std::int16_t>& packA,
41 const std::uint8_t* col_buffer_data,
42 vector<std::int32_t>* Y_int32,
43 uint8_t* Y_uint8_data);
46 const std::uint8_t* col_buffer,
47 vector<std::int32_t>* Y_int32);
// Reports whether 16-bit accumulation is in effect: returns true unless
// fallback_to_32_bit_accumulation_ has been set. Overrides a base-class
// virtual and does not modify the object (const).
// NOTE(review): the closing brace of this body is missing from this listing.
49 virtual bool Acc16()
const override {
50 return !fallback_to_32_bit_accumulation_;
53 std::shared_ptr<fbgemm::PackBMatrix<std::int8_t, std::int16_t>>
// Shared handle to an fbgemm::CompressedSparseColumn matrix.
// NOTE(review): the name suggests it holds the outlier part of the quantized
// weights in sparse form — confirm.
57 std::shared_ptr<fbgemm::CompressedSparseColumn> Wq_outlier_;
// No in-class initializer is visible for the next two ints; they must be set
// elsewhere before use.
// NOTE(review): presumably the bit width used for non-outlier weights —
// confirm.
64 int nbits_in_non_outlier_;
// NOTE(review): presumably how often 16-bit partial sums are spilled to
// 32-bit — confirm units and semantics against the implementation.
65 int copy_to_32bit_frequency_;
// Starts out true.
// NOTE(review): presumably cleared after the first run — confirm.
67 bool first_invocation_{
true};
// Starts out false; Acc16() returns the negation of this flag, so setting it
// makes Acc16() report false.
71 bool fallback_to_32_bit_accumulation_{
false};
The CPU Context, representing the bare minimum of what a Context class in Caffe2 should implement...
Quantized Conv operator with 16-bit accumulation.
Workspace is a class that holds all the related objects created during runtime: (1) all blobs...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...