11 #include <fbgemm/QuantUtils.h> 13 #include "caffe2/quantization/server/dynamic_histogram.h" 14 #include "caffe2/utils/cpuid.h" 18 using fbgemm::RequantizationParams;
19 using fbgemm::TensorQuantizationParams;
25 enum QuantizationKind {
32 L2_MIN_QUANTIZATION_APPROX,
50 bool preserve_sparsity,
51 bool is_signed =
false)
const {
52 TensorQuantizationParams qparams = fbgemm::ChooseQuantizationParams(
55 is_signed ? -(1 << (precision - 1)) : 0,
56 is_signed ? ((1 << (precision - 1)) - 1) : (1 << precision) - 1,
58 force_scale_power_of_two_);
59 qparams.precision = precision;
66 TensorQuantizationParams
71 is_weight ? GetWeightPrecision() : GetActivationPrecision(),
72 is_weight ? GetPreserveWeightSparsity()
73 : GetPreserveActivationSparsity());
81 QuantizationKind kind,
83 bool preserve_sparsity)
const;
88 bool is_weight =
false)
const;
94 QuantizationKind kind,
96 bool preserve_sparsity)
const;
100 bool is_weight =
false)
const;
119 RequantizationParams ChooseRequantizationMultiplier(
120 float real_multiplier,
121 TensorQuantizationParams target_qparams)
const;
123 int GetActivationPrecision()
const {
124 return activation_precision_;
127 int GetWeightPrecision()
const {
128 return weight_precision_;
131 int GetEltwiseQuantizePrecision()
const {
132 return eltwise_quantize_precision_;
135 bool GetPreserveActivationSparsity()
const {
136 return preserve_activation_sparsity_;
139 bool GetPreserveWeightSparsity()
const {
140 return preserve_weight_sparsity_;
143 QuantizationKind GetActivationKind()
const {
144 return activation_kind_;
146 QuantizationKind GetWeightKind()
const {
151 int activation_precision = 8,
153 int weight_precision = 8,
154 int requantization_multiplier_precision = 32,
156 int eltwise_quantize_precision = 16,
158 bool preserve_activation_sparsity =
false,
160 bool preserve_weight_sparsity =
false,
162 bool force_scale_power_of_two =
false,
164 QuantizationKind activation_kind = MIN_MAX_QUANTIZATION,
165 QuantizationKind weight_kind = MIN_MAX_QUANTIZATION);
168 int activation_precision_;
169 int weight_precision_;
170 int requantization_multiplier_precision_;
171 int eltwise_quantize_precision_;
172 bool preserve_activation_sparsity_;
173 bool preserve_weight_sparsity_;
174 bool force_scale_power_of_two_;
175 QuantizationKind activation_kind_, weight_kind_;
181 QuantizationFactory::QuantizationKind StringToKind(
const std::string& s);
TensorQuantizationParams ChooseQuantizationParams(float min, float max, int precision, bool preserve_sparsity, bool is_signed=false) const
Choose quantization scale and zero_point that maps floating-point range [min, max] to the integer range of the given precision (signed or unsigned), optionally preserving sparsity (exact representation of zero).
TensorQuantizationParams ChooseQuantizationParams(float min, float max, bool is_weight=false) const
Choose quantization scale and zero_point that maps floating-point range [min, max] to the default integer range of this factory (the activation or weight precision, depending on is_weight).
static QuantizationFactory * GetDefaultInstance()
Get the default factory whose policy is determined by gflags.
bin_width = (max - min)/nbins. The i-th bin (zero-based indexing) contains [i*bin_width, (i+1)*bin_width), with the exception that the (nbins - 1)-th bin contains [(nbins-1)*bin_width, nbins*bin_width].