// Constructor declaration. The max_abs_err argument is optional and falls
// back to DEFAULT_MAX_ABS_ERR (0.02) when the caller omits it.
// NOTE(review): presumably the largest absolute error the approximation is
// allowed to introduce -- confirm against the constructor definition.
23 Tanh(
double max_abs_err = DEFAULT_MAX_ABS_ERR);
27 TensorQuantizationParams GetInputQuantizationParams()
const {
30 TensorQuantizationParams GetOutputQuantizationParams()
const {
34 int GetPassRegionEnd()
const {
// Returns, as a float, the result of fbgemm::Dequantize<T> applied to the
// quantized value (x_pq_index_ + in_qparams_.zero_point).
// NOTE(review): the operand is narrowed with static_cast<uint8_t>, whereas
// GetSaturationRegionBegin() uses static_cast<T>. If T can be wider than
// 8 bits this cast would truncate -- confirm whether uint8_t is intended.
// NOTE(review): the call is cut off after its first argument in this view;
// the remaining argument(s) and closing brace are not shown here.
38 float GetPassRegionEndDequantized()
const {
39 return fbgemm::Dequantize<T>(
40 static_cast<uint8_t
>(x_pq_index_ + in_qparams_.zero_point),
// Returns, as a float, fbgemm::Dequantize<T> of (1 << num_in_bits_) - 1
// using in_qparams_; i.e. the dequantized value of the all-ones code for
// num_in_bits_ input bits.
44 float GetSaturationRegionBegin()
const {
45 return fbgemm::Dequantize<T>(
46 static_cast<T>((1 << num_in_bits_) - 1), in_qparams_);
// Compile-time defaults.
// DEFAULT_MAX_ABS_ERR is the default value of the constructor's max_abs_err
// argument.
49 static constexpr
double DEFAULT_MAX_ABS_ERR = 0.02;
// DEFAULT_NUM_IN_BITS is the in-class initializer for num_in_bits_.
50 static constexpr
int DEFAULT_NUM_IN_BITS = 8;
// DEFAULT_NUM_OUT_BITS is the in-class initializer for num_out_bits_.
51 static constexpr
int DEFAULT_NUM_OUT_BITS = 8;
// NOTE(review): max_abs_err_ is presumably initialized from the constructor's
// max_abs_err argument -- confirm against the constructor definition.
54 const double max_abs_err_;
// Input/output bit widths; const and fixed to the defaults by their in-class
// initializers.
55 const int num_in_bits_ = DEFAULT_NUM_IN_BITS;
56 const int num_out_bits_ = DEFAULT_NUM_OUT_BITS;
// Lookup table of T values.
// NOTE(review): presumably holds the outputs for the "processing" region of
// the 3-region approach -- confirm where it is populated.
59 std::vector<T> processing_region_lut_;
// Quantization parameters; in_qparams_ is what the getters in this class pass
// to fbgemm::Dequantize<T>.
60 TensorQuantizationParams in_qparams_, out_qparams_;
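We use the 3-region approach described in "Efficient VLSI Implementation of Neural Networks with Hyperbolic Tangent Activation Function", IEEE Transactions on Very Large Scale Integration (VLSI) Systems, Zamanlooy and Mirhassani.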