1 #ifndef CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_ 2 #define CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_ 4 #include "caffe2/core/context.h" 5 #include "caffe2/core/operator.h" 6 #include "caffe2/core/tensor_int8.h" 7 #include "caffe2/operators/quantized/int8_simd.h" 8 #include "caffe2/operators/quantized/int8_utils.h" 21 const int32_t Y_offset) {
// Quantization body: each input float is multiplied by 1 / Y_scale, shifted by
// Y_offset and narrowed to an unsigned 8-bit value. A NEON loop converts eight
// floats per iteration; single elements go through QuantizeUint8.
// NOTE(review): this listing omits several original lines (the embedded line
// numbers skip), so pointer increments, preprocessor guards and the scalar
// loop header are not visible here — confirm against the full file.
22 const float inv_scale = 1.0f / Y_scale;
// Broadcast the reciprocal scale to all four float lanes.
25 const float32x4_t vinv_scale = vdupq_n_f32(inv_scale);
// Magic-number rounding: 12582912.0f is 1.5 * 2^23, whose IEEE-754 single
// precision bit pattern is 0x4B400000. Adding it to a float of modest
// magnitude leaves the rounded integer in the low mantissa bits, so
// reinterpreting the sum as int32 and subtracting 0x4B400000 yields that
// integer. The subtraction is folded into the same constant as Y_offset.
37 const int32x4_t voffset = vdupq_n_s32(Y_offset - 0x4B400000);
38 const float32x4_t vmagic_float = vdupq_n_f32(12582912.0f);
// Strict '<': the loop stops while at least one element remains, so when
// exactly eight elements are left they are not handled by this loop.
// NOTE(review): presumably the scalar path below picks them up — confirm.
39 for (i = 0; i + 8 < N; i += 8) {
// NOTE(review): both loads read from `in`; presumably the pointer is advanced
// on the lines omitted between them — confirm.
40 const float32x4_t vin0123 = vld1q_f32(in);
42 const float32x4_t vin4567 = vld1q_f32(in);
// scaled = in * inv_scale; (scaled + magic) reinterpreted as int32 and added
// to another int32 vector. The first operand of vaddq_s32 is on an omitted
// line (presumably voffset — TODO confirm).
44 const int32x4_t vraw0123 = vaddq_s32(
46 vreinterpretq_s32_f32(
47 vaddq_f32(vmagic_float, vmulq_f32(vin0123, vinv_scale))));
48 const int32x4_t vraw4567 = vaddq_s32(
50 vreinterpretq_s32_f32(
51 vaddq_f32(vmagic_float, vmulq_f32(vin4567, vinv_scale))));
// Saturating narrow of each int32x4 to int16x4, combined into one int16x8.
52 const int16x8_t vraw01234567 =
53 vcombine_s16(vqmovn_s32(vraw0123), vqmovn_s32(vraw4567));
// Saturating narrow from signed 16-bit to unsigned 8-bit lanes.
54 const uint8x8_t vout01234567 = vqmovun_s16(vraw01234567);
// Store the eight quantized bytes at `out`.
55 vst1_u8(out, vout01234567);
// Scalar path: quantize one element and advance both pointers.
60 (*out++) = QuantizeUint8(Y_scale, Y_offset, (*in++));
// Operator entry point (overrides a base-class virtual). From the visible
// lines: takes input 0, obtains output 0 as a mutable Int8TensorCPU, reads the
// quantization parameters from the operator arguments and stores the zero
// point on the output.
// NOTE(review): this listing omits several original lines (the embedded line
// numbers skip), including the rest of the body and the closing braces.
70 bool RunOnDevice()
override {
// Input 0, bound by const reference (no copy).
71 const auto& X =
Input(0);
// Output 0, accessed as an Int8TensorCPU that may be modified.
72 auto* Y = Outputs()[0]->template GetMutable<Int8TensorCPU>();
// "Y_zero_point" argument; 0 is passed as the second parameter
// (presumably the value used when the argument is absent — confirm).
74 int32_t Y_offset = this->
template GetSingleArgument<int>(
"Y_zero_point", 0);
// "Y_scale" argument; 1 is passed as the second parameter
// (presumably the value used when the argument is absent — confirm).
75 auto Y_scale = this->
template GetSingleArgument<float>(
"Y_scale", 1);
// Record the zero point on the output tensor.
77 Y->zero_point = Y_offset;
// One argument of a call whose surrounding lines are omitted from this
// listing: the output's uint8_t data buffer, obtained for writing.
80 Y->t.mutable_data<uint8_t>(),
92 #endif // CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...