Caffe2 - C++ API
A deep learning, cross-platform ML framework
int8_quantize_op.h
#ifndef CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_
#define CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor_int8.h"
#include "caffe2/operators/quantized/int8_simd.h"
#include "caffe2/operators/quantized/int8_utils.h"

namespace caffe2 {

namespace int8 {

namespace {

void Int8Quantize(
    const float* in,
    uint8_t* out,
    const int64_t N,
    const float Y_scale,
    const int32_t Y_offset) {
  const float inv_scale = 1.0f / Y_scale;
  uint32_t i = 0;
#ifdef INT8_NEON_SIMD
  const float32x4_t vinv_scale = vdupq_n_f32(inv_scale);
  // Magic float and magic int to take care of rounding:
  // int magic_round(float f): interpret_int32(f + 12582912.0f) - 0x4B400000
  // Some detail:
  // 12582912.0f is 2**23 + 2**22. The trick is based on the fact that when
  // you add a small number to a large number, the result rounds to the
  // precision of the least significant bit of the large number. An IEEE-754
  // single-precision number has a 23-bit mantissa, so adding 2**23 causes
  // rounding to the nearest even integer. Then we cast to int and subtract
  // the same number (0x4B400000 is the integer representation of
  // 12582912.0f) to get only the mantissa. This works if -2**22 < x < 2**22,
  // and it preserves the sign for negative numbers.
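  // Worked example: magic_round(3.4f) computes 3.4f + 12582912.0f =
  // 12582915.4, which rounds to the float 12582915.0f (bits 0x4B400003);
  // subtracting 0x4B400000 leaves 3. magic_round(-1.5f) computes
  // 12582910.5, which rounds to even as 12582910.0f (bits 0x4B3FFFFE);
  // subtracting 0x4B400000 leaves -2.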
  const int32x4_t voffset = vdupq_n_s32(Y_offset - 0x4B400000);
  const float32x4_t vmagic_float = vdupq_n_f32(12582912.0f);
  // Vectorized path: quantize eight elements per iteration; the scalar loop
  // below handles any remainder.
  for (i = 0; i + 8 <= N; i += 8) {
    const float32x4_t vin0123 = vld1q_f32(in);
    in += 4;
    const float32x4_t vin4567 = vld1q_f32(in);
    in += 4;
    const int32x4_t vraw0123 = vaddq_s32(
        voffset,
        vreinterpretq_s32_f32(
            vaddq_f32(vmagic_float, vmulq_f32(vin0123, vinv_scale))));
    const int32x4_t vraw4567 = vaddq_s32(
        voffset,
        vreinterpretq_s32_f32(
            vaddq_f32(vmagic_float, vmulq_f32(vin4567, vinv_scale))));
    const int16x8_t vraw01234567 =
        vcombine_s16(vqmovn_s32(vraw0123), vqmovn_s32(vraw4567));
    const uint8x8_t vout01234567 = vqmovun_s16(vraw01234567);
    vst1_u8(out, vout01234567);
    out += 8;
  }
#endif
  // Scalar fallback for the remainder (and for non-NEON builds).
  for (; i < N; ++i) {
    (*out++) = QuantizeUint8(Y_scale, Y_offset, (*in++));
  }
}

} // namespace

class Int8QuantizeOp final : public Operator<CPUContext> {
 public:
  using Operator<CPUContext>::Operator; // inherit base-class constructors

  bool RunOnDevice() override {
    const auto& X = Input(0);
    auto* Y = Outputs()[0]->template GetMutable<Int8TensorCPU>();
    Y->t.ResizeLike(X);
    int32_t Y_offset = this->template GetSingleArgument<int>("Y_zero_point", 0);
    auto Y_scale = this->template GetSingleArgument<float>("Y_scale", 1);
    Y->scale = Y_scale;
    Y->zero_point = Y_offset;
    Int8Quantize(
        X.data<float>(),
        Y->t.mutable_data<uint8_t>(),
        X.numel(),
        Y_scale,
        Y_offset);
    return true;
  }
};

} // namespace int8

} // namespace caffe2

#endif // CAFFE2_OPERATORS_INT8_QUANTIZE_OP_H_
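QuantizeUint8 above comes from int8_utils.h and is not shown on this page. As a reference point, here is a minimal standalone sketch of the affine quantization it is expected to perform, q = clamp(round(x / scale) + zero_point, 0, 255), using round-to-nearest-even to match the NEON magic-float path. QuantizeUint8Sketch and the constants below are illustrative assumptions, not the actual Caffe2 API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Sketch only: quantize one float to uint8 with the usual affine formula.
static uint8_t QuantizeUint8Sketch(float scale, int32_t zero_point, float x) {
  // Round to nearest (ties to even under the default FP environment),
  // shift by the zero point, then clamp to the uint8 range.
  const int32_t r =
      zero_point + static_cast<int32_t>(std::nearbyintf(x / scale));
  return static_cast<uint8_t>(std::min(255, std::max(0, r)));
}

int main() {
  const float scale = 0.5f;       // hypothetical Y_scale argument
  const int32_t zero_point = 128; // hypothetical Y_zero_point argument
  const float in[] = {-70.0f, -64.25f, 3.4f, 64.0f};
  for (float x : in) {
    std::printf(
        "%8.2f -> %d\n", x,
        static_cast<int>(QuantizeUint8Sketch(scale, zero_point, x)));
  }
  return 0;
}

With these inputs the sketch prints 0, 0, 135, and 255; the first and last values are clamped at the bottom and top of the uint8 range, respectively.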