Caffe2 - C++ API
A deep learning, cross-platform ML framework
math_cpu_base.cc
1 // Implements the math functions for CPU.
2 // The implementation in this file allows us to route the underlying numerical
3 // computation library to different compiler options (-mno-avx2 or -mavx2).
4 
#include <cfloat>
#include <cmath>
#include <cstdint>
#include <cstring>
8 
9 #include "common.h"
10 #include "math.h"
11 
12 using std::uint64_t;
13 using std::uint8_t;
14 
15 namespace caffe2 {
16 
17 namespace math {
18 
// Small additive guard on the quantization step: it keeps the reciprocal of
// the step finite when every input element is identical (max == min).
static constexpr double QEPSILON = 1e-8;

// Quantizes every element and ORs it into the packed payload.
//
// The input is walked in consecutive segments of `segment_size` elements;
// element i of segment k lands in payload byte i at bit offset k * bitwidth.
// kStochastic selects the rounding mode at compile time so the inner loop
// carries no per-element branch on the mode.
template <bool kStochastic>
static void pack_quantized_segments(
    const float* input_data,
    uint8_t* payload,
    uint64_t input_size,
    uint64_t segment_size,
    uint64_t bitwidth,
    float minimum_element,
    float gap_inverse,
    const float* random_buffer) {
  const uint8_t max_q = static_cast<uint8_t>((1 << bitwidth) - 1);
  const float max_level = static_cast<float>(max_q);

  uint64_t bit_start = 0;
  // 64-bit counters: the sizes are uint64_t, an int counter would overflow
  // (and compare signed/unsigned) for inputs larger than INT_MAX.
  for (uint64_t start = 0; start < input_size; start += segment_size) {
    const uint64_t remaining = input_size - start;
    const uint64_t stride = segment_size <= remaining ? segment_size : remaining;
    for (uint64_t i = 0; i < stride; ++i) {
      float level = (input_data[start + i] - minimum_element) * gap_inverse;
      if (kStochastic) {
        // Round down after adding the caller-supplied offset, then clamp.
        level = std::floor(level + random_buffer[start + i]);
        level = level < max_level ? level : max_level;
        level = level > 0.0f ? level : 0.0f;
      } else {
        // Clamp first, then round to nearest under the current rounding mode.
        level = level < max_level ? level : max_level;
        level = level > 0.0f ? level : 0.0f;
        level = std::nearbyint(level);
      }
      const uint8_t qval = static_cast<uint8_t>(level);
      payload[i] |= static_cast<uint8_t>(qval << bit_start);
    }
    bit_start += bitwidth;
  }
}

// Quantizes `input_size` floats to `bitwidth`-bit levels and packs them into
// `output_data`.
//
// Output layout:
//   byte 0      bitwidth
//   byte 1      tail: number of unused element slots in the packed payload
//   bytes 2-5   minimum input element (float)
//   bytes 6-9   maximum input element (float)
//   bytes 10... ceil(input_size / (8 / bitwidth)) payload bytes
//
// Parameters:
//   input_data     values to quantize (`input_size` floats).
//   output_data    destination; must have room for the 10-byte header plus
//                  the payload. The payload bytes are cleared here, so the
//                  buffer does not need to be zeroed by the caller.
//   bitwidth       bits per quantized value. Must be in [1, 8]; this is not
//                  checked (0 would divide by zero).
//   random         true: level = floor(x + random_buffer[j]);
//                  false: level = nearbyint(x).
//   random_buffer  read only when `random` is true; must then hold one offset
//                  per input element (presumably uniform in [0, 1) so that
//                  the rounding is unbiased -- confirm against the caller).
void quantize_and_compress__base(
    const float* input_data,
    uint8_t* output_data,
    uint64_t input_size,
    uint64_t bitwidth,
    bool random,
    const float* random_buffer) {
  const uint64_t data_per_byte = 8 / bitwidth;
  uint64_t tail = input_size % data_per_byte;
  tail = tail ? data_per_byte - tail : 0;
  const uint64_t segment_size =
      (input_size + data_per_byte - 1) / data_per_byte;

  // Value range of the input; an empty input leaves (+inf, -inf).
  float minimum_element = INFINITY;
  float maximum_element = -INFINITY;
  for (uint64_t i = 0; i < input_size; ++i) {
    const float value = input_data[i];
    minimum_element = value < minimum_element ? value : minimum_element;
    maximum_element = value > maximum_element ? value : maximum_element;
  }

  // Header. The floats sit at byte offset 2, which is not float-aligned, so
  // they are copied bytewise instead of being stored through a float*.
  output_data[0] = static_cast<uint8_t>(bitwidth);
  output_data[1] = static_cast<uint8_t>(tail);
  std::memcpy(output_data + 2, &minimum_element, sizeof(float));
  std::memcpy(
      output_data + 2 + sizeof(float), &maximum_element, sizeof(float));

  // The packing below only ORs bits in, so start from an all-zero payload.
  uint8_t* payload = output_data + 10;
  std::memset(payload, 0, segment_size);

  const float gap =
      (maximum_element - minimum_element) / ((1 << bitwidth) - 1.0f);
  const float gap_inverse = 1. / (gap + QEPSILON);

  if (random) {
    pack_quantized_segments<true>(
        input_data,
        payload,
        input_size,
        segment_size,
        bitwidth,
        minimum_element,
        gap_inverse,
        random_buffer);
  } else {
    pack_quantized_segments<false>(
        input_data,
        payload,
        input_size,
        segment_size,
        bitwidth,
        minimum_element,
        gap_inverse,
        random_buffer);
  }
}
91 
92 decltype(quantize_and_compress__base) quantize_and_compress__avx2;
93 void quantize_and_compress(
94  const float* input_data,
95  uint8_t* output_data,
96  uint64_t input_size,
97  uint64_t bitwidth,
98  bool random,
99  const float* random_buffer) {
100  AVX2_DO(
101  quantize_and_compress,
102  input_data,
103  output_data,
104  input_size,
105  bitwidth,
106  random,
107  random_buffer);
108  BASE_DO(
109  quantize_and_compress,
110  input_data,
111  output_data,
112  input_size,
113  bitwidth,
114  random,
115  random_buffer);
116 }
117 
118 void decompress_and_dequantize__base(
119  const uint8_t* input_data,
120  float* output_data,
121  uint64_t input_size) {
122  // basic info
123  const float minimum_element =
124  reinterpret_cast<const float*>(input_data + 2)[0];
125  const float maximum_element =
126  reinterpret_cast<const float*>(input_data + 2)[1];
127  const uint64_t bitwidth = input_data[0];
128  const float gap =
129  (maximum_element - minimum_element) / ((1 << bitwidth) - 1.f) +
130  QEPSILON; // for exact recovering
131 
132  const uint64_t tail = input_data[1];
133 
134  const uint64_t output_size = (input_size - 10) * (8 / bitwidth) - tail;
135  // decoding
136  uint64_t bit_start = 0;
137  const uint64_t segment_size = input_size - 10;
138  for (int start = 0; start < output_size; start += segment_size) {
139  uint64_t stride = start + segment_size <= output_size ? segment_size
140  : output_size - start;
141  uint8_t mask = (1 << bitwidth) - 1;
142  int i = 0;
143  for (; i < stride; ++i) {
144  output_data[start + i] =
145  ((input_data[10 + i] >> bit_start) & mask) * gap + minimum_element;
146  }
147  bit_start += bitwidth;
148  }
149 }
150 
151 decltype(decompress_and_dequantize__base) decompress_and_dequantize__avx2;
152 void decompress_and_dequantize(
153  const uint8_t* input_data,
154  float* output_data,
155  uint64_t input_size) {
156  AVX2_DO(decompress_and_dequantize, input_data, output_data, input_size);
157  BASE_DO(decompress_and_dequantize, input_data, output_data, input_size);
158 }
159 
160 } // namespace math
161 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13