fused_rowwise_8bit_conversion_ops.h
#ifndef CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
#define CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

#define IS_LITTLE_ENDIAN                                        \
  [] {                                                          \
    const int32_t kValue = 1;                                   \
    return reinterpret_cast<const uint8_t*>(&kValue)[0] == 1;   \
  }()
template <
    typename T,
    void (*convert)(float* dst, const T* src, size_t N),
    class Context>
class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
 public:
  static constexpr float kEpsilon = 1e-8f;

  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(FloatToFused8BitRowwiseQuantizedOp)
  bool RunOnDevice() override {
    CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");

    const auto& input = Input(DATA_FLOAT);

    const auto input_rows = input.size(0);
    const auto input_columns = input.size(1);
    CAFFE_ENFORCE_EQ(input.dim(), 2, "Expect input to be a matrix");

    // The "fused" representation stores the scale and bias with the row-wise
    // quantized data in one tensor. Since we quantize with 8 bits (1 byte) and
    // represent the scale and bias with 32-bit floats, we'll use the last 8
    // bytes of each row for scale (4 bytes) and bias (4 bytes).
    // | ... int8 data ... | scale | bias |
    // | number_of_columns |  4B   |  4B  |
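    // For example, a 4-column float input quantizes to a 12-byte output row:
    // bytes [0, 4) hold the four uint8 values, bytes [4, 8) the float scale,
    // and bytes [8, 12) the float bias.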
    const std::vector<int64_t> output_dimensions = {input_rows,
                                                    input_columns + 8};
    auto* output = Output(
        DATA_FUSED_SCALE_BIAS_INT8, output_dimensions, at::dtype<uint8_t>());

    const auto* input_data = input.template data<T>();
    auto* output_data = output->template mutable_data<uint8_t>();
    const auto output_columns = output->size(1);

    if (!std::is_same<T, float>::value && !std::is_same<T, at::Half>::value) {
      CAFFE_THROW("Unsupported data type");
    }

    vector<float> tmp;
    tmp.resize(input_columns, 0.0);

    for (size_t row = 0; row < input_rows; ++row) {
      convert(tmp.data(), input_data + row * input_columns, input_columns);
      ConstEigenVectorArrayMap<float> input_row(tmp.data(), input_columns);
      uint8_t* output_row = output_data + row * output_columns;
      EigenVectorArrayMap<uint8_t> output_row_values(output_row, input_columns);
      EigenVectorArrayMap<float> output_row_scale_bias(
          reinterpret_cast<float*>(output_row + input_columns), 2);

      const float minimum_element = input_row.minCoeff();
      const float maximum_element = input_row.maxCoeff();
      const float range = maximum_element - minimum_element;

      output_row_scale_bias(0) = range / 255.0f;
      output_row_scale_bias(1) = minimum_element;
      const auto inverse_scale = 255.0f / (range + kEpsilon);
      output_row_values = ((input_row - minimum_element) * inverse_scale)
                              .round()
                              .cast<uint8_t>();
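      // Illustrative example: for an input row {0.0f, 0.5f, 2.0f},
      // minimum_element = 0 and range = 2, so scale = 2/255 and bias = 0;
      // the stored uint8 values are {0, 64, 255}.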
    }

    return true;
  }

 private:
  INPUT_TAGS(DATA_FLOAT);
  OUTPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
};

template <
    typename T,
    void (*convert)(T* dst, const float* src, size_t N),
    class Context>
class Fused8BitRowwiseQuantizedToFloatOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(Fused8BitRowwiseQuantizedToFloatOp)
  bool RunOnDevice() override {
    CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");

    const auto& input = Input(DATA_FUSED_SCALE_BIAS_INT8);

    const auto input_rows = input.size(0);
    const auto input_columns = input.size(1);
    CAFFE_ENFORCE_EQ(input.dim(), 2, "Expect input to be a matrix");

    // The last 8 bytes per row are the scale and the bias. The rest of
    // input_columns is the number of values in the original row.
    const std::vector<int64_t> output_dimensions = {input_rows,
                                                    input_columns - 8};
    auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<T>());
    const auto output_columns = output->size(1);

    const auto* input_data = input.template data<uint8_t>();
    T* output_data = output->template mutable_data<T>();

    vector<float> tmp;
    tmp.resize(input_columns, 0.0);

    for (size_t row = 0; row < input_rows; ++row) {
      const uint8_t* input_row = input_data + row * input_columns;
      ConstEigenVectorArrayMap<uint8_t> input_row_values(
          input_row, output_columns);
      ConstEigenVectorArrayMap<float> input_row_scale_bias(
          reinterpret_cast<const float*>(input_row + output_columns), 2);

      EigenVectorArrayMap<float> output_row(tmp.data(), output_columns);
      output_row = input_row_values.cast<float>() * input_row_scale_bias(0) +
          input_row_scale_bias(1);
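      // Illustrative example: a stored byte of 64 with scale 2/255 and
      // bias 0 dequantizes to 64 * (2/255) + 0 ≈ 0.502.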

      convert(output_data + row * output_columns, tmp.data(), output_columns);
    }
    return true;
  }

 private:
  INPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
  OUTPUT_TAGS(DATA_FLOAT);
};

#undef IS_LITTLE_ENDIAN

} // namespace caffe2

#endif // CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
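
The following is a minimal standalone sketch (not part of the header above) of the per-row arithmetic the two operators implement: quantize a row of floats to uint8 with a per-row scale and bias, then dequantize it back. The names quantize_row and dequantize_row are illustrative and do not exist in Caffe2.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Quantize one row: scale = range / 255, bias = min,
// stored value = round((x - min) * 255 / (range + epsilon)).
void quantize_row(const std::vector<float>& in,
                  std::vector<uint8_t>* out,
                  float* scale,
                  float* bias) {
  constexpr float kEpsilon = 1e-8f; // guards against division by zero
  const float min = *std::min_element(in.begin(), in.end());
  const float max = *std::max_element(in.begin(), in.end());
  const float range = max - min;
  *scale = range / 255.0f;
  *bias = min;
  const float inverse_scale = 255.0f / (range + kEpsilon);
  out->resize(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    (*out)[i] = static_cast<uint8_t>(std::round((in[i] - min) * inverse_scale));
  }
}

// Dequantize one row: x ~= stored value * scale + bias.
void dequantize_row(const std::vector<uint8_t>& in,
                    float scale,
                    float bias,
                    std::vector<float>* out) {
  out->resize(in.size());
  for (size_t i = 0; i < in.size(); ++i) {
    (*out)[i] = static_cast<float>(in[i]) * scale + bias;
  }
}

int main() {
  const std::vector<float> row = {0.0f, 0.5f, 2.0f};
  std::vector<uint8_t> quantized;
  float scale = 0.0f;
  float bias = 0.0f;
  quantize_row(row, &quantized, &scale, &bias);
  std::vector<float> recovered;
  dequantize_row(quantized, scale, bias, &recovered);
  for (size_t i = 0; i < row.size(); ++i) {
    std::printf("%f -> %u -> %f\n",
                row[i], static_cast<unsigned>(quantized[i]), recovered[i]);
  }
  return 0;
}

The fused operators store scale and bias inline at the end of each output row instead of returning them separately, which is what the extra 8 columns in the output tensor hold.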