Caffe2 - C++ API
A deep learning, cross platform ML framework
fused_rowwise_8bit_conversion_ops.h
// Copyright (c) 2016-present, Facebook, Inc.
#ifndef CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
#define CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

#define IS_LITTLE_ENDIAN                                      \
  [] {                                                        \
    const int32_t kValue = 1;                                 \
    return reinterpret_cast<const uint8_t*>(&kValue)[0] == 1; \
  }()

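// FloatToFused8BitRowwiseQuantizedOp quantizes each row of a float matrix to
// uint8 with a per-row scale and bias, and stores the scale and bias as raw
// 4-byte floats at the end of the row (hence the IS_LITTLE_ENDIAN check
// enforced in RunOnDevice()).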
template <class Context>
class FloatToFused8BitRowwiseQuantizedOp : public Operator<Context> {
 public:
  static constexpr float kEqualityThreshold = 1e-7f;
  static constexpr float kEpsilon = 1e-8f;

  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(FloatToFused8BitRowwiseQuantizedOp)

  bool RunOnDevice() override {
    CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");

    const auto& input = Input(DATA_FLOAT);
    auto* output = Output(DATA_FUSED_SCALE_BIAS_INT8);

    CAFFE_ENFORCE_EQ(input.ndim(), 2, "Expect input to be a matrix");
    const auto input_rows = input.dim(0);
    const auto input_columns = input.dim(1);

    // The "fused" representation stores the scale and bias with the row-wise
    // quantized data in one tensor. Since we quantize with 8 bits (1 byte) and
    // represent the scale and bias with 32-bit floats, we'll use the last 8
    // bytes of each row for the scale (4 bytes) and the bias (4 bytes).
    // | ... int8 data ...  | scale | bias |
    // | number_of_columns  |  4B   |  4B  |
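    // Per row: scale = (max - min) / 255 and bias = min; each value x is
    // stored as round((x - min) * 255 / (max - min + kEpsilon)), where
    // kEpsilon keeps the division finite for constant rows (range == 0).
    // Dequantization then reconstructs x approximately as value * scale + bias.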
    const std::vector<TIndex> output_dimensions = {input_rows,
                                                   input_columns + 8};
    output->Resize(output_dimensions);

    const auto* input_data = input.template data<float>();
    auto* output_data = output->template mutable_data<uint8_t>();
    const auto output_columns = output->dim(1);

    for (size_t row = 0; row < input_rows; ++row) {
      ConstEigenVectorArrayMap<float> input_row(
          input_data + row * input_columns, input_columns);

      uint8_t* output_row = output_data + row * output_columns;
      EigenVectorArrayMap<uint8_t> output_row_values(output_row, input_columns);
      EigenVectorArrayMap<float> output_row_scale_bias(
          reinterpret_cast<float*>(output_row + input_columns), 2);

      const float minimum_element = input_row.minCoeff();
      const float maximum_element = input_row.maxCoeff();
      const float range = maximum_element - minimum_element;

      output_row_scale_bias(0) = range / 255.0f;
      output_row_scale_bias(1) = minimum_element;
      const auto inverse_scale = 255.0f / (range + kEpsilon);
      output_row_values = ((input_row - minimum_element) * inverse_scale)
                              .round()
                              .cast<uint8_t>();
    }

    return true;
  }

 private:
  INPUT_TAGS(DATA_FLOAT);
  OUTPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
};

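// Fused8BitRowwiseQuantizedToFloatOp is the inverse operator: it reads the
// per-row scale and bias from the last 8 bytes of each row and reconstructs
// every float value as quantized_value * scale + bias.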
template <class Context>
class Fused8BitRowwiseQuantizedToFloatOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(Fused8BitRowwiseQuantizedToFloatOp)

  bool RunOnDevice() override {
    CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");

    const auto& input = Input(DATA_FUSED_SCALE_BIAS_INT8);
    auto* output = Output(DATA_FLOAT);

    CAFFE_ENFORCE_EQ(input.ndim(), 2, "Expect input to be a matrix");
    const auto input_rows = input.dim(0);
    const auto input_columns = input.dim(1);

    // The last 8 bytes of each row hold the scale (4 bytes) followed by the
    // bias (4 bytes); the remaining input_columns - 8 bytes are the quantized
    // values of the original row.
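    // Example: a row quantized from the range [0.0, 1.0] has scale = 1/255 and
    // bias = 0, so a stored byte of 255 maps back to 255 * (1/255) + 0 = 1.0.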
    const std::vector<TIndex> output_dimensions = {input_rows,
                                                   input_columns - 8};
    output->Resize(output_dimensions);
    const auto output_columns = output->dim(1);

    const auto* input_data = input.template data<uint8_t>();
    auto* output_data = output->template mutable_data<float>();

    for (size_t row = 0; row < input_rows; ++row) {
      const uint8_t* input_row = input_data + row * input_columns;
      ConstEigenVectorArrayMap<uint8_t> input_row_values(
          input_row, output_columns);
      ConstEigenVectorArrayMap<float> input_row_scale_bias(
          reinterpret_cast<const float*>(input_row + output_columns), 2);

      EigenVectorArrayMap<float> output_row(
          output_data + row * output_columns, output_columns);

      output_row = input_row_values.cast<float>() * input_row_scale_bias(0) +
          input_row_scale_bias(1);
    }
    return true;
  }

 private:
  INPUT_TAGS(DATA_FUSED_SCALE_BIAS_INT8);
  OUTPUT_TAGS(DATA_FLOAT);
};

#undef IS_LITTLE_ENDIAN

} // namespace caffe2

#endif // CAFFE2_OPERATORS_FUSED_ROWWISE_8BIT_CONVERSION_OPS_H_
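
For reference, the per-row scheme above round-trips as shown in the sketch below. It is a minimal, self-contained C++ illustration of the same fused layout (N uint8 values followed by a 4-byte scale and a 4-byte bias); it does not use the Caffe2 operator API, and the QuantizeRowFused/DequantizeRowFused helper names are made up for this example.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Quantize one float row into the fused layout used above: N uint8 values
// followed by the 4-byte scale and the 4-byte bias (little-endian host and
// sizeof(float) == 4 assumed, as in the header).
std::vector<uint8_t> QuantizeRowFused(const std::vector<float>& row) {
  constexpr float kEpsilon = 1e-8f;
  const float min = *std::min_element(row.begin(), row.end());
  const float max = *std::max_element(row.begin(), row.end());
  const float range = max - min;
  const float scale = range / 255.0f;
  const float bias = min;
  const float inverse_scale = 255.0f / (range + kEpsilon);

  std::vector<uint8_t> fused(row.size() + 8);
  for (size_t i = 0; i < row.size(); ++i) {
    fused[i] = static_cast<uint8_t>(std::round((row[i] - min) * inverse_scale));
  }
  std::memcpy(fused.data() + row.size(), &scale, sizeof(scale));
  std::memcpy(fused.data() + row.size() + 4, &bias, sizeof(bias));
  return fused;
}

// Invert the mapping, reading the scale and bias from the last 8 bytes.
std::vector<float> DequantizeRowFused(const std::vector<uint8_t>& fused) {
  const size_t n = fused.size() - 8;
  float scale = 0.0f;
  float bias = 0.0f;
  std::memcpy(&scale, fused.data() + n, sizeof(scale));
  std::memcpy(&bias, fused.data() + n + 4, sizeof(bias));

  std::vector<float> row(n);
  for (size_t i = 0; i < n; ++i) {
    row[i] = fused[i] * scale + bias;
  }
  return row;
}

int main() {
  const std::vector<float> row = {-1.5f, 0.0f, 0.25f, 3.5f};
  const auto restored = DequantizeRowFused(QuantizeRowFused(row));
  for (size_t i = 0; i < row.size(); ++i) {
    // Reconstruction error is at most about half a quantization step,
    // i.e. (max - min) / 255 / 2.
    std::cout << row[i] << " -> " << restored[i] << "\n";
  }
  return 0;
}

Compiled with any C++11 compiler, this prints each original value next to its dequantized approximation.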