Caffe2 - C++ API
A deep learning, cross-platform ML framework
fused_rowwise_8bit_conversion_ops.cc
#include "caffe2/operators/fused_rowwise_8bit_conversion_ops.h"

#include <cstring>

#include "c10/util/Registry.h"

namespace caffe2 {

namespace {
// Identity conversion: the input is already fp32.
void convertfp32fp32(float* dst, const float* src, size_t N) {
  memcpy(dst, src, sizeof(float) * N);
}

// Widen fp16 input to fp32 before quantization.
void convertfp16fp32(float* dst, const at::Half* src, size_t N) {
  for (size_t i = 0; i < N; i++) {
    dst[i] = src[i];
  }
}

// Narrow fp32 de-quantized results back to fp16.
void convertfp32fp16(at::Half* dst, const float* src, size_t N) {
  for (size_t i = 0; i < N; i++) {
    dst[i] = src[i];
  }
}
} // namespace

REGISTER_CPU_OPERATOR(
    FloatToFused8BitRowwiseQuantized,
    FloatToFused8BitRowwiseQuantizedOp<float, convertfp32fp32, CPUContext>);
OPERATOR_SCHEMA(FloatToFused8BitRowwiseQuantized)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction([](const OperatorDef& /* def */,
                                const vector<TensorShape>& in) {
      vector<TensorShape> out;
      TensorShape X = in[0];
      // Each fused row carries 8 extra bytes: a float scale and a float bias.
      X.set_dims(1, X.dims(1) + 8);
      out.push_back(std::move(X));
      out[0].set_data_type(TensorProto_DataType_UINT8);
      return out;
    })
    .SetDoc(R"DOC(
Applies 8-bit row-wise quantization by determining the range
(maximum - minimum) and offset (minimum value) of each row in the input
matrix, and then scaling each element to an 8-bit number between 0 and
255. To later de-quantize values, the scale (range / 255) and offset
(bias) are stored alongside the data. More precisely, each row first
stores one quantized 8-bit value per input element, followed by the
scale as a 32-bit float in the second-to-last 4 bytes of the row and
the bias as a 32-bit float in the last 4 bytes.
)DOC")
    .Input(0, "input", "Float32 input data")
    .Output(0, "output", "Fused scale, bias and quantized data");
NO_GRADIENT(FloatToFused8BitRowwiseQuantized);

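// As a quick illustration of the row-wise scheme described above, the
// following standalone sketch (not part of this file; the helper name
// quantize_row_sketch is ours) quantizes one row of `cols` floats into
// `cols + 8` bytes laid out as
// [uint8 quantized values | float scale | float bias]:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<std::uint8_t> quantize_row_sketch(const float* src, std::size_t cols) {
  std::vector<std::uint8_t> out(cols + 2 * sizeof(float));
  const float minimum = *std::min_element(src, src + cols);
  const float maximum = *std::max_element(src, src + cols);
  const float range = maximum - minimum;
  // Guard against a zero range (constant rows) so the division stays finite.
  const float scale = range == 0.0f ? 1.0f : range / 255.0f;
  for (std::size_t i = 0; i < cols; ++i) {
    // Map [minimum, maximum] onto [0, 255] and round to the nearest integer.
    out[i] = static_cast<std::uint8_t>(std::lrintf((src[i] - minimum) / scale));
  }
  // The scale and bias (the row minimum) occupy the trailing 8 bytes.
  std::memcpy(out.data() + cols, &scale, sizeof(float));
  std::memcpy(out.data() + cols + sizeof(float), &minimum, sizeof(float));
  return out;
}
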
REGISTER_CPU_OPERATOR(
    HalfFloatToFused8BitRowwiseQuantized,
    FloatToFused8BitRowwiseQuantizedOp<at::Half, convertfp16fp32, CPUContext>);
OPERATOR_SCHEMA(HalfFloatToFused8BitRowwiseQuantized)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction([](const OperatorDef& /* def */,
                                const vector<TensorShape>& in) {
      vector<TensorShape> out;
      TensorShape X = in[0];
      // Each fused row carries 8 extra bytes: a float scale and a float bias.
      X.set_dims(1, X.dims(1) + 8);
      out.push_back(std::move(X));
      out[0].set_data_type(TensorProto_DataType_UINT8);
      return out;
    })
    .SetDoc(R"DOC(
Applies 8-bit row-wise quantization by determining the range
(maximum - minimum) and offset (minimum value) of each row in the input
matrix, and then scaling each element to an 8-bit number between 0 and
255. To later de-quantize values, the scale (range / 255) and offset
(bias) are stored alongside the data. More precisely, each row first
stores one quantized 8-bit value per input element, followed by the
scale as a 32-bit float in the second-to-last 4 bytes of the row and
the bias as a 32-bit float in the last 4 bytes.
)DOC")
    .Input(0, "input", "Float16 input data")
    .Output(0, "output", "Fused scale, bias and quantized data");
NO_GRADIENT(HalfFloatToFused8BitRowwiseQuantized);

REGISTER_CPU_OPERATOR(
    Fused8BitRowwiseQuantizedToFloat,
    Fused8BitRowwiseQuantizedToFloatOp<float, convertfp32fp32, CPUContext>);
OPERATOR_SCHEMA(Fused8BitRowwiseQuantizedToFloat)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction([](const OperatorDef& /* def */,
                                const vector<TensorShape>& in) {
      vector<TensorShape> out;
      TensorShape X = in[0];
      // Dropping the fused scale and bias shrinks each row by 8 bytes.
      X.set_dims(1, X.dims(1) - 8);
      out.push_back(std::move(X));
      out[0].set_data_type(TensorProto_DataType_FLOAT);
      return out;
    })
    .SetDoc(R"DOC(
De-quantizes the result of the
FloatToFused8BitRowwiseQuantized operator. The input is expected to
encode the scale as a 32-bit float in the second-to-last 4 bytes of each
row, followed by the bias as a 32-bit float in the last 4 bytes, and the
quantized values in the preceding bytes of the row. The output is a
matrix containing only the values, but de-quantized. De-quantization is
performed by multiplying each quantized value by its row's scale and
then adding the bias. The de-quantized values will thus not be exactly
equal to the original, un-quantized floating point values.
)DOC")
    .Input(
        0,
        "scale_bias_quantized_input",
        "Fused scale, bias and quantized data")
    .Output(0, "float_output", "Float32 data");
NO_GRADIENT(Fused8BitRowwiseQuantizedToFloat);

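// The inverse operation, as a standalone sketch (not part of this file;
// dequantize_row_sketch is our name): read the scale and bias from the
// trailing 8 bytes of a fused row, then recover each value as
// quantized * scale + bias.

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<float> dequantize_row_sketch(
    const std::uint8_t* row,
    std::size_t fused_cols) {
  const std::size_t cols = fused_cols - 2 * sizeof(float);
  float scale;
  float bias;
  std::memcpy(&scale, row + cols, sizeof(float));
  std::memcpy(&bias, row + cols + sizeof(float), sizeof(float));
  std::vector<float> out(cols);
  for (std::size_t i = 0; i < cols; ++i) {
    // Rounding during quantization makes this only approximately equal
    // to the original value, as the doc string above notes.
    out[i] = static_cast<float>(row[i]) * scale + bias;
  }
  return out;
}
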
REGISTER_CPU_OPERATOR(
    Fused8BitRowwiseQuantizedToHalfFloat,
    Fused8BitRowwiseQuantizedToFloatOp<at::Half, convertfp32fp16, CPUContext>);
OPERATOR_SCHEMA(Fused8BitRowwiseQuantizedToHalfFloat)
    .NumInputs(1)
    .NumOutputs(1)
    .TensorInferenceFunction([](const OperatorDef& /* def */,
                                const vector<TensorShape>& in) {
      vector<TensorShape> out;
      TensorShape X = in[0];
      // Dropping the fused scale and bias shrinks each row by 8 bytes.
      X.set_dims(1, X.dims(1) - 8);
      out.push_back(std::move(X));
      out[0].set_data_type(TensorProto_DataType_FLOAT16);
      return out;
    })
    .SetDoc(R"DOC(
De-quantizes the result of the
HalfFloatToFused8BitRowwiseQuantized operator. The input is expected to
encode the scale as a 32-bit float in the second-to-last 4 bytes of each
row, followed by the bias as a 32-bit float in the last 4 bytes, and the
quantized values in the preceding bytes of the row. The output is a
matrix containing only the values, but de-quantized. De-quantization is
performed by multiplying each quantized value by its row's scale and
then adding the bias. The de-quantized values will thus not be exactly
equal to the original, un-quantized floating point values.
)DOC")
    .Input(
        0,
        "scale_bias_quantized_input",
        "Fused scale, bias and quantized data")
    .Output(0, "float16_output", "Float16 data");
NO_GRADIENT(Fused8BitRowwiseQuantizedToHalfFloat);

} // namespace caffe2
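For context, a rough end-to-end usage sketch follows. It assumes the classic Caffe2 C++ API (Workspace, OperatorDef, CreateOperator, and the TensorCPU accessors, which vary between Caffe2 versions); the blob names X and Y are arbitrary.

#include "caffe2/core/init.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/workspace.h"

int main(int argc, char** argv) {
  caffe2::GlobalInit(&argc, &argv);
  caffe2::Workspace ws;

  // Fill a 2x4 fp32 input matrix with arbitrary values.
  auto* x = ws.CreateBlob("X")->GetMutable<caffe2::TensorCPU>();
  x->Resize(2, 4);
  float* data = x->mutable_data<float>();
  for (int i = 0; i < 8; i++) {
    data[i] = static_cast<float>(i);
  }

  // Run FloatToFused8BitRowwiseQuantized: "Y" becomes a 2x12 uint8
  // matrix (4 quantized bytes plus 8 fused scale/bias bytes per row).
  caffe2::OperatorDef def;
  def.set_type("FloatToFused8BitRowwiseQuantized");
  def.add_input("X");
  def.add_output("Y");
  auto op = caffe2::CreateOperator(def, &ws);
  CAFFE_ENFORCE(op && op->Run());

  const auto& y = ws.GetBlob("Y")->Get<caffe2::TensorCPU>();
  return y.dim32(1) == 12 ? 0 : 1;
}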