Caffe2 - C++ API
A deep learning, cross platform ML framework
fused_rowwise_8bit_conversion_ops.cc
1 
17 #include "caffe2/operators/fused_rowwise_8bit_conversion_ops.h"
18 #include "caffe2/core/registry.h"
19 
20 namespace caffe2 {
21 REGISTER_CPU_OPERATOR(
22  FloatToFused8BitRowwiseQuantized,
23  FloatToFused8BitRowwiseQuantizedOp<CPUContext>);
24 OPERATOR_SCHEMA(FloatToFused8BitRowwiseQuantized)
25  .NumInputs(1)
26  .NumOutputs(1)
27  .SetDoc(R"DOC(
28 Applies 8-bit row-wise quantization by determining the range
29 (maximum - minimum) and offset (minimum value) of each row in the input
30 matrix, and then scaling each element to an 8-bit number between 0 and
31 255. To later de-quantize values, the scale (range / 255) and offset
32 (bias) are stored alongside the data. More precisely, the first 4 bytes
33 of each row in the output matrix are a 32-bit float storing the scale,
34 the next 4 bytes store the bias as a 32-bit float, and all remaining
35 bytes in the row encode single quantized values.)
36 )DOC")
37  .Input(0, "input", "Float32 input data")
38  .Output(0, "output", "Fused scale, bias and quantized data");
39 NO_GRADIENT(FloatToFused8BitRowwiseQuantized);
40 
41 REGISTER_CPU_OPERATOR(
42  Fused8BitRowwiseQuantizedToFloat,
43  Fused8BitRowwiseQuantizedToFloatOp<CPUContext>);
44 OPERATOR_SCHEMA(Fused8BitRowwiseQuantizedToFloat)
45  .NumInputs(1)
46  .NumOutputs(1)
47  .SetDoc(R"DOC(
48 De-quantizes the result of the
49 FloatToFused8BitRowwiseQuantized operator. The input is expected to
50 encode the scale as a 32-bit float in the first 4 bytes of each row,
51 followed by the bias as a 32-bit float in the next 4 bytes, followed by
52 the quantized values in the remaining bytes of the row. The output is a
53 matrix containing only the values, but de-quantized. De-quantization is
54 performed by multiplying each value by its row's scale and bias
55 parameters. The de-quantized values will thus not be exactly equal to
56 the original, un-quantized floating point values.
57 )DOC")
58  .Input(
59  0,
60  "scale_bias_quantized_input",
61  "Fused scale, bias and quantized data")
62  .Output(0, "float_input", "Float32 data");
63 NO_GRADIENT(Fused8BitRowwiseQuantizedToFloat);
64 } // namespace caffe2
Copyright (c) 2016-present, Facebook, Inc.