Caffe2 - C++ API
A deep learning, cross-platform ML framework
1 #include "caffe2/operators/fused_rowwise_random_quantization_ops.h"
2 #include <c10/util/Registry.h>
3 #include "caffe2/utils/math.h"
5 namespace caffe2 {
// Host byte-order probe. Expands to an immediately-invoked lambda that stores
// the 32-bit integer 1 and returns true when the first byte in memory is 1,
// i.e. when the least-significant byte comes first (little-endian).
// The operators below enforce this before touching the fused byte layout.
7 #define IS_LITTLE_ENDIAN \
8  [] { \
9  const int32_t kValue = 1; \
10  return reinterpret_cast<const uint8_t*>(&kValue)[0] == 1; \
11  }()
// Encodes a float matrix into the fused row-wise quantized byte format.
// Each output row is 10 header bytes (bitwidth, tail, min, max -- see the
// layout comment below) plus one byte per group of (8 / bitwidth_) input
// values. The per-row packing itself is delegated to
// math::quantize_and_compress. Returns true once every row was processed.
// NOTE(review): this listing appears to have dropped a few lines (the opening
// of the dimension check, the `#if` that pairs with the `#else` below, and
// some call arguments) -- confirm against the real source before editing.
13 template <class Context>
14 bool FloatToFusedRandRowwiseQuantizedOp<Context>::RunOnDevice() {
15  CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
17  const auto& input = Input(DATA_FLOAT);
// Arguments of the "input must be 2-D" check; the line that opens the call
// is not visible in this listing.
20  input.dim(),
21  2,
22  "Expect input to be a matrix. Reshape the input tensor to a matrix for usage.");
24  const auto input_rows = input.size(0);
25  const auto input_columns = input.size(1);
27  // The "fused" representation stores [bitwidth][tail][min][max] together
28  // with the row-wise quantized data in one tensor. Since we store 8/bitwidth
29  // quantized values in one byte, the last buckets of some bytes may have
30  // unused bits. In total, *tail* buckets are left unused.
31  // We encode *bitwidth* and *tail* at the beginning of
32  // each row, followed by 32-bit floating-point data representing min and max.
33  // | bitwidth | tail | min | max | ... int8 data ... |
34  // | 1B | 1B | 4B | 4B | ...output_data....|
35  // In output_data: the b-th bucket of the i-th byte stores
36  // the i-th data of the b-th segment of input row
// NOTE(review): divides by bitwidth_; a value of 0 would divide by zero here
// and a value above 8 would make data_per_byte 0 and divide by zero on the
// next statement. Presumably bitwidth_ is validated where it is set (not
// visible in this file) -- TODO confirm.
37  size_t data_per_byte = 8 / bitwidth_;
38  // How many bytes in the output
// Ceiling division: number of packed data bytes needed for one row.
39  size_t segment_size = (input_columns + data_per_byte - 1) / data_per_byte;
// 10 = 1 (bitwidth) + 1 (tail) + 4 (min) + 4 (max) header bytes per row.
40  const std::vector<int64_t> output_dimensions = {
41  input_rows, 10 + static_cast<int64_t>(segment_size)};
42  auto* output =
43  Output(DATA_FUSED_QUANTIZED, output_dimensions, at::dtype<uint8_t>());
45  const auto* input_data = input.template data<float>();
46  auto* output_data = output->template mutable_data<uint8_t>();
47  const size_t output_columns = static_cast<size_t>(output->size(1));
// Zero the whole output first (elements are uint8_t, so numel() is also the
// byte count). Presumably this lets the packing step OR bits into place and
// leaves unused tail buckets as zero -- TODO confirm in quantize_and_compress.
48  memset(output_data, 0, output->numel());
// One random value per input column; the buffer is refilled for every row.
50  if (random_) {
51  random_buffer_.resize(input_columns);
52  }
// NOTE(review): `row` is size_t while input_rows takes whatever type
// input.size(0) returns; if that is signed, this comparison mixes signedness.
54  for (size_t row = 0; row < input_rows; ++row) {
55  if (random_) {
// First branch of a conditional-compilation block (its `#if` line is missing
// from this listing): fill the buffer with uniform values in [0, 1) via
// vsRngUniform. A failure is only logged; processing continues with whatever
// random_buffer_ currently holds.
57  int status = vsRngUniform(
59  vslStream_,
60  input_columns,
62  0.0f,
63  1.0f);
64  if (status != VSL_ERROR_OK) {
65  LOG(WARNING) << "vsRngUniform returns " << status;
66  }
67 #else
// Fallback branch: draw each value from the member distribution/generator.
// NOTE(review): `int i` is compared against input_columns, whose type comes
// from input.size(1); a wider type there would make this a narrowing compare.
68  for (int i = 0; i < input_columns; ++i) {
69  random_buffer_[i] = (*dis_)(gen_);
70  }
71 #endif
72  }
// Quantize and pack one row: source is the row-th slice of the float input,
// destination is the row-th slice (header included) of the byte output.
// The final argument of this call is not visible in this listing.
74  math::quantize_and_compress(
75  input_data + row * input_columns,
76  output_data + row * output_columns,
77  input_columns,
78  bitwidth_,
79  random_,
81  }
83  return true;
84 }
// Decodes a fused row-wise quantized byte matrix back into floats.
// The bitwidth and tail are read from the first two bytes of the input buffer
// (i.e. the header of row 0) and applied to every row; each row is then
// expanded by math::decompress_and_dequantize. Returns true on completion.
// NOTE(review): this listing appears to have dropped the opening lines of two
// enforcement calls below -- confirm against the real source before editing.
86 template <class Context>
87 bool FusedRandRowwiseQuantizedToFloatOp<Context>::RunOnDevice() {
88  CAFFE_ENFORCE(IS_LITTLE_ENDIAN, "Unsupported endianness");
90  const auto& input = Input(DATA_FUSED_QUANTIZED);
92  CAFFE_ENFORCE_EQ(input.dim(), 2, "Expect input to be a matrix.");
// Arguments of a minimum-size check on the input (opening line not visible).
// NOTE(review): the message only requires 4 elements, but the code below
// subtracts a 10-byte header from the column count; a stricter check on
// input.size(1) >= 10 looks necessary -- TODO confirm.
94  input.numel(),
95  4,
96  "Expect input to have size greater than or equal to 4.");
98  const auto input_rows = input.size(0);
99  const auto input_columns = input.size(1);
100  const auto* input_data = input.template data<uint8_t>();
// Header byte 0 of the first row: bits per quantized value.
101  const size_t bitwidth = input_data[0];
// Condition and message of the bitwidth validation (opening line not visible).
103  bitwidth == 1 || bitwidth == 2 || bitwidth == 4 || bitwidth == 8,
104  "Unsupported bitwidth");
// Header byte 1 of the first row: number of unused buckets at the row's end.
105  const size_t tail = input_data[1];
// (data bytes per row) * (values per byte) - unused buckets.
// NOTE(review): if input_columns < 10, or tail exceeds the bucket count, this
// expression wraps around in size_t and yields a huge output width.
106  const size_t output_columns = (input_columns - 10) * (8 / bitwidth) - tail;
107  const std::vector<int64_t> output_dimensions = {
108  input_rows, static_cast<int64_t>(output_columns)};
109  auto* output = Output(DATA_FLOAT, output_dimensions, at::dtype<float>());
110  auto* output_data = output->template mutable_data<float>();
// NOTE(review): `row` is size_t while input_rows takes whatever type
// input.size(0) returns; if that is signed, this comparison mixes signedness.
111  for (size_t row = 0; row < input_rows; ++row) {
// Each call receives the full fused row (header included) and its length.
112  math::decompress_and_dequantize(
113  input_data + row * input_columns,
114  output_data + row * output_columns,
115  input_columns);
116  }
118  return true;
119 }
// CPU registration, schema and gradient declaration for the
// FloatToFusedRandRowwiseQuantized operator.
// NOTE(review): the line that opens the registration call (which takes the
// two arguments below) is not visible in this listing.
124  FloatToFusedRandRowwiseQuantized,
125  FloatToFusedRandRowwiseQuantizedOp<CPUContext>);
126 OPERATOR_SCHEMA(FloatToFusedRandRowwiseQuantized)
127  .NumInputs(1)
128  .NumOutputs(1)
// Shape inference: keeps dim 0, replaces dim 1 with
// 10 header bytes + ceil(columns / (8 / bitwidth)), and marks the output as
// UINT8. Mirrors the size computation in RunOnDevice above.
129  .TensorInferenceFunction([](const OperatorDef& def,
130  const vector<TensorShape>& in) {
131  ArgumentHelper helper(def);
// Defaults to 8 bits when the "bitwidth" argument is absent.
132  auto bitwidth = helper.GetSingleArgument<int32_t>("bitwidth", 8);
// NOTE(review): bitwidth is not validated here; 0 divides by zero on this
// line and a value above 8 makes data_per_byte 0, dividing by zero below.
133  size_t data_per_byte = 8 / bitwidth;
134  vector<TensorShape> out;
135  TensorShape X = in[0];
// NOTE(review): accesses dims(1) without checking that the input shape has
// two dimensions.
136  X.set_dims(1, 10 + (X.dims(1) + data_per_byte - 1) / data_per_byte);
137  out.push_back(std::move(X));
138  out[0].set_data_type(TensorProto_DataType_UINT8);
139  return out;
140  })
// NOTE(review): two statements in the doc string below look inconsistent with
// the code and with the doc's own example (bitwidth 2 -> 4 levels, tail 3):
// the tail range is given as [1-8] although the unused-bucket count computed
// from ceil division is 0..(8/bitwidth - 1), and the level index range is
// written [0:1:2^bitwidth] although there are 2^bitwidth levels
// (indices 0..2^bitwidth - 1). The string is runtime text, so it is left
// unchanged here -- TODO confirm and fix deliberately.
141  .SetDoc(R"DOC(
142 Applies row-wise stochastic/random quantization by determining the range of
143 each row in the input matrix, and then quantize each element to one of two
144 closest discrete levels by randomly drawing Bernoulli distribution.
145 The method is extended from TernGrad [1],
146 which randomly quantizes gradients to three levels to reduce communication in distributed training.
147 The format of each row (x) in the output matrix is [bitwidth][tail][min][max][data]:
148 bitwidth[1 Byte]: bitwidth per data [1, 2, 4 or 8];
149 tail[1 Byte]: the number of unused buckets [1-8] (One byte is split to 8/bitwidth buckets and each bucket stores one low-precision data in bitwidth bits);
150 min[4 Bytes]: the minimum floating value min(x);
151 max[4 Bytes]: the maximum floating value max(x);
152 data: quantized data.
153 The quantization is uniform with levels q = min + (max-min)/(2^bitwidth - 1)*[0:1:2^bitwidth].
154 During stochastic/random quantization x'=Quantize(x), for q_j < x_i <= q_{j+1}, we draw quantization x'_i from Bernoulli distributions with
155 P(x'_i = q_{j+1}) = (x_i - q_j)/(q_{j+1} - q_j), and
156 P(x'_i = q_j) = (q_{j+1} - x_i)/(q_{j+1} - q_j) where x'_i is the quantized value of x_i.
157 [1] proved E{x'_i}=x_i, which is an unbiased approximation. More details are in the paper.
158 For example, suppose targeted bitwidth = 2 and x = [0.3, -1.4, -0.6, 0.9, 1.0],
159 then tail = 3, min = -1.4, max = 1.0 and q = [-1.4, -0.6, 0.2, 1.0].
160 x_1 = 0.3 will be quantized to x'_1 = 0.2 with probability 7/8 and to x'_1 = 1.0 with probability 1/8.
161 The storage format of quantized data is: [x'_1|x'_3|x'_5|xxx]-[x'_2|x'_4|xxx|xxx].
162 In general, a input row is split to multiple segments. One segment is a continuous subarray of the row,
163 and its length is the number of bytes storing quantized data in the output matrix.
164 The b-th bucket of the i-th byte stores the i-th data of the b-th segment of input row.
166 [1] Wen, Wei, Cong Xu, Feng Yan, Chunpeng Wu, Yandan Wang, Yiran Chen, and Hai Li.
167 "Terngrad: Ternary gradients to reduce communication in distributed deep learning."
168 In Advances in Neural Information Processing Systems, pp. 1508-1518. 2017.
170 )DOC")
171  .Input(0, "input", "Float32 input data")
172  .Output(0, "output", "Fused bitwidth, tail, min, max and quantized data")
// NOTE(review): "quantiz" in the help text below is a typo for "quantize";
// it is a runtime string, so it is left as-is in this documentation pass.
173  .Arg("bitwidth", "How many bits to quantiz per data (defaults to 8).")
174  .Arg("random", "random or not (True). False is set up for unittest.");
// The quantization operator declares no gradient.
175 NO_GRADIENT(FloatToFusedRandRowwiseQuantized);
// CPU registration, schema and gradient declaration for the
// FusedRandRowwiseQuantizedToFloat operator.
// NOTE(review): the line that opens the registration call (which takes the
// two arguments below) is not visible in this listing.
178  FusedRandRowwiseQuantizedToFloat,
179  FusedRandRowwiseQuantizedToFloatOp<CPUContext>);
180 OPERATOR_SCHEMA(FusedRandRowwiseQuantizedToFloat)
181  .NumInputs(1)
182  .NumOutputs(1)
// Shape inference: the input shapes are ignored (unnamed parameter). For each
// output declared in the OperatorDef, emit a FLOAT tensor flagged as having
// an unknown shape. The output width is derived in RunOnDevice from the
// bitwidth and tail bytes stored in the tensor data itself.
183  .TensorInferenceFunction([](const OperatorDef& def,
184  const vector<TensorShape>&) {
185  vector<TensorShape> out;
186  for (int i = 0; i < def.output_size(); i++) {
187  TensorShape ts;
188  ts.set_unknown_shape(true);
189  ts.set_data_type(TensorProto_DataType_FLOAT);
190  out.push_back(ts);
191  }
192  return out;
193  })
194  .SetDoc(R"DOC(
195 De-quantizes the result of the FloatToFusedRandRowwiseQuantized operator.
196 Refer FloatToFusedRandRowwiseQuantized operator for details.
197 )DOC")
198  .Input(
199  0,
200  "quantized_input",
201  "Fused bitwidth, tail, min, max and quantized data")
202  .Output(0, "float_input", "Float32 data");
// The de-quantization operator declares no gradient.
203 NO_GRADIENT(FusedRandRowwiseQuantizedToFloat);
204 } // namespace caffe2
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13