// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// lengths_reducer_rowwise_8bit_ops.h
#ifndef CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_
#define CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_
// SparseLengthsSum8bits

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/reducer_functors.h"
#include "caffe2/perfkernels/embedding_lookup.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

namespace {
// Rows whose (max - min) falls below this are treated as constant.
const float kEqualityThreshold = 1e-10f;
}
35 template <
36  class Context,
37  bool USE_WEIGHTS = 0,
38  bool USE_MEAN = 0,
39  class OutDataT = float>
40 class SparseLengths8BitsRowwiseOp : public Operator<Context> {
41  public:
42  USE_OPERATOR_CONTEXT_FUNCTIONS;
43  USE_SIMPLE_CTOR_DTOR(SparseLengths8BitsRowwiseOp);
44 
45  bool RunOnDevice() override {
47  this, Input(INDICES));
48  }
49 
50  template <typename IndexType>
51  bool DoRunWithType() {
52  auto& dataInput = Input(DATA);
53  auto& lengthsInput = Input(LENGTHS);
54  auto* output = Output(0);
55  auto* scale_bias = Input(SCALE_BIAS).template data<float>();
56  CAFFE_ENFORCE_EQ(1, lengthsInput.ndim(), "LENGTHS must be a vector");
57  const TIndex outputSize = lengthsInput.dim(0);
58 
59  auto& indicesInput = Input(INDICES);
60  CAFFE_ENFORCE_EQ(
61  2, Input(SCALE_BIAS).ndim(), "scale_bias has to be matrix");
62  CAFFE_ENFORCE_EQ(
63  dataInput.dim(0),
64  Input(SCALE_BIAS).dim(0),
65  "scale_bias must have the same first dim as data");
66  CAFFE_ENFORCE_EQ(
67  2,
68  Input(SCALE_BIAS).dim(1),
69  "the second dim of scale_bias has to be equal to 2");
70  CAFFE_ENFORCE_EQ(1, indicesInput.ndim(), "INDICES must be a vector");
71  const IndexType* indices = indicesInput.template data<IndexType>();
72  TIndex dataToReduceSize = indicesInput.dim(0);
73 
74  const int* lengths = lengthsInput.template data<int>();
75  vector<TIndex> shape = dataInput.dims();
76  shape[0] = outputSize;
77  output->Resize(shape);
78  const float* w = nullptr;
79  if (USE_WEIGHTS) {
80  w = Input(WEIGHTS).template data<float>();
81  }
82  TIndex in_block_size = dataInput.size_from_dim(1);
83  OutDataT* out = output->template mutable_data<OutDataT>();
84  const uint8_t* input_data = dataInput.template data<uint8_t>();
85 
86  // delegate work to perfkernel that branches based on architecture
87  const TIndex indices_size = indicesInput.size();
88  const TIndex N = dataInput.dim(0);
90  in_block_size,
91  outputSize,
92  indices_size,
93  N, // embeding table length
94  input_data,
95  indices,
96  lengths,
97  w,
98  scale_bias,
99  USE_MEAN,
100  out);
101 
102  return true;
103  }
104 
105  enum {
106  DATA = 0,
107  WEIGHTS = 1,
108  INDICES = 1 + USE_WEIGHTS,
109  LENGTHS = 2 + USE_WEIGHTS,
110  SCALE_BIAS = 3 + USE_WEIGHTS
111  };
112 };
114 template <class Context>
115 class FloatToRowwiseQuantized8BitsOp : public Operator<Context> {
116  public:
117  USE_OPERATOR_CONTEXT_FUNCTIONS;
118  USE_SIMPLE_CTOR_DTOR(FloatToRowwiseQuantized8BitsOp);
119  bool RunOnDevice() override {
120  auto& input = Input(DATA_FLOAT);
121  auto* output = Output(DATA_UINT8);
122  auto* scale_bias = Output(SCALE_BIAS);
123  auto* input_data = input.template data<float>();
124  output->ResizeLike(input);
125  vector<TIndex> scale_bias_dims = {input.dim(0), 2};
126  scale_bias->Resize(scale_bias_dims);
127  auto* output_data = output->template mutable_data<uint8_t>();
128  float* scale_bias_data = scale_bias->template mutable_data<float>();
129  size_t n_blocks = input.dim(0);
130  size_t block_size = input.size_from_dim(1);
131  for (size_t i = 0; i < n_blocks; ++i) {
132  ConstEigenVectorArrayMap<float> input_row(
133  input_data + i * block_size, block_size);
134  EigenVectorArrayMap<uint8_t> output_row(
135  output_data + i * block_size, block_size);
136  auto min_element = input_row.minCoeff();
137  auto max_element = input_row.maxCoeff();
138  if (max_element - min_element < kEqualityThreshold) {
139  scale_bias_data[2 * i] = 1.0f;
140  scale_bias_data[2 * i + 1] = min_element;
141  memset(output_data + i * block_size, 0, block_size);
142  } else {
143  scale_bias_data[2 * i] = (max_element - min_element) / 255.0f;
144  scale_bias_data[2 * i + 1] = min_element;
145  const float inv_scale = 1.0f / scale_bias_data[2 * i];
146  output_row = ((input_row - scale_bias_data[2 * i + 1]) * inv_scale)
147  .round()
148  .template cast<uint8_t>();
149  }
150  }
151  return true;
152  }
153 
154  private:
155  INPUT_TAGS(DATA_FLOAT);
156  OUTPUT_TAGS(DATA_UINT8, SCALE_BIAS);
157 };
159 template <class Context>
160 class Rowwise8BitQuantizedToFloatOp : public Operator<Context> {
161  public:
162  USE_OPERATOR_CONTEXT_FUNCTIONS;
163  USE_SIMPLE_CTOR_DTOR(Rowwise8BitQuantizedToFloatOp);
164  bool RunOnDevice() override {
165  auto& input = Input(DATA_UINT8);
166  auto& scale_bias = Input(SCALE_BIAS);
167  auto* output = Output(DATA_FLOAT);
168  CAFFE_ENFORCE_EQ(2, scale_bias.ndim(), "scale_bias has to be matrix");
169  CAFFE_ENFORCE_EQ(
170  input.dim(0),
171  scale_bias.dim(0),
172  "scale_bias must have the same first dim as data");
173  CAFFE_ENFORCE_EQ(
174  2,
175  scale_bias.dim(1),
176  "the second dim of scale_bias has to be equal to 2");
177  output->ResizeLike(input);
178  auto* input_data = input.template data<uint8_t>();
179  auto* scale_bias_data = scale_bias.template data<float>();
180 
181  auto* output_data = output->template mutable_data<float>();
182  size_t block_size = input.size_from_dim(1);
183  size_t n_blocks = input.dim(0);
184 
185  for (size_t i = 0; i < n_blocks; ++i) {
186  ConstEigenVectorArrayMap<uint8_t> input_row(
187  input_data + i * block_size, block_size);
188  EigenVectorArrayMap<float> output_row(
189  output_data + i * block_size, block_size);
190  output_row = input_row.template cast<float>() * scale_bias_data[2 * i] +
191  scale_bias_data[2 * i + 1];
192  }
193  return true;
194  }
195 
196  private:
197  INPUT_TAGS(DATA_UINT8, SCALE_BIAS);
198  OUTPUT_TAGS(DATA_FLOAT);
199 };
} // namespace caffe2
#endif // CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_
// Copyright (c) 2016-present, Facebook, Inc.
//
// Delegates pooling to the perfkernel entry point:
//   void EmbeddingLookup(const TIndex block_size, const TIndex output_size,
//       const TIndex index_size, const TIndex data_size, const InType* input,
//       const IndexType* indices, const int* lengths, const float* weights,
//       const float* scale_bias, bool normalize_by_lengths, OutType* out);
// Embedding lookup with reduction.