2 #ifndef CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_ 3 #define CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_OP_H_ 6 #include "caffe2/core/context.h" 7 #include "caffe2/core/logging.h" 8 #include "caffe2/core/operator.h" 9 #include "caffe2/operators/reducer_functors.h" 10 #include "caffe2/perfkernels/embedding_lookup.h" 11 #include "caffe2/utils/eigen_utils.h" 12 #include "caffe2/utils/math.h" 17 const float kEqualityThreshold = 1e-10f;
24 class OutDataT =
float>
27 USE_OPERATOR_CONTEXT_FUNCTIONS;
30 bool RunOnDevice()
override {
32 this,
Input(INDICES));
35 template <
typename IndexType>
36 bool DoRunWithType() {
37 auto& dataInput =
Input(DATA);
38 auto& lengthsInput =
Input(LENGTHS);
40 auto* scale_bias =
Input(SCALE_BIAS).template data<float>();
41 CAFFE_ENFORCE_EQ(1, lengthsInput.dim(),
"LENGTHS must be a vector");
42 const int64_t outputSize = lengthsInput.size(0);
44 auto& indicesInput =
Input(INDICES);
45 CAFFE_ENFORCE_EQ(2,
Input(SCALE_BIAS).dim(),
"scale_bias has to be matrix");
48 Input(SCALE_BIAS).size(0),
49 "scale_bias must have the same first dim as data");
52 Input(SCALE_BIAS).size(1),
53 "the second dim of scale_bias has to be equal to 2");
54 CAFFE_ENFORCE_EQ(1, indicesInput.dim(),
"INDICES must be a vector");
55 const IndexType* indices = indicesInput.template data<IndexType>();
56 int64_t dataToReduceSize = indicesInput.size(0);
58 const int* lengths = lengthsInput.template data<int>();
59 vector<int64_t> shape = dataInput.sizes().vec();
60 shape[0] = outputSize;
61 auto* output = Output(0, shape, at::dtype<OutDataT>());
62 const float* w =
nullptr;
64 w =
Input(WEIGHTS).template data<float>();
66 int64_t in_block_size = dataInput.size_from_dim(1);
67 OutDataT* out = output->template mutable_data<OutDataT>();
68 const uint8_t* input_data = dataInput.template data<uint8_t>();
71 const int64_t indices_size = indicesInput.numel();
72 const int64_t N = dataInput.size(0);
92 INDICES = 1 + USE_WEIGHTS,
93 LENGTHS = 2 + USE_WEIGHTS,
94 SCALE_BIAS = 3 + USE_WEIGHTS
98 template <
class Context>
101 USE_OPERATOR_CONTEXT_FUNCTIONS;
103 bool RunOnDevice()
override {
104 auto& input =
Input(DATA_FLOAT);
106 auto* input_data = input.template data<float>();
107 auto* output = Output(DATA_UINT8, input.sizes(), at::dtype<uint8_t>());
108 vector<int64_t> scale_bias_dims = {input.size(0), 2};
109 auto* scale_bias = Output(SCALE_BIAS, scale_bias_dims, at::dtype<float>());
110 auto* output_data = output->template mutable_data<uint8_t>();
111 float* scale_bias_data = scale_bias->template mutable_data<float>();
112 size_t n_blocks = input.size(0);
113 size_t block_size = input.size_from_dim(1);
114 for (
size_t i = 0; i < n_blocks; ++i) {
115 ConstEigenVectorArrayMap<float> input_row(
116 input_data + i * block_size, block_size);
117 EigenVectorArrayMap<uint8_t> output_row(
118 output_data + i * block_size, block_size);
119 auto min_element = input_row.minCoeff();
120 auto max_element = input_row.maxCoeff();
121 if (max_element - min_element < kEqualityThreshold) {
122 scale_bias_data[2 * i] = 1.0f;
123 scale_bias_data[2 * i + 1] = min_element;
124 memset(output_data + i * block_size, 0, block_size);
126 scale_bias_data[2 * i] = (max_element - min_element) / 255.0f;
127 scale_bias_data[2 * i + 1] = min_element;
128 const float inv_scale = 1.0f / scale_bias_data[2 * i];
129 output_row = ((input_row - scale_bias_data[2 * i + 1]) * inv_scale)
131 .template cast<uint8_t>();
138 INPUT_TAGS(DATA_FLOAT);
139 OUTPUT_TAGS(DATA_UINT8, SCALE_BIAS);
142 template <
class Context>
145 USE_OPERATOR_CONTEXT_FUNCTIONS;
147 bool RunOnDevice()
override {
148 auto& input =
Input(DATA_UINT8);
149 auto& scale_bias =
Input(SCALE_BIAS);
151 CAFFE_ENFORCE_EQ(2, scale_bias.dim(),
"scale_bias has to be matrix");
155 "scale_bias must have the same first dim as data");
159 "the second dim of scale_bias has to be equal to 2");
160 auto* output = Output(DATA_FLOAT, input.sizes(), at::dtype<float>());
161 auto* input_data = input.template data<uint8_t>();
162 auto* scale_bias_data = scale_bias.template data<float>();
164 auto* output_data = output->template mutable_data<float>();
165 size_t block_size = input.size_from_dim(1);
166 size_t n_blocks = input.size(0);
168 for (
size_t i = 0; i < n_blocks; ++i) {
169 ConstEigenVectorArrayMap<uint8_t> input_row(
170 input_data + i * block_size, block_size);
171 EigenVectorArrayMap<float> output_row(
172 output_data + i * block_size, block_size);
173 output_row = input_row.template cast<float>() * scale_bias_data[2 * i] +
174 scale_bias_data[2 * i + 1];
180 INPUT_TAGS(DATA_UINT8, SCALE_BIAS);
181 OUTPUT_TAGS(DATA_FLOAT);
184 #endif // CAFFE2_OPERATORS_LENGTHS_REDUCER_ROWWISE_8bits_H_
// Reference notes (from the Caffe2 API documentation):
//
//   const Tensor& Input(int idx, DeviceType type = Context::GetDeviceType())
//     Retrieves a non-owning reference to the input at position 'idx' for
//     this operator.
//
//   A global dictionary holds information about which Caffe2 modules have
//   been loaded in the current process.
//
//   void EmbeddingLookup(
//       const std::int64_t block_size, const std::int64_t output_size,
//       const std::int64_t index_size, const std::int64_t data_size,
//       const InType* input, const IndexType* indices, const int* lengths,
//       const float* weights, const float* scale_bias,
//       bool normalize_by_lengths, OutType* out)
//     Embedding lookup with reduction.