Caffe2 - C++ API
A deep learning, cross-platform ML framework
one_hot_ops.cc
1 #include "caffe2/operators/one_hot_ops.h"
2 
3 #include "caffe2/core/operator.h"
4 #include "caffe2/core/tensor.h"
5 
6 namespace caffe2 {
7 
8 template <>
9 template <typename T>
10 bool BatchOneHotOp<CPUContext>::DoRunWithType() {
11  auto& input = Input(X);
12  auto& lens = Input(LENS);
13  auto& vals = Input(VALS);
14  CAFFE_ENFORCE_GE(input.dim(), 1);
15  auto N = input.size(0);
16  auto D = input.size_from_dim(1);
17  CAFFE_ENFORCE_EQ(lens.numel(), D);
18 
19  const auto* lens_data = lens.template data<int32_t>();
20  int64_t output_dim = 0;
21  valsOffsets_.resize(D + 1);
22  for (int64_t i = 0; i < D; i++) {
23  CAFFE_ENFORCE_GE(lens_data[i], 0);
24  valsOffsets_[i] = output_dim;
25  output_dim += lens_data[i];
26  }
27  valsOffsets_[D] = output_dim;
28 
29  CAFFE_ENFORCE_EQ(vals.numel(), output_dim);
30 
31  auto* output = Output(ONE_HOT, {N, output_dim}, at::dtype<T>());
32 
33  const auto* input_data = input.template data<T>();
34  const auto* vals_data = vals.template data<T>();
35  auto* output_data = output->template mutable_data<T>();
36 
37  for (int64_t i = 0; i < N; ++i) {
38  for (int64_t j = 0; j < D; j++) {
39  const auto input_val = input_data[i * D + j];
40  for (int64_t k = valsOffsets_[j]; k < valsOffsets_[j + 1]; ++k) {
41  output_data[k] = vals_data[k] == input_val;
42  }
43  }
44  output_data += output_dim;
45  }
46 
47  return true;
48 }
49 
50 vector<TensorShape> TensorInferenceForBatchOneHot(
51  const OperatorDef& /* def */,
52  const vector<TensorShape>& in) {
53  std::vector<int64_t> output_dims(2);
54  output_dims[0] = in[0].dims(0); // N
55  output_dims[1] = in[2].dims(0); // vals.size()
56  return vector<TensorShape>{
57  CreateTensorShape(vector<int64_t>{output_dims}, in[0].data_type())};
58 }
59 
60 vector<TensorShape> TensorInferenceForBucketBatchOneHot(
61  const OperatorDef& /* def */,
62  const vector<TensorShape>& in) {
63  std::vector<int64_t> output_dims(2);
64  output_dims[0] = in[0].dims(0); // N
65  output_dims[1] = in[1].dims(0) + in[2].dims(0); // vals.size() + length.size()
66  return vector<TensorShape>{
67  CreateTensorShape(vector<int64_t>{output_dims}, in[0].data_type())};
68 }
69 
70 OpSchema::Cost CostInferenceForBatchOneHot(
71  const OperatorDef& def,
72  const vector<TensorShape>& in) {
73  CAFFE_ENFORCE_EQ(in.size(), 3, "BatchOneHot requires three inputs");
74  struct OpSchema::Cost c;
75  const TensorShape output = TensorInferenceForBatchOneHot(def, in)[0];
76 
77  const auto& data = in[0];
78  const auto& length = in[1];
79  const auto& values = in[2];
80 
81  uint64_t nBytesData = nElemFromDim(data) * sizeof(data.data_type());
82  uint64_t nBytesLength = nElemFromDim(length) * sizeof(length.data_type());
83  uint64_t nBytesValues = nElemFromDim(values) * sizeof(values.data_type());
84  c.flops = 0;
85  c.bytes_read = nBytesData + nBytesLength + nBytesValues;
86  c.bytes_written = nElemFromDim(output) * sizeof(output.data_type());
87  c.params_bytes = 0;
88  return c;
89 }
90 
91 template <>
92 void OneHotOp<CPUContext>::DoOneHotOp(
93  int64_t batch_size,
94  int64_t index_size,
95  const Tensor& indices,
96  Tensor* one_hots) {
97  const int64_t* indices_ptr = indices.template data<int64_t>();
98  float* one_hots_ptr = one_hots->template mutable_data<float>();
99  memset(one_hots_ptr, 0, one_hots->nbytes());
100  for (int i = 0; i < batch_size; ++i) {
101  auto label_idx = indices_ptr[i];
102  DCHECK((0 <= label_idx) && (label_idx < index_size));
103  one_hots_ptr[label_idx] = 1.0;
104  one_hots_ptr += index_size;
105  }
106 }
107 
108 template <>
109 bool BatchBucketOneHotOp<CPUContext>::RunOnDevice() {
110  auto& input = Input(X);
111  auto& lens = Input(LENS);
112  auto& boundaries = Input(BOUNDARIES);
113  CAFFE_ENFORCE_GE(input.dim(), 1);
114  auto N = input.size(0);
115  auto D = input.size_from_dim(1);
116  CAFFE_ENFORCE_EQ(lens.numel(), D);
117 
118  const auto* lens_data = lens.template data<int32_t>();
119 
120  CAFFE_ENFORCE_EQ(
121  std::accumulate(lens_data, lens_data + lens.numel(), 0),
122  boundaries.numel(),
123  "The sum of length should be equal to the length of boundaries");
124 
125  int64_t output_dim = 0;
126  for (int64_t i = 0; i < D; i++) {
127  CAFFE_ENFORCE_GT(lens_data[i], 0);
128  // Number of buckets is number of bucket edges + 1
129  output_dim += (lens_data[i] + 1);
130  }
131 
132  auto* output = Output(ONE_HOT, {N, output_dim}, at::dtype<float>());
133 
134  const auto* input_data = input.template data<float>();
135  const auto* boundaries_data = boundaries.template data<float>();
136  auto* output_data = output->template mutable_data<float>();
137 
138  math::Set<float, CPUContext>(output->numel(), 0.f, output_data, &context_);
139 
140  int64_t pos = 0;
141  for (int64_t i = 0; i < N; i++) {
142  auto* boundaries_offset = boundaries_data;
143  int64_t output_offset = 0;
144 
145  for (int64_t j = 0; j < D; j++) {
146  // here we assume the boundary values for each feature are sorted
147  int64_t lower_bucket_idx = std::lower_bound(
148  boundaries_offset,
149  boundaries_offset + lens_data[j],
150  input_data[pos]) -
151  boundaries_offset;
152 
153  int64_t upper_bucket_idx = std::upper_bound(
154  boundaries_offset,
155  boundaries_offset + lens_data[j],
156  input_data[pos]) -
157  boundaries_offset;
158 
159  int64_t bucket_idx = (lower_bucket_idx + upper_bucket_idx) / 2;
160  output_data[i * output_dim + output_offset + bucket_idx] = 1.0;
161  boundaries_offset += lens_data[j];
162  output_offset += (lens_data[j] + 1);
163  pos++;
164  }
165  }
166 
167  return true;
168 };
169 
170 class SegmentOneHotOp : public Operator<CPUContext> {
171  public:
172  template <class... Args>
173  explicit SegmentOneHotOp(Args&&... args)
174  : Operator(std::forward<Args>(args)...) {}
175 
176  bool RunOnDevice() override {
177  auto& lengths = Input(0);
178  auto& indices = Input(1);
179  auto& index_size_tensor = Input(2);
180  CAFFE_ENFORCE(lengths.dim() == 1);
181  CAFFE_ENFORCE(indices.dim() == 1);
182  CAFFE_ENFORCE(index_size_tensor.numel() == 1);
183  auto batch_size = lengths.numel();
184  auto index_size = *index_size_tensor.data<int64_t>();
185  CAFFE_ENFORCE(index_size > 0);
186 
187  auto* lengths_ptr = lengths.data<int32_t>();
188  auto* indices_ptr = indices.data<int64_t>();
189 
190  auto* one_hots = Output(0, {batch_size, index_size}, at::dtype<float>());
191  auto* one_hots_ptr = one_hots->template mutable_data<float>();
192  if (one_hots->numel() == 0) {
193  return true;
194  }
195  memset(one_hots_ptr, 0, one_hots->nbytes());
196  int el_idx = 0;
197  for (int i = 0; i < batch_size; ++i) {
198  for (int j = 0; j < lengths_ptr[i]; ++j) {
199  DCHECK(el_idx < indices.numel());
200  auto label_idx = indices_ptr[el_idx++];
201  DCHECK((0 <= label_idx) && (label_idx < index_size));
202  one_hots_ptr[label_idx] = 1.0;
203  }
204  one_hots_ptr += index_size;
205  }
206  return true;
207  }
208 };
// CPU registrations for the one-hot family of operators.
REGISTER_CPU_OPERATOR(BatchBucketOneHot, BatchBucketOneHotOp<CPUContext>);
REGISTER_CPU_OPERATOR(BatchOneHot, BatchOneHotOp<CPUContext>);
REGISTER_CPU_OPERATOR(OneHot, OneHotOp<CPUContext>);
REGISTER_CPU_OPERATOR(SegmentOneHot, SegmentOneHotOp);
213 
214 OPERATOR_SCHEMA(BatchBucketOneHot)
215  .NumInputs(3)
216  .NumOutputs(1)
217  .DisallowInputFillers() // TODO: enable the filler
218  .SetDoc(R"DOC(
219 Input is a matrix tensor. Its first dimension is the batch
220 size. For each column, bucketize it based on the boundary values and then do
221 one hot encoding. The `lengths` specifies the number of boundary values for each
222 column. The final number of buckets is this number plus 1. This would also be
223 the expanded feature size. `boundaries` specifies all the boundary values.
224 Note that each bucket is right-inclusive. That is, given boundary values
225 [b1, b2, b3], the buckets are defined as (-int, b1], (b1, b2], (b2, b3], (b3, inf).
226 For example
227 
228  data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3],
229  If boundaries = [0.1, 2.5, 1, 3.1, 4.5], then
230  output = [[0, 1, 0, 0, 1, 0, 0], [0, 0, 1, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1]]
231 
232  If boundaries = [0.1, 2.5, 1, 1, 3.1], then
233  output = [[0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1]]
234 
235 )DOC")
236  .Input(0, "data", "input tensor matrix")
237  .Input(1, "lengths", "the size is the same as the width of the `data`")
238  .Input(2, "boundaries", "bucket boundaries")
239  .Output(
240  0,
241  "output",
242  "output matrix that expands each input column with one hot encoding"
243  "based on the bucketization")
244  .TensorInferenceFunction(TensorInferenceForBucketBatchOneHot);
245 
246 OPERATOR_SCHEMA(BatchOneHot)
247  .NumInputs(3)
248  .NumOutputs(1)
249  .ValueKeyLengthInputFillers(
253  .SetDoc(R"DOC(
254 Input is a matrix tensor. Its first dimension is the batch
255 size. Expand each column of it using one hot encoding. The `lengths` specifies
256 the size of each column after encoding, and the `values` is the dictionary value
257 of one-hot encoding for each column. For example
258 
259  If data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3],
260  and values = [2, 4, 1, 3, 5], then
261 
262  output = [[1, 0, 0, 1, 0], [0, 1, 1, 0, 0], [1, 0, 0, 0, 1]]
263 )DOC")
264  .Input(0, "data", "input tensor matrix")
265  .Input(1, "lengths", "the size is the same as the width of the `data`")
266  .Input(2, "values", "one hot encoding dictionary values")
267  .Output(
268  0,
269  "output",
270  "output matrix that expands each input column with one hot encoding")
271  .TensorInferenceFunction(TensorInferenceForBatchOneHot)
272  .CostInferenceFunction(
273  OpSchema::CostInferenceFunctionType(CostInferenceForBatchOneHot));
274 
// Schema for OneHot: two inputs (indices, index_size scalar), one output
// matrix of size len(indices) x index_size. The R"DOC" block below is
// user-facing documentation and is kept byte-identical.
OPERATOR_SCHEMA(OneHot)
    .NumInputs(2)
    .NumOutputs(1)
    .DisallowInputFillers() // TODO: enable the filler
    .SetDoc(R"DOC(
The *OneHot* op accepts two inputs *indices* and *index_size_tensor*, and produces a single output *one_hots*. For each index in *indices* the op creates a one-hot row in *one_hots* of length *index_size_tensor* where all entries are zero except the entry at the index is 1. The size of *one_hots* is *len(indices)* x *index_size_tensor*.

Github Links:

- https://github.com/caffe2/caffe2/blob/master/caffe2/operators/one_hot_ops.h
- https://github.com/caffe2/caffe2/blob/master/caffe2/operators/one_hot_ops.cc


<details>

<summary> <b>Example</b> </summary>

**Code**

```

workspace.ResetWorkspace()

op = core.CreateOperator(
    "OneHot",
    ["indices", "index_size_tensor"],
    ["one_hots"],
)

workspace.FeedBlob("indices", np.array([0,1,2,3,4]).astype(np.long))
print("indices:\n", workspace.FetchBlob("indices"))

workspace.FeedBlob("index_size_tensor", np.array([5]).astype(np.long))
print("index_size_tensor:\n", workspace.FetchBlob("index_size_tensor"))

workspace.RunOperatorOnce(op)
print("one_hots: \n", workspace.FetchBlob("one_hots"))

```

**Result**

```

indices:
 [0 1 2 3 4]
index_size_tensor:
 [5]
one_hots:
 [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]

```

</details>

)DOC")
    .Input(0, "indices", "The active index for each example in the batch.")
    .Input(
        1,
        "index_size_tensor",
        "Scalar with the size of the index. Must be in CPU context")
    .Output(0, "one_hots", "Matrix of size len(indices) x index_size");
341 
// Schema for SegmentOneHot: lengths delimit segments within the flat indices
// input; each segment becomes one output row of width index_size.
OPERATOR_SCHEMA(SegmentOneHot)
    .NumInputs(3)
    .NumOutputs(1)
    .DisallowInputFillers() // TODO: enable the filler
    .SetDoc(R"DOC(
Given a sequence of indices, segmented by the lengths tensor, returns a matrix
that has the elements in each sequence set to 1.0, and 0.0 everywhere else.
)DOC")
    .Input(0, "lengths", "Size of each segment.")
    .Input(1, "indices", "Active indices, of size sum(lengths)")
    .Input(2, "index_size_tensor", "Size of the index")
    .Output(0, "one_hots", "Matrix of size len(lengths) x index_size");
354 
355 NO_GRADIENT(BatchOneHot);
356 NO_GRADIENT(OneHot);
357 NO_GRADIENT(SegmentOneHot);
358 NO_GRADIENT(BucketBatchOneHot);
359 } // namespace caffe2
const Tensor & Input(int idx, DeviceType type=CPUContext::GetDeviceType())
Retrieve a non-owning reference to the input at position 'idx' for this operator. ...
Definition: operator.h:702
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
Definition: blob.h:13
Definition: static.cpp:70
std::function< struct Cost(const OperatorDef &, const vector< TensorShape > &)> CostInferenceFunctionType
Registers a function that takes in an OperatorDef and a series of input shapes and returns the total ...