// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// boolean_mask_ops.cc
1 #include "caffe2/operators/boolean_mask_ops.h"
2 #include "caffe2/core/operator.h"
3 #include "caffe2/core/tensor.h"
4 
5 namespace caffe2 {
6 namespace {
7 
8 template <class Context>
9 class BooleanMaskLengthsOp final : public Operator<Context> {
10  public:
11  USE_OPERATOR_CONTEXT_FUNCTIONS;
12  template <class... Args>
13  explicit BooleanMaskLengthsOp(Args&&... args)
14  : Operator<Context>(std::forward<Args>(args)...) {}
15 
16  bool RunOnDevice() override {
17  return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
18  }
19 
20  template <typename T>
21  bool DoRunWithType() {
22  auto& lengths = Input(0);
23  auto& mask = Input(1);
24 
25  CAFFE_ENFORCE(lengths.dim() == 1);
26  CAFFE_ENFORCE(mask.dim() == 1);
27  const auto* lengthsPtr = lengths.template data<T>();
28  const auto* maskPtr = mask.template data<bool>();
29  auto totalLength =
30  std::accumulate(lengthsPtr, lengthsPtr + lengths.numel(), 0);
31  CAFFE_ENFORCE(mask.numel() == totalLength);
32  auto* lengthsOut = Output(0, lengths.sizes(), at::dtype<T>());
33  auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
34  int p = 0;
35  for (int i = 0; i < lengths.numel(); ++i) {
36  T lengthOut = 0;
37  for (int j = 0; j < lengthsPtr[i]; ++j) {
38  if (maskPtr[p++]) {
39  ++lengthOut;
40  }
41  }
42  lengthsOutPtr[i] = lengthOut;
43  }
44  return true;
45  }
46 };
47 } // namespace
48 
template <>
bool BooleanMaskOp<CPUContext>::RunOnDevice() {
  // Keeps the slices of `data` along dim 0 where `mask` is true.
  // Output(0) receives the masked data; the optional Output(1) receives the
  // int64 indices of the rows that were kept.
  auto& data = Input(0);
  auto& mask = Input(1);
  auto* dataOut = Output(0);
  CAFFE_ENFORCE(data.dim() >= 1);
  CAFFE_ENFORCE_EQ(mask.dim(), 1);
  CAFFE_ENFORCE(data.size(0) == mask.size(0));

  // First pass: count surviving rows so the output can be sized up front.
  const auto* maskPtr = mask.template data<bool>();
  int numOutputs = 0;
  int outerSize = mask.numel();
  for (int i = 0; i < outerSize; ++i) {
    if (maskPtr[i]) {
      ++numOutputs;
    }
  }
  // Output shape is [numOutputs, data.sizes()[1:]].
  std::vector<int64_t> outShape;
  outShape.push_back(numOutputs);
  outShape.insert(outShape.end(), data.sizes().begin() + 1, data.sizes().end());
  dataOut->Resize(outShape);
  auto* outPtr = (char*)dataOut->raw_mutable_data(data.dtype());

  int64_t* out_vec = nullptr;
  if (OutputSize() == 2) {
    auto* indicesOut = Output(1, {numOutputs}, at::dtype<int64_t>());
    out_vec = indicesOut->template mutable_data<int64_t>();
  }

  if (numOutputs == 0) {
    return true;
  }
  // Bytes per row (all dims after the first), used for raw pointer math.
  const auto innerSize = data.size_from_dim(1);
  const auto innerSizeBytes = innerSize * data.dtype().itemsize();

  // Second pass: copy maximal runs of consecutive true rows with a single
  // CopyItemsSameDevice call each. `lastStart` is the first row of the
  // current run, or -1 when not inside a run.
  int64_t lastStart = -1;
  const auto* inPtr = (char*)data.raw_data();
  int64_t outStart = 0;

  // Deliberately iterates one step past the end so a run ending at the last
  // row is flushed before the loop breaks.
  for (int64_t i = 0;; ++i) {
    // mask was true and either a) became false, or b) sequence finished
    if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
      const auto* src = inPtr + lastStart * innerSizeBytes;
      auto* dst = outPtr + outStart * innerSizeBytes;
      int numItems = i - lastStart;
      context_.CopyItemsSameDevice(
          data.dtype(), numItems * innerSize, src, dst);
      outStart += numItems;
      lastStart = -1;
    }
    if (i >= outerSize) {
      break;
    }
    // mask was false and became true
    if (lastStart == -1 && maskPtr[i]) {
      lastStart = i;
    }
    if (maskPtr[i] && OutputSize() == 2) {
      *(out_vec++) = i;
    }
  }
  return true;
}
112 
113 REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
114 REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
115 
116 OPERATOR_SCHEMA(BooleanMask)
117  .NumInputs(2)
118  .NumOutputs(1, 2)
119  .SetDoc(R"DOC(
120 Given a 1D `data` tensor and a boolean `mask` tensor of the same shape, returns a `masked_data` tensor containing only the elements corresponding to positions where the `mask` is True, and a `masked_indices` tensor containing the indices of the True elements.
121 
122 
123 Github Links:
124 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc
125 
126 <details>
127 
128 <summary> <b>Example</b> </summary>
129 
130 **Code**
131 
132 ```
133 
134 workspace.ResetWorkspace()
135 
136 op = core.CreateOperator(
137  "BooleanMask",
138  ["data", "mask"],
139  ["masked_data", "masked_indices"]
140 )
141 
142 workspace.FeedBlob("data", np.array([1,2,3,4,5,6]))
143 workspace.FeedBlob("mask", np.array([True,False,False,True,True,False]))
144 print("data:", workspace.FetchBlob("data"))
145 print("mask:", workspace.FetchBlob("mask"))
146 workspace.RunOperatorOnce(op)
147 print("masked_data:", workspace.FetchBlob("masked_data"))
148 print("masked_indices:", workspace.FetchBlob("masked_indices"))
149 
150 ```
151 
152 **Result**
153 
154 ```
155 
156 data: [1 2 3 4 5 6]
157 mask: [ True False False True True False]
158 masked_data: [1 4 5]
159 masked_indices: [0 3 4]
160 
161 ```
162 
163 </details>
164 
165 )DOC")
166  .Input(0, "data", "(*Tensor*): 1D input tensor")
167  .Input(1, "mask", "(*Tensor`<bool>`*): tensor of bools which determines the input elements that will be left in the `masked_data` output tensor; same shape as `data`")
168  .Output(0, "masked_data", "(*Tensor*): 1D tensor of same type as `data` input that contains the masked input tensor")
169  .Output(1, "masked_indices", "(*Tensor`<int>`*): 1D tensor of indices of the True elements in the `mask` tensor");
170 
171 OPERATOR_SCHEMA(BooleanMaskLengths)
172  .NumInputs(2)
173  .NumOutputs(1)
174  .SetDoc(R"DOC(
175 Given a tensor of int32 `lengths` tensor representing segment lengths and a `mask` (boolean) tensor, return the segment lengths of the corresponding segmented tensor after **BooleanMask** is applied.
176 
177 If `lengths` tensor is $[a_1, a_2, ..., a_n]$, then length of `mask` tensor must be $a_1 + a_2 + ... + a_n$.
178 
179 
180 Github Links:
181 - https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc
182 
183 <details>
184 
185 <summary> <b>Example</b> </summary>
186 
187 **Code**
188 
189 ```
190 
191 workspace.ResetWorkspace()
192 
193 op = core.CreateOperator(
194  "BooleanMaskLengths",
195  ["lengths", "mask"],
196  ["masked_lengths"]
197 )
198 
199 workspace.FeedBlob("lengths", np.array([1,3,2], dtype=np.int32))
200 workspace.FeedBlob("mask", np.array([False,True,True,False,True,True]))
201 print("lengths:", workspace.FetchBlob("lengths"))
202 print("mask:", workspace.FetchBlob("mask"))
203 workspace.RunOperatorOnce(op)
204 print("masked_lengths:", workspace.FetchBlob("masked_lengths"))
205 
206 ```
207 
208 **Result**
209 
210 ```
211 
212 lengths: [1 3 2]
213 mask: [False True True False True True]
214 masked_lengths: [0 2 2]
215 
216 ```
217 
218 </details>
219 
220 )DOC")
221  .Input(0, "lengths", "(*Tensor`<int>`*): input tensor containing segment lengths")
222  .Input(1, "mask", "(*Tensor`<bool>`*): A 1D bool tensor of values to keep.")
223  .Output(0, "masked_lengths", "(*Tensor`<int>`*): 1D tensor of same type as inputs that contains the sequence");
224 
225 NO_GRADIENT(BooleanMask)
226 NO_GRADIENT(BooleanMaskLengths);
227 
// Negative infinity, the fill value used when masking entries out.
// NOTE(review): not referenced in this translation unit — presumably shared
// with other implementations of these ops; confirm before removing.
const float minf = -1.0f * std::numeric_limits<float>::infinity();
229 
// Template this on a functor object so we can generate different
// implementations at compile time and have a better chance of inlining.
//
// Applies `fn(row, col, val)` to every element of `in` viewed as a matrix
// (or a batch of matrices) and writes `fill_val` where fn returns true,
// the original value otherwise. `in` and `out` hold (B<0 ? N*M : B*N*M)
// floats.
template <typename Functor>
void MaskWithFunctor(
    size_t N,
    size_t M,
    int B,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  if (B >= 0) { // with batching
    // collapse tensor to 3-dim view [B, N, M] where:
    // B is product of dims up to and including batch
    // N is product of dims between batch and axis, exclusive
    // M is product of dimensions at/after axis
    // then mask each batch [i, :, :] (note that this is N x M matrix)
    // Loop indices are size_t to avoid the signed/unsigned comparisons the
    // original `int` counters produced against the size_t bounds.
    for (size_t i = 0; i < static_cast<size_t>(B); ++i) {
      for (size_t j = 0; j < N; ++j) {
        // Base offset of row j of batch i in row-major layout, hoisted out
        // of the innermost loop.
        const size_t row = N * M * i + M * j;
        for (size_t k = 0; k < M; ++k) {
          const float val = in[row + k];
          out[row + k] = (fn(j, k, val) ? fill_val : val);
        }
      }
    }
  } else { // without batching
    // TODO(T20952436): vector implementation
    // collapse tensor to 2-dim view [N, M], where
    // N is product of dimensions before axis
    // M is product of dimensions at/after axis
    // and mask N by M matrix
    for (size_t i = 0; i < N; ++i) {
      const size_t row = M * i;
      for (size_t j = 0; j < M; ++j) {
        const float val = in[row + j];
        out[row + j] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
272 
// Repeat masking along continuous segments (right axes) of size D.
// The input is viewed as an [N, M, D] tensor; the masking decision for cell
// (i, j) is evaluated per element and applied across its D-long segment.
template <typename Functor>
void RepeatedMaskWithFunctor(
    size_t N,
    size_t M,
    int D,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < M; ++col) {
      // Offset of segment (row, col) in row-major layout.
      const size_t base = M * D * row + D * col;
      for (int k = 0; k < D; ++k) {
        const float v = in[base + k];
        out[base + k] = fn(row, col, v) ? fill_val : v;
      }
    }
  }
}
292 
293 namespace {
294 
295 class SequenceFunctor {
296  public:
297  explicit SequenceFunctor(const int* sl, const size_t len)
298  : sl_(sl), len_(len) {}
299  bool operator()(int i, int j, float /* val*/) {
300  CAFFE_ENFORCE(i < len_, "Out of bound.");
301  return j >= sl_[i];
302  }
303 
304  private:
305  const int* sl_;
306  const size_t len_;
307 };
308 
// Window mask: for row i, positions outside [c[i] - r, c[i] + r] are
// masked out.
class WindowFunctor {
 public:
  explicit WindowFunctor(const int* c, int r) : c(c), r(r) {}
  bool operator()(int i, int j, float /* val*/) {
    const int center = c[i];
    // Mask everything that is NOT inside the inclusive window.
    return !(center - r <= j && j <= center + r);
  }

 private:
  const int* c; // per-row window centers (not owned)
  const int r; // window radius
};
320 
// Strict upper triangle: masks entries above the main diagonal (col > row).
class UpperFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i < j;
  }
};
327 
// Strict lower triangle: masks entries below the main diagonal (col < row).
class LowerFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i > j;
  }
};
334 
// Upper triangle including the diagonal: masks entries with col >= row.
class UpperDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(j < i);
  }
};
341 
// Lower triangle including the diagonal: masks entries with col <= row.
class LowerDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(i < j);
  }
};
348 
349 } // namespace
350 
template <>
bool SequenceMaskOp<CPUContext>::RunOnDevice() {
  // Only float input is supported on CPU; dispatch binds T for
  // DoRunWithType below.
  return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
}
355 
// Collapses the input to a (batched) 2D view and applies the mask functor
// selected by `mode_`, writing `fill_val_` (or 0 in gradient mode) to
// masked positions.
template <>
template <class T>
bool SequenceMaskOp<CPUContext>::DoRunWithType() {
  const Tensor* input = &Input(0);
  const Tensor* sequence_lengths = nullptr;
  const Tensor* window_centers = nullptr;

  // Modes 'sequence' and 'window' carry a second input (lengths/centers);
  // the triangular modes take only the data input.
  if (mode_ == "sequence") {
    sequence_lengths = &Input(1);
  } else if (mode_ == "window") {
    window_centers = &Input(1);
  }

  auto* output = Output(0, input->sizes(), at::dtype<T>());

  const auto canonical_axis = input->canonical_axis_index(axis_);

  // canonical_batch is non-negative if batching, -1 otherwise
  int canonical_batch = -1;
  if ((HasArgument("batch"))) {
    canonical_batch = input->canonical_axis_index(batch_);
  }

  // make sure batch < axis
  if (canonical_batch >= 0) {
    CAFFE_ENFORCE_LT(canonical_batch, canonical_axis);
  }

  // if no batch, then left is product of dims up to axis
  // otherwise, left is product of dims between batch and axis
  const int left =
      (canonical_batch >= 0
           ? input->size_between_dim(canonical_batch, canonical_axis)
           : input->size_to_dim(canonical_axis));
  const int right = input->size_from_dim(canonical_axis);

  // product of dims from 1 to batch
  const int batch_dim =
      (canonical_batch >= 0
           ? input->size_to_dim(canonical_batch) * input->size(canonical_batch)
           : -1);

  // In gradient mode masked positions contribute 0 instead of fill_val_.
  T fill_val = convert::To<float, T>(grad_ ? 0.0f : fill_val_);
  if (mode_ == "sequence") {
    CAFFE_ENFORCE(
        sequence_lengths, "Sequence length not provided for mode 'sequence'!");
    if (HasArgument("repeat_from_axis")) {
      // Repeat the same per-(row, col) decision across trailing dims
      // starting at repeat_from_.
      const int canonical_repeat_from =
          input->canonical_axis_index(repeat_from_);
      const int repeated_dims = input->size_from_dim(canonical_repeat_from);
      const int masked_dims = right / repeated_dims;
      RepeatedMaskWithFunctor(
          left,
          masked_dims,
          repeated_dims,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->numel()),
          fill_val,
          output->template mutable_data<T>());
    } else {
      MaskWithFunctor(
          left,
          right,
          batch_dim,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->numel()),
          fill_val,
          output->template mutable_data<T>());
    }
  } else if (mode_ == "window") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        WindowFunctor(window_centers->data<int>(), radius_),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "upper") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "lower") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "upperdiag") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperDiagFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "lowerdiag") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerDiagFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else {
    CAFFE_ENFORCE(false, "Unsupported mode for SequenceMaskOp!");
    return false;
  }

  return true;
}
479 
480 REGISTER_CPU_OPERATOR(SequenceMask, SequenceMaskOp<CPUContext>);
481 
482 OPERATOR_SCHEMA(SequenceMask)
483  .NumInputs(1, 2)
484  .NumOutputs(1)
485  .SetDoc(R"DOC(
486 Mask op designed for use in attention mechanisms for sequence modeling tasks.
487 Supports batching: given batch_dim, collapses dims 0 through batch_dim into a
488 single dimension, e.g. if tensor dims are [4,2,1,3,4] and batch_dim=2, first
489 collapse tensor to [4*2*1,3,4], then mask each batch [i,:,:].
490 
491 
492 Two current operating modes:
493 
494 
495 1) Given a 2D input tensor and 1D tensor of sequence lengths, for each row i in
496 the input tensor, set elements in that row to -inf if their column index
497 j >= sequence_lengths[i]. This mode takes two inputs and argument mode =
498 'sequence'
499 
500 
501 2) Triangular mask. Given row index i and column index j, set elements to -inf
502 given the following conditions:
503 
504  mode='upper', x_ij = -inf if j < i
505  mode='lower', x_ij = -inf if j > i
506  mode='upperdiag', x_ij = -inf if j <= i
507  mode='lowerdiag', x_ij = -inf if j >= i
508 
509 This mode takes one input.
510 
511 
512 3) Window Mask. Given a 2D input tensor and 1D tensor of window centers,
513 for each row i in the input tensor, set elements in that row to -inf
514 if their column index j outside [center - radius, center + radius].
515 This mode takes two inputs and argument mode = 'sequence'.
516 Argument 'radius' should be provided.
517 )DOC")
518  .Input(0, "input", "Tensor to apply masking to")
519  .Input(1, "sequence_lengths", "1D Tensor of sequence lengths for mode #1")
520  .Output(0, "masked_tensor", "Input tensor with masking applied")
521  .Arg(
522  "mode",
523  "(string) Mode selection. Possible values: "
524  "'sequence', 'upper', 'lower', 'upperdiag', 'lowerdiag'")
525  .Arg(
526  "axis",
527  "(int) Beginning axis of row elements. All dimensions to the left "
528  "will be treated as row indices and those to the right (inclusive) "
529  "will be treated as column indices in the 2D mask")
530  .Arg("grad", "(bool) operate in gradient mode")
531  .Arg("radius", "(int) radius of windows in window mode")
532  .Arg("batch", "(int) batch dimension of tensor (optional)")
533  .Arg(
534  "repeat_from_axis",
535  "(int) used when mask should be repeated for "
536  "one or more data dimensions (beginning at this axis). "
537  "(currently only supported for sequence mode without batch argument)");
538 
539 class GetSequenceMaskGradient : public GradientMakerBase {
540  using GradientMakerBase::GradientMakerBase;
541  vector<OperatorDef> GetGradientDefs() override {
542  vector<Argument> args;
543  args.reserve(Def().arg().size());
544  for (const auto& x : Def().arg()) {
545  args.push_back(x);
546  }
547  args.push_back(MakeArgument<bool>("grad", true));
548  if (def_.input_size() == 1) {
549  return SingleGradientDef(
550  "SequenceMask",
551  "",
552  vector<string>{GO(0)},
553  vector<string>{GI(0)},
554  args);
555  } else {
556  return SingleGradientDef(
557  "SequenceMask",
558  "",
559  vector<string>{GO(0), I(1)},
560  vector<string>{GI(0)},
561  args);
562  }
563  }
564 
565  bool CopyArguments() const override {
566  return false;
567  }
568 };
569 
570 REGISTER_GRADIENT(SequenceMask, GetSequenceMaskGradient);
571 
572 } // namespace caffe2
// Definition: any.cpp:108
// A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...
// Definition: blob.h:13
// Definition: static.cpp:58
// Definition: static.cpp:70