// Caffe2 - C++ API
// A deep learning, cross platform ML framework
// boolean_mask_ops.cc
#include "caffe2/operators/boolean_mask_ops.h"

#include <limits>
#include <numeric>

#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"
20 
21 namespace caffe2 {
22 namespace {
23 
24 template <class Context>
25 class BooleanMaskLengthsOp final : public Operator<Context> {
26  public:
27  USE_OPERATOR_CONTEXT_FUNCTIONS;
28  BooleanMaskLengthsOp(const OperatorDef& operator_def, Workspace* ws)
29  : Operator<Context>(operator_def, ws) {}
30 
31  bool RunOnDevice() override {
32  return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
33  }
34 
35  template <typename T>
36  bool DoRunWithType() {
37  auto& lengths = Input(0);
38  auto& mask = Input(1);
39  auto* lengthsOut = Output(0);
40  CAFFE_ENFORCE(lengths.ndim() == 1);
41  CAFFE_ENFORCE(mask.ndim() == 1);
42  const auto* lengthsPtr = lengths.template data<T>();
43  const auto* maskPtr = mask.template data<bool>();
44  auto totalLength =
45  std::accumulate(lengthsPtr, lengthsPtr + lengths.size(), 0);
46  CAFFE_ENFORCE(mask.size() == totalLength);
47  lengthsOut->ResizeLike(lengths);
48  auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
49  int p = 0;
50  for (int i = 0; i < lengths.size(); ++i) {
51  T lengthOut = 0;
52  for (int j = 0; j < lengthsPtr[i]; ++j) {
53  if (maskPtr[p++]) {
54  ++lengthOut;
55  }
56  }
57  lengthsOutPtr[i] = lengthOut;
58  }
59  return true;
60  }
61 };
62 } // namespace
63 
// Keeps the slices of `data` along the first axis whose corresponding entry
// in the 1-D boolean `mask` is true. Optionally writes the kept indices to a
// second int64 output.
template <>
bool BooleanMaskOp<CPUContext>::RunOnDevice() {
  auto& data = Input(0);
  auto& mask = Input(1);
  auto* dataOut = Output(0);
  CAFFE_ENFORCE(data.ndim() >= 1);
  CAFFE_ENFORCE_EQ(mask.ndim(), 1);
  CAFFE_ENFORCE(data.dims()[0] == mask.dims()[0]);

  // First pass: count surviving slices so the output can be sized exactly.
  const auto* maskPtr = mask.template data<bool>();
  int numOutputs = 0;
  int outerSize = mask.size();
  for (int i = 0; i < outerSize; ++i) {
    if (maskPtr[i]) {
      ++numOutputs;
    }
  }
  // Output keeps all trailing dimensions of `data`; only dim 0 shrinks.
  std::vector<TIndex> outShape;
  outShape.push_back(numOutputs);
  outShape.insert(outShape.end(), data.dims().begin() + 1, data.dims().end());
  dataOut->Resize(outShape);
  // Raw bytes + meta so this works for any element type without dispatch.
  auto* outPtr = (char*)dataOut->raw_mutable_data(data.meta());

  int64_t* out_vec = nullptr;
  if (OutputSize() == 2) {
    auto* indicesOut = Output(1);
    indicesOut->Resize(numOutputs);
    out_vec = indicesOut->template mutable_data<int64_t>();
  }

  if (numOutputs == 0) {
    return true;
  }
  const auto innerSize = data.size_from_dim(1);
  const auto innerSizeBytes = innerSize * data.meta().itemsize();

  // Second pass: coalesce maximal runs of consecutive true mask entries into
  // a single CopyItems call each. lastStart == -1 means "not inside a run".
  TIndex lastStart = -1;
  const auto* inPtr = (char*)data.raw_data();
  TIndex outStart = 0;

  // Note: iterates one position past the end (i == outerSize) so a run that
  // reaches the last element is still flushed.
  for (TIndex i = 0;; ++i) {
    // mask was true and either a) became false, or b) sequence finished
    if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
      const auto* src = inPtr + lastStart * innerSizeBytes;
      auto* dst = outPtr + outStart * innerSizeBytes;
      int numItems = i - lastStart;
      context_.template CopyItems<CPUContext, CPUContext>(
          data.meta(), numItems * innerSize, src, dst);
      outStart += numItems;
      lastStart = -1;
    }
    if (i >= outerSize) {
      break;
    }
    // mask was false and became true
    if (lastStart == -1 && maskPtr[i]) {
      lastStart = i;
    }
    // Record the kept index as soon as it is seen (order is preserved).
    if (maskPtr[i] && OutputSize() == 2) {
      *(out_vec++) = i;
    }
  }
  return true;
}
128 
129 REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
130 REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
131 
132 OPERATOR_SCHEMA(BooleanMask)
133  .NumInputs(2)
134  .NumOutputs(1, 2)
135  .SetDoc(R"DOC(
136 Given a data tensor and a 1D boolean mask tensor, returns a tensor containing
137 only the elements corresponding to positions where the mask is true.
138 )DOC")
139  .Input(0, "data", "The 1D, original data tensor.")
140  .Input(1, "mask", "A tensor of bools of same shape as `data`.")
141  .Output(0, "masked_data", "A tensor of same type as `data`.")
142  .Output(1, "masked_indices", "A tensor for indices.");
143 
144 OPERATOR_SCHEMA(BooleanMaskLengths)
145  .NumInputs(2)
146  .NumOutputs(1)
147  .SetDoc(R"DOC(
148 Given a tensor of int32 segment lengths and a mask (boolean) tensor, return
149 the segment lengths of a corresponding segmented tensor after BooleanMask is
150 applied.
151 )DOC")
152  .Input(0, "lengths", "A 1D int32 tensor representing segment lengths.")
153  .Input(1, "mask", "A 1D bool tensor of values to keep.")
154  .Output(0, "masked_lengths", "Segment lengths of a masked tensor.");
155 
156 NO_GRADIENT(BooleanMask)
157 NO_GRADIENT(BooleanMaskLengths);
158 
// Negative infinity. NOTE(review): not referenced in this file's visible
// code — presumably the default fill value for masked entries declared in the
// header (softmax maps -inf to 0); confirm against boolean_mask_ops.h.
const float minf = -1.0f * std::numeric_limits<float>::infinity();
160 
// Template this on a functor object so we can generate different
// implementations at compile time and have a better chance of inlining
//
// Writes `fill_val` wherever fn(row, col, val) is true, and copies the input
// value through otherwise.
//   N: rows of the collapsed matrix view
//   M: cols of the collapsed matrix view
//   B: number of batches, or negative to disable batching
// All index arithmetic is done in size_t to avoid signed/unsigned comparison
// and int overflow for large tensors.
template <typename Functor>
void MaskWithFunctor(
    size_t N,
    size_t M,
    int B,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  if (B >= 0) { // with batching
    // collapse tensor to 3-dim view [B, N, M] where:
    // B is product of dims up to and including batch
    // N is product of dims between batch and axis, exclusive
    // M is product of dimensions at/after axis
    // then mask each batch [i, :, :] (note that this is N x M matrix)
    for (size_t i = 0; i < static_cast<size_t>(B); ++i) {
      for (size_t j = 0; j < N; ++j) {
        for (size_t k = 0; k < M; ++k) {
          // when [i, :, :] is laid out in row major order
          // N * M * i + M * j + k is index of entry in N x M matrix
          // with coordinates (row = j, col = k)
          const size_t idx = N * M * i + M * j + k;
          auto val = in[idx];
          out[idx] = (fn(j, k, val) ? fill_val : val);
        }
      }
    }
  } else { // without batching
    // TODO(T20952436): vector implementation
    // collapse tensor to 2-dim view [N, M], where
    // N is product of dimensions before axis
    // M is product of dimensions at/after axis
    // and mask N by M matrix
    for (size_t i = 0; i < N; ++i) {
      for (size_t j = 0; j < M; ++j) {
        const size_t idx = M * i + j;
        auto val = in[idx];
        out[idx] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
203 
// Repeat masking along continuous segments (right axes) of size D: the
// decision fn(i, j, val) for cell (i, j) of the N x M view is applied to all
// D consecutive elements that make up that cell.
template <typename Functor>
void RepeatedMaskWithFunctor(
    size_t N,
    size_t M,
    int D,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  for (int row = 0; row < N; ++row) {
    for (int col = 0; col < M; ++col) {
      // Offset of this (row, col) cell's D-element segment in row-major order.
      const size_t base = M * D * row + D * col;
      for (int seg = 0; seg < D; ++seg) {
        const float cur = in[base + seg];
        out[base + seg] = fn(row, col, cur) ? fill_val : cur;
      }
    }
  }
}
223 
224 namespace {
225 
226 class SequenceFunctor {
227  public:
228  explicit SequenceFunctor(const int* sl, const size_t len)
229  : sl_(sl), len_(len) {}
230  bool operator()(int i, int j, float /* val*/) {
231  CAFFE_ENFORCE(i < len_, "Out of bound.");
232  return j >= sl_[i];
233  }
234 
235  private:
236  const int* sl_;
237  const size_t len_;
238 };
239 
// Masks everything outside a window of radius `r` centered at c[i] for row i:
// element (i, j) is masked iff j lies outside [c[i] - r, c[i] + r].
class WindowFunctor {
 public:
  explicit WindowFunctor(const int* c, int r) : c(c), r(r) {}
  bool operator()(int i, int j, float /* val*/) {
    const int center = c[i];
    return !(center - r <= j && j <= center + r);
  }

 private:
  const int* c;
  const int r;
};
251 
// Masks the strict upper triangle: (i, j) is masked iff the column index
// exceeds the row index.
class UpperFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i < j;
  }
};
258 
// Masks the strict lower triangle: (i, j) is masked iff the column index is
// smaller than the row index.
class LowerFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return i > j;
  }
};
265 
// Masks the upper triangle including the diagonal: (i, j) is masked unless
// the column index is strictly below the row index.
class UpperDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(j < i);
  }
};
272 
// Masks the lower triangle including the diagonal: (i, j) is masked unless
// the column index is strictly above the row index.
class LowerDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    return !(i < j);
  }
};
279 
280 } // namespace
281 
// Dispatch on the dtype of the primary input; only float is handled on CPU.
template <>
bool SequenceMaskOp<CPUContext>::RunOnDevice() {
  return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
}
286 
// Applies the mask selected by `mode_` to Input(0): positions where the
// selected functor fires receive the fill value, everything else is copied
// through unchanged.
template <>
template <class T>
bool SequenceMaskOp<CPUContext>::DoRunWithType() {
  const Tensor<CPUContext>* input = &Input(0);
  const Tensor<CPUContext>* sequence_lengths = nullptr;
  const Tensor<CPUContext>* window_centers = nullptr;

  // Only 'sequence' and 'window' modes consume a second input tensor.
  if (mode_ == "sequence") {
    sequence_lengths = &Input(1);
  } else if (mode_ == "window") {
    window_centers = &Input(1);
  }

  auto* output = Output(0);
  output->ResizeLike(*input);

  const auto canonical_axis = input->canonical_axis_index(axis_);

  // canonical_batch is non-negative if batching, -1 otherwise
  int canonical_batch = -1;
  if ((HasArgument("batch"))) {
    canonical_batch = input->canonical_axis_index(batch_);
  }

  // make sure batch < axis
  if (canonical_batch >= 0) {
    CAFFE_ENFORCE_LT(canonical_batch, canonical_axis);
  }

  // if no batch, then left is product of dims up to axis
  // otherwise, left is product of dims between batch and axis
  const int left =
      (canonical_batch >= 0
           ? input->size_between_dim(canonical_batch, canonical_axis)
           : input->size_to_dim(canonical_axis));
  const int right = input->size_from_dim(canonical_axis);

  // product of dims from 1 to batch
  const int batch_dim =
      (canonical_batch >= 0
           ? input->size_to_dim(canonical_batch) * input->dim(canonical_batch)
           : -1);

  // In gradient mode the fill value is forced to 0 so no gradient flows
  // through masked positions; otherwise use the configured fill_val_.
  T fill_val = convert::To<float, T>(grad_ ? 0.0f : fill_val_);
  if (mode_ == "sequence") {
    CAFFE_ENFORCE(
        sequence_lengths, "Sequence length not provided for mode 'sequence'!");
    if (HasArgument("repeat_from_axis")) {
      // Repeat the same [left x masked_dims] mask across the trailing
      // `repeated_dims` elements (the dims at/after repeat_from_axis).
      const int canonical_repeat_from =
          input->canonical_axis_index(repeat_from_);
      const int repeated_dims = input->size_from_dim(canonical_repeat_from);
      const int masked_dims = right / repeated_dims;
      RepeatedMaskWithFunctor(
          left,
          masked_dims,
          repeated_dims,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->size()),
          fill_val,
          output->mutable_data<T>());
    } else {
      MaskWithFunctor(
          left,
          right,
          batch_dim,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->size()),
          fill_val,
          output->mutable_data<T>());
    }
  } else if (mode_ == "window") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        WindowFunctor(window_centers->data<int>(), radius_),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "upper") {
    // Masks strictly above the diagonal (j > i).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "lower") {
    // Masks strictly below the diagonal (j < i).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "upperdiag") {
    // Masks the diagonal and above (j >= i).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperDiagFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else if (mode_ == "lowerdiag") {
    // Masks the diagonal and below (j <= i).
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerDiagFunctor(),
        fill_val,
        output->mutable_data<T>());
  } else {
    // Defensive failure for an unrecognized mode string.
    CAFFE_ENFORCE(false, "Unsupported mode for SequenceMaskOp!");
    return false;
  }

  return true;
}
411 
412 REGISTER_CPU_OPERATOR(SequenceMask, SequenceMaskOp<CPUContext>);
413 
414 OPERATOR_SCHEMA(SequenceMask)
415  .NumInputs(1, 2)
416  .NumOutputs(1)
417  .SetDoc(R"DOC(
418 Mask op designed for use in attention mechanisms for sequence modeling tasks.
419 Supports batching: given batch_dim, collapses dims 0 through batch_dim into a
420 single dimension, e.g. if tensor dims are [4,2,1,3,4] and batch_dim=2, first
421 collapse tensor to [4*2*1,3,4], then mask each batch [i,:,:].
422 
423 
424 Two current operating modes:
425 
426 
427 1) Given a 2D input tensor and 1D tensor of sequence lengths, for each row i in
428 the input tensor, set elements in that row to -inf if their column index
429 j >= sequence_lengths[i]. This mode takes two inputs and argument mode =
430 'sequence'
431 
432 
433 2) Triangular mask. Given row index i and column index j, set elements to -inf
434 given the following conditions:
435 
436  mode='upper', x_ij = -inf if j < i
437  mode='lower', x_ij = -inf if j > i
438  mode='upperdiag', x_ij = -inf if j <= i
439  mode='lowerdiag', x_ij = -inf if j >= i
440 
441 This mode takes one input.
442 
443 
444 3) Window Mask. Given a 2D input tensor and 1D tensor of window centers,
445 for each row i in the input tensor, set elements in that row to -inf
446 if their column index j outside [center - radius, center + radius].
447 This mode takes two inputs and argument mode = 'sequence'.
448 Argument 'radius' should be provided.
449 )DOC")
450  .Input(0, "input", "Tensor to apply masking to")
451  .Input(1, "sequence_lengths", "1D Tensor of sequence lengths for mode #1")
452  .Output(0, "masked_tensor", "Input tensor with masking applied")
453  .Arg(
454  "mode",
455  "(string) Mode selection. Possible values: "
456  "'sequence', 'upper', 'lower', 'upperdiag', 'lowerdiag'")
457  .Arg(
458  "axis",
459  "(int) Beginning axis of row elements. All dimensions to the left "
460  "will be treated as row indices and those to the right (inclusive) "
461  "will be treated as column indices in the 2D mask")
462  .Arg("grad", "(bool) operate in gradient mode")
463  .Arg("radius", "(int) radius of windows in window mode")
464  .Arg("batch", "(int) batch dimension of tensor (optional)")
465  .Arg(
466  "repeat_from_axis",
467  "(int) used when mask should be repeated for "
468  "one or more data dimensions (beginning at this axis). "
469  "(currently only supported for sequence mode without batch argument)");
470 
471 class GetSequenceMaskGradient : public GradientMakerBase {
472  using GradientMakerBase::GradientMakerBase;
473  vector<OperatorDef> GetGradientDefs() override {
474  vector<Argument> args;
475  args.reserve(Def().arg().size());
476  for (const auto& x : Def().arg()) {
477  args.push_back(x);
478  }
479  args.push_back(MakeArgument<bool>("grad", true));
480  if (def_.input_size() == 1) {
481  return SingleGradientDef(
482  "SequenceMask",
483  "",
484  vector<string>{GO(0)},
485  vector<string>{GI(0)},
486  args);
487  } else {
488  return SingleGradientDef(
489  "SequenceMask",
490  "",
491  vector<string>{GO(0), I(1)},
492  vector<string>{GI(0)},
493  args);
494  }
495  }
496 
497  bool CopyArguments() const override {
498  return false;
499  }
500 };
501 
502 REGISTER_GRADIENT(SequenceMask, GetSequenceMaskGradient);
503 
504 } // namespace caffe2
// Copyright (c) 2016-present, Facebook, Inc.