#include "caffe2/operators/boolean_mask_ops.h"

#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"

#include <limits>
#include <numeric>

namespace caffe2 {

template <class Context>
class BooleanMaskLengthsOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit BooleanMaskLengthsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...) {}

  bool RunOnDevice() override {
    return DispatchHelper<TensorTypes<int32_t, int64_t>>::call(this, Input(0));
  }

  template <typename T>
  bool DoRunWithType() {
    auto& lengths = Input(0);
    auto& mask = Input(1);
    CAFFE_ENFORCE(lengths.dim() == 1);
    CAFFE_ENFORCE(mask.dim() == 1);
    const auto* lengthsPtr = lengths.template data<T>();
    const auto* maskPtr = mask.template data<bool>();
    // The mask must cover every element of every segment.
    auto totalLength =
        std::accumulate(lengthsPtr, lengthsPtr + lengths.numel(), 0);
    CAFFE_ENFORCE(mask.numel() == totalLength);
    auto* lengthsOut = Output(0, lengths.sizes(), at::dtype<T>());
    auto* lengthsOutPtr = lengthsOut->template mutable_data<T>();
    // Walk the mask segment by segment, counting how many entries survive.
    int p = 0;
    for (int i = 0; i < lengths.numel(); ++i) {
      T lengthOut = 0;
      for (int j = 0; j < lengthsPtr[i]; ++j) {
        if (maskPtr[p++]) {
          ++lengthOut;
        }
      }
      lengthsOutPtr[i] = lengthOut;
    }
    return true;
  }
};
template <>
bool BooleanMaskOp<CPUContext>::RunOnDevice() {
  auto& data = Input(0);
  auto& mask = Input(1);
  auto* dataOut = Output(0);
  CAFFE_ENFORCE(data.dim() >= 1);
  CAFFE_ENFORCE_EQ(mask.dim(), 1);
  CAFFE_ENFORCE(data.size(0) == mask.size(0));

  const auto* maskPtr = mask.template data<bool>();
  int numOutputs = 0;
  int outerSize = mask.numel();
  for (int i = 0; i < outerSize; ++i) {
    if (maskPtr[i]) {
      ++numOutputs;
    }
  }
  std::vector<int64_t> outShape;
  outShape.push_back(numOutputs);
  outShape.insert(outShape.end(), data.sizes().begin() + 1, data.sizes().end());
  dataOut->Resize(outShape);
  auto* outPtr = (char*)dataOut->raw_mutable_data(data.dtype());

  int64_t* out_vec = nullptr;
  if (OutputSize() == 2) {
    auto* indicesOut = Output(1, {numOutputs}, at::dtype<int64_t>());
    out_vec = indicesOut->template mutable_data<int64_t>();
  }

  if (numOutputs == 0) {
    return true;
  }
  const auto innerSize = data.size_from_dim(1);
  const auto innerSizeBytes = innerSize * data.dtype().itemsize();

  // Copy contiguous runs of selected rows with single CopyItemsSameDevice
  // calls instead of copying row by row.
  int64_t lastStart = -1;
  const auto* inPtr = (char*)data.raw_data();
  int64_t outStart = 0;

  for (int64_t i = 0;; ++i) {
    // A run of selected rows ends when the mask turns false or the input ends.
    if (lastStart != -1 && ((i >= outerSize) || !maskPtr[i])) {
      const auto* src = inPtr + lastStart * innerSizeBytes;
      auto* dst = outPtr + outStart * innerSizeBytes;
      int numItems = i - lastStart;
      context_.CopyItemsSameDevice(
          data.dtype(), numItems * innerSize, src, dst);
      outStart += numItems;
      lastStart = -1;
    }
    if (i >= outerSize) {
      break;
    }
    // A new run starts when the mask turns true.
    if (lastStart == -1 && maskPtr[i]) {
      lastStart = i;
    }
    if (maskPtr[i] && OutputSize() == 2) {
      *(out_vec++) = i;
    }
  }
  return true;
}
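// Worked illustration: with data = [1,2,3,4,5,6] and mask =
// [True,False,False,True,True,False] (the BooleanMask schema example below),
// the loop above performs two bulk copies -- rows [0,1) and rows [3,5) --
// producing masked_data = [1 4 5] and masked_indices = [0 3 4].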
REGISTER_CPU_OPERATOR(BooleanMask, BooleanMaskOp<CPUContext>);
REGISTER_CPU_OPERATOR(BooleanMaskLengths, BooleanMaskLengthsOp<CPUContext>);
OPERATOR_SCHEMA(BooleanMask)
    .NumInputs(2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Given a 1D `data` tensor and a boolean `mask` tensor of the same shape, returns a
`masked_data` tensor containing only the elements corresponding to positions where
the `mask` is True, and a `masked_indices` tensor containing the indices of the
True elements.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "BooleanMask",
    ["data", "mask"],
    ["masked_data", "masked_indices"]
)

workspace.FeedBlob("data", np.array([1,2,3,4,5,6]))
workspace.FeedBlob("mask", np.array([True,False,False,True,True,False]))
print("data:", workspace.FetchBlob("data"))
print("mask:", workspace.FetchBlob("mask"))
workspace.RunOperatorOnce(op)
print("masked_data:", workspace.FetchBlob("masked_data"))
print("masked_indices:", workspace.FetchBlob("masked_indices"))
```

**Result**

```
data: [1 2 3 4 5 6]
mask: [ True False False  True  True False]
masked_data: [1 4 5]
masked_indices: [0 3 4]
```

</details>

)DOC")
    .Input(0, "data", "(*Tensor*): 1D input tensor")
    .Input(
        1,
        "mask",
        "(*Tensor`<bool>`*): tensor of bools which determines the input elements that will be left in the `masked_data` output tensor; same shape as `data`")
    .Output(
        0,
        "masked_data",
        "(*Tensor*): 1D tensor of same type as `data` input that contains the masked input tensor")
    .Output(
        1,
        "masked_indices",
        "(*Tensor`<int>`*): 1D tensor of indices of the True elements in the `mask` tensor");
OPERATOR_SCHEMA(BooleanMaskLengths)
    .NumInputs(2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Given an int32 `lengths` tensor representing segment lengths and a `mask` (boolean)
tensor, return the segment lengths of the corresponding segmented tensor after
**BooleanMask** is applied.

If the `lengths` tensor is $[a_1, a_2, ..., a_n]$, then the length of the `mask`
tensor must be $a_1 + a_2 + ... + a_n$.

Github Links:
- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/boolean_mask_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "BooleanMaskLengths",
    ["lengths", "mask"],
    ["masked_lengths"]
)

workspace.FeedBlob("lengths", np.array([1,3,2], dtype=np.int32))
workspace.FeedBlob("mask", np.array([False,True,True,False,True,True]))
print("lengths:", workspace.FetchBlob("lengths"))
print("mask:", workspace.FetchBlob("mask"))
workspace.RunOperatorOnce(op)
print("masked_lengths:", workspace.FetchBlob("masked_lengths"))
```

**Result**

```
lengths: [1 3 2]
mask: [False  True  True False  True  True]
masked_lengths: [0 2 2]
```

</details>

)DOC")
    .Input(0, "lengths", "(*Tensor`<int>`*): input tensor containing segment lengths")
    .Input(1, "mask", "(*Tensor`<bool>`*): A 1D bool tensor of values to keep.")
    .Output(
        0,
        "masked_lengths",
        "(*Tensor`<int>`*): 1D tensor of same type as inputs that contains the sequence lengths of the `masked_data` tensor after **BooleanMask** is applied");
NO_GRADIENT(BooleanMask);
NO_GRADIENT(BooleanMaskLengths);

// Negative infinity, the default fill value for masked-out elements.
const float minf = -1.0f * std::numeric_limits<float>::infinity();
// Applies `fn` to every coordinate of the collapsed view of the input and
// writes either the original value or `fill_val` to the output.
template <typename Functor>
void MaskWithFunctor(
    size_t N,
    size_t M,
    int B,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  if (B >= 0) { // with batching: treat the input as a 3-D [B, N, M] view
    for (int i = 0; i < B; ++i) {
      for (int j = 0; j < N; ++j) {
        for (int k = 0; k < M; ++k) {
          auto val = in[N * M * i + M * j + k];
          out[N * M * i + M * j + k] = (fn(j, k, val) ? fill_val : val);
        }
      }
    }
  } else { // without batching: treat the input as a 2-D [N, M] view
    for (int i = 0; i < N; ++i) {
      for (int j = 0; j < M; ++j) {
        auto val = in[M * i + j];
        out[M * i + j] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
// Same as MaskWithFunctor, but each (i, j) masking decision is repeated
// across D trailing elements.
template <typename Functor>
void RepeatedMaskWithFunctor(
    size_t N,
    size_t M,
    int D,
    const float* in,
    Functor fn,
    float fill_val,
    float* out) {
  for (int i = 0; i < N; ++i) {
    for (int j = 0; j < M; ++j) {
      for (int k = 0; k < D; ++k) {
        auto val = in[M * D * i + D * j + k];
        out[M * D * i + D * j + k] = (fn(i, j, val) ? fill_val : val);
      }
    }
  }
}
class SequenceFunctor {
 public:
  explicit SequenceFunctor(const int* sl, const size_t len)
      : sl_(sl), len_(len) {}
  bool operator()(int i, int j, float /* val */) {
    CAFFE_ENFORCE(i < len_, "Out of bound.");
    // Mask every column at or beyond the sequence length of row i.
    return j >= sl_[i];
  }

 private:
  const int* sl_;
  const size_t len_;
};
class WindowFunctor {
 public:
  explicit WindowFunctor(const int* c, int r) : c(c), r(r) {}
  bool operator()(int i, int j, float /* val */) {
    // Mask every column outside the window [c[i] - r, c[i] + r].
    return j > c[i] + r || j < c[i] - r;
  }

 private:
  const int* c;
  const int r;
};
class UpperFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    // mode='upper': fill where j < i (see the SequenceMask schema doc).
    return j < i;
  }
};

class LowerFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    // mode='lower': fill where j > i.
    return j > i;
  }
};

class UpperDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    // mode='upperdiag': fill where j <= i.
    return j <= i;
  }
};

class LowerDiagFunctor {
 public:
  bool operator()(int i, int j, float /* val */) {
    // mode='lowerdiag': fill where j >= i.
    return j >= i;
  }
};
template <>
bool SequenceMaskOp<CPUContext>::RunOnDevice() {
  return DispatchHelper<TensorTypes<float>>::call(this, Input(0));
}
template <>
template <class T>
bool SequenceMaskOp<CPUContext>::DoRunWithType() {
  const Tensor* input = &Input(0);
  const Tensor* sequence_lengths = nullptr;
  const Tensor* window_centers = nullptr;

  if (mode_ == "sequence") {
    sequence_lengths = &Input(1);
  } else if (mode_ == "window") {
    window_centers = &Input(1);
  }

  auto* output = Output(0, input->sizes(), at::dtype<T>());

  const auto canonical_axis = input->canonical_axis_index(axis_);

  // canonical_batch is non-negative if batching, -1 otherwise
  int canonical_batch = -1;
  if ((HasArgument("batch"))) {
    canonical_batch = input->canonical_axis_index(batch_);
  }

  // make sure batch < axis
  if (canonical_batch >= 0) {
    CAFFE_ENFORCE_LT(canonical_batch, canonical_axis);
  }

  // left is the number of rows in the 2-D mask view: the product of the dims
  // between batch and axis when batching, otherwise of the dims up to axis
  const int left =
      (canonical_batch >= 0
           ? input->size_between_dim(canonical_batch, canonical_axis)
           : input->size_to_dim(canonical_axis));
  const int right = input->size_from_dim(canonical_axis);

  // product of dims up to and including batch (-1 when not batching)
  const int batch_dim =
      (canonical_batch >= 0
           ? input->size_to_dim(canonical_batch) * input->size(canonical_batch)
           : -1);

  T fill_val = convert::To<float, T>(grad_ ? 0.0f : fill_val_);
  if (mode_ == "sequence") {
    CAFFE_ENFORCE(
        sequence_lengths, "Sequence length not provided for mode 'sequence'!");
    if (HasArgument("repeat_from_axis")) {
      const int canonical_repeat_from =
          input->canonical_axis_index(repeat_from_);
      const int repeated_dims = input->size_from_dim(canonical_repeat_from);
      const int masked_dims = right / repeated_dims;
      RepeatedMaskWithFunctor(
          left,
          masked_dims,
          repeated_dims,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->numel()),
          fill_val,
          output->template mutable_data<T>());
    } else {
      MaskWithFunctor(
          left,
          right,
          batch_dim,
          input->data<T>(),
          SequenceFunctor(
              sequence_lengths->data<int>(), sequence_lengths->numel()),
          fill_val,
          output->template mutable_data<T>());
    }
  } else if (mode_ == "window") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        WindowFunctor(window_centers->data<int>(), radius_),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "upper") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "lower") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "upperdiag") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        UpperDiagFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else if (mode_ == "lowerdiag") {
    MaskWithFunctor(
        left,
        right,
        batch_dim,
        input->data<T>(),
        LowerDiagFunctor(),
        fill_val,
        output->template mutable_data<T>());
  } else {
    CAFFE_ENFORCE(false, "Unsupported mode for SequenceMaskOp!");
  }

  return true;
}
REGISTER_CPU_OPERATOR(SequenceMask, SequenceMaskOp<CPUContext>);
OPERATOR_SCHEMA(SequenceMask)
    .NumInputs(1, 2)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Mask op designed for use in attention mechanisms for sequence modeling tasks.
Supports batching: given batch_dim, collapses dims 0 through batch_dim into a
single dimension, e.g. if tensor dims are [4,2,1,3,4] and batch_dim=2, first
collapse tensor to [4*2*1,3,4], then mask each batch [i,:,:].

Three current operating modes:

1) Given a 2D input tensor and 1D tensor of sequence lengths, for each row i in
the input tensor, set elements in that row to -inf if their column index
j >= sequence_lengths[i]. This mode takes two inputs and argument mode =
'sequence'.

2) Triangular mask. Given row index i and column index j, set elements to -inf
given the following conditions:

      mode='upper', x_ij = -inf if j < i
      mode='lower', x_ij = -inf if j > i
      mode='upperdiag', x_ij = -inf if j <= i
      mode='lowerdiag', x_ij = -inf if j >= i

This mode takes one input.

3) Window Mask. Given a 2D input tensor and 1D tensor of window centers,
for each row i in the input tensor, set elements in that row to -inf
if their column index j is outside [center - radius, center + radius].
This mode takes two inputs and argument mode = 'window'.
Argument 'radius' should be provided.
)DOC")
    .Input(0, "input", "Tensor to apply masking to")
    .Input(1, "sequence_lengths", "1D Tensor of sequence lengths for mode #1")
    .Output(0, "masked_tensor", "Input tensor with masking applied")
    .Arg(
        "mode",
        "(string) Mode selection. Possible values: "
        "'sequence', 'window', 'upper', 'lower', 'upperdiag', 'lowerdiag'")
    .Arg(
        "axis",
        "(int) Beginning axis of row elements. All dimensions to the left "
        "will be treated as row indices and those to the right (inclusive) "
        "will be treated as column indices in the 2D mask")
    .Arg("grad", "(bool) operate in gradient mode")
    .Arg("radius", "(int) radius of windows in window mode")
    .Arg("batch", "(int) batch dimension of tensor (optional)")
    .Arg(
        "repeat_from_axis",
        "(int) used when mask should be repeated for "
        "one or more data dimensions (beginning at this axis). "
        "(currently only supported for sequence mode without batch argument)");
class GetSequenceMaskGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // Forward all of the forward op's arguments and add grad=true so the
    // gradient pass reuses the same masking pattern with a zero fill value.
    vector<Argument> args;
    args.reserve(Def().arg().size());
    for (const auto& x : Def().arg()) {
      args.push_back(x);
    }
    args.push_back(MakeArgument<bool>("grad", true));
    if (def_.input_size() == 1) {
      return SingleGradientDef(
          "SequenceMask",
          "",
          vector<string>{GO(0)},
          vector<string>{GI(0)},
          args);
    } else {
      return SingleGradientDef(
          "SequenceMask",
          "",
          vector<string>{GO(0), I(1)},
          vector<string>{GI(0)},
          args);
    }
  }

  bool CopyArguments() const override {
    return false;
  }
};

REGISTER_GRADIENT(SequenceMask, GetSequenceMaskGradient);

} // namespace caffe2