Caffe2 - C++ API
A deep learning, cross-platform ML framework
sequence_ops.cc
#include "caffe2/operators/sequence_ops.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {

template <>
template <typename T>
void GatherPaddingOp<CPUContext>::GatherPadding(
    const int outer_size,
    const int lengths_size,
    const int block_size,
    const int pad_width,
    const T* in_ptr,
    const int* lengths_ptr,
    T* padding_start_ptr,
    T* padding_end_ptr) {
  CAFFE_ENFORCE(
      (!std::is_same<bool, T>::value),
      "GatherPadding should not be executed on an input of type bool, as "
      "addition is not properly defined with booleans.");
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check total length consistency
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // accumulate start paddings
    for (int j = 0; j < startPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        // Note: MSVC warns about unsafe use of type bool in operation.
        // This is now guarded by a CAFFE_ENFORCE so we can suppress it.
#pragma warning(suppress: 4804)
        padding_start_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
    // skip the payload; each range is `length` blocks long including padding
    in_ptr += block_size * (length - pad_width);
    // accumulate end paddings
    for (int j = 0; j < endPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
#pragma warning(suppress: 4804)
        padding_end_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
  }
}

template <>
template <typename T>
bool RemovePaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.dim(), 1);
  const int32_t outer_size = in.sizes()[0];
  // block_size is the number of elements in one row of the outer dimension
  const auto block_size = std::accumulate(
      in.sizes().begin() + 1, in.sizes().end(), 1, std::multiplies<int64_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;

  // if no lengths input is provided, assume the data is a single
  // full-span entry
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.numel();
  }

  auto out_dims = in.sizes().vec();
  out_dims[0] -= pad_width * lengths_size;
  auto* out = Output(0, std::move(out_dims), at::dtype<T>());

  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy the payload of each range, skipping its start and end padding
    std::copy(
        in_ptr + block_size * startPaddingWidth_,
        in_ptr + block_size * (length - endPaddingWidth_),
        out_ptr);
    in_ptr += block_size * length;
    out_ptr += block_size * (length - pad_width);
  }
  if (OutputSize() == 1) {
    return true;
  }

  // also emit the adjusted lengths, shrunk by the removed padding
  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x - pad_width; });
  return true;
}

template <>
template <typename T>
bool AddPaddingOp<CPUContext>::MakePadding(
    const T* in_ptr,
    T* out_ptr,
    const int32_t* lengths_ptr,
    int32_t lengths_size,
    int32_t outer_size,
    const T* padding_start_ptr,
    const T* padding_end_ptr,
    int64_t block_size) {
  if (!lengths_ptr) {
    lengths_ptr = &outer_size;
  }

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy padding before the payload; zero-fill if no padding tensor given
    if (!padding_start_ptr) {
      memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T));
      out_ptr += block_size * startPaddingWidth_;
    } else {
      for (int j = 0; j < startPaddingWidth_; ++j) {
        std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
    // copy payload
    const auto num_elems = block_size * length;
    std::copy(in_ptr, in_ptr + num_elems, out_ptr);
    in_ptr += num_elems;
    out_ptr += num_elems;
    // copy padding after the payload
    if (!padding_end_ptr) {
      memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T));
      out_ptr += block_size * endPaddingWidth_;
    } else {
      for (int j = 0; j < endPaddingWidth_; ++j) {
        std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
  }
  if (OutputSize() == 1) {
    return true;
  }

  // also emit the adjusted lengths, grown by the added padding
  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x + pad_width; });
  return true;
}

template <>
bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
  auto& lengths = Input(0);
  auto* lengthsPtr = lengths.template data<int32_t>();
  CAFFE_ENFORCE(lengths.dim() == 1, "LENGTH should be 1-D");
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");

  // count empty samples and the total number of rows across all samples
  int needPadding = 0;
  int sumLen = 0;
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      needPadding++;
    }
    sumLen += lengthsPtr[i];
  }

  // empty samples get length 1 in the output; others keep their length
  auto* out_lengths = Output(0, {lengths.numel()}, at::dtype<int32_t>());
  auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>();
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      outLengthsPtr[i] = 1;
    } else {
      outLengthsPtr[i] = lengthsPtr[i];
    }
  }

  for (int k = 0; k < InputSize() - 1; k++) {
    auto& features = Input(1 + k);
    CAFFE_ENFORCE(features.dim() >= 1, "FEATURE should be at least 1-D");
    CAFFE_ENFORCE(
        features.size(0) == sumLen, "FEATURE and LENGTH should be consistent");
    const auto block_size = features.size_from_dim(1);

    auto* out_features = Output(1 + k);
    auto outDim = features.sizes().vec();
    outDim.at(0) += needPadding;
    out_features->Resize(outDim);
    auto dst =
        static_cast<char*>(out_features->raw_mutable_data(features.dtype()));
    auto src_base = static_cast<const char*>(features.raw_data());
    // copy data, inserting a zero block wherever a sample is empty
    Tensor zero{CPU};
    zero.Resize(block_size);
    auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype()));
    memset(zeroPtr, 0, zero.nbytes());
    int start_dest = 0;
    int start_src = 0;
    for (int i = 0; i < lengths.numel(); ++i) {
      if (lengthsPtr[i] == 0) {
        context_.CopyItemsSameDevice(
            features.dtype(),
            block_size,
            zeroPtr,
            dst + start_dest * features.dtype().itemsize());
        start_dest += block_size;
      } else {
        auto src = src_base + start_src * features.dtype().itemsize();
        context_.CopyItemsSameDevice(
            features.dtype(),
            lengthsPtr[i] * block_size,
            src,
            dst + start_dest * features.dtype().itemsize());
        start_src += lengthsPtr[i] * block_size;
        start_dest += lengthsPtr[i] * block_size;
      }
    }
  }
  return true;
}

REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>);

struct GetAddPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    vector<OperatorDef> ops;
    // gradient on the data
    ops.push_back(CreateOperatorDef(
        "RemovePadding", "", g_inputs, vector<string>{GI(0)}));
    // gradient on the start_padding (and end_padding)
    if (Def().input_size() >= 3) {
      std::vector<string> padding_grads{GI(2)};
      if (Def().input_size() == 4) {
        padding_grads.push_back(GI(3));
      }
      auto g_inputs2 = g_inputs;
      ops.push_back(
          CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads));
    }
    return ops;
  }
};
REGISTER_GRADIENT(AddPadding, GetAddPaddingGradient);

struct GetRemovePaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    return SingleGradientDef("AddPadding", "", g_inputs, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RemovePadding, GetRemovePaddingGradient);

OPERATOR_SCHEMA(AddPadding)
    .NumInputs(1, 4)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Given a partitioned tensor $T<N, D_1, ..., D_n>$, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension $N$, return a
tensor $T<(N + 2 * padding\_width), D_1, ..., D_n>$ with paddings added to the
start and end of each range. With a lengths vector of $k$ ranges, the outer
dimension grows by $2 * padding\_width$ rows per range, i.e. by
$2 * k * padding\_width$ in total.

Optionally, different paddings can be provided for the beginning and the end.
Any padding provided must be a tensor $T<D_1, ..., D_n>$. If no padding tensor
is provided, zero padding is added. If no lengths vector is provided, the whole
input is treated as a single range, so padding is added only once, at the start
and end of the data.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.rand(3,2,2).astype(np.float32)))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out:", workspace.FetchBlob("lengths_out"))
```

**Result**

```
X: [[[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]]
Y: [[[0.         0.        ]
  [0.         0.        ]]

 [[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]

 [[0.         0.        ]
  [0.         0.        ]]]
lengths_out: [5]
```

</details>
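
A second, minimal sketch (input values chosen by hand, not from the original
docs) showing explicit padding tensors: when `start_padding` is given and
`end_padding` is not, the same tensor pads both ends.

<details>

<summary> <b>Example (explicit padding)</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths", "start_padding"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", np.array([[1., 2.], [3., 4.]]).astype(np.float32))
workspace.FeedBlob("lengths", np.array([2]).astype(np.int32))
# the padding block has shape D_1, ..., D_n -- here (2,)
workspace.FeedBlob("start_padding", np.array([9., 9.]).astype(np.float32))

workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out:", workspace.FetchBlob("lengths_out"))
```

**Expected result** (derived by hand from MakePadding above)

```
Y: [[9. 9.]
 [1. 2.]
 [3. 4.]
 [9. 9.]]
lengths_out: [4]
```

</details>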

)DOC")
    .Arg(
        "padding_width",
        "*(type: int)* Number of copies of padding to add around each range.")
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. If "
        "this is not set, will use same as `padding_width`.")
    .Input(
        0,
        "data_in",
        "*(type: Tensor)* Input data ($T<N, D_1, ..., D_n>$).")
    .Input(
        1,
        "lengths",
        "*(type: Tensor`<int>`)* Number of elements in each range. "
        "sum(lengths) = N.")
    .Input(
        2,
        "start_padding",
        "*(type: Tensor)* [OPTIONAL] Padding data for range start "
        "($T<D_1, ..., D_n>$).")
    .Input(
        3,
        "end_padding",
        "*(type: Tensor)* [OPTIONAL] Padding for range end. If not "
        "provided, `start_padding` is used ($T<D_1, ..., D_n>$).")
    .Output(
        0,
        "data_out",
        "*(type: Tensor)* Padded data tensor ($T<N + 2*padding_width, "
        "D_1, ..., D_n>$).")
    .Output(
        1,
        "lengths_out",
        "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each padded range.");

OPERATOR_SCHEMA(RemovePadding)
    .NumInputs(1, 2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Remove padding around the edges of each segment of the input data. This is the
reverse operation of **AddPadding**, and uses the same arguments and conventions
for input and output data format.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

addpad_op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths_add"],
    ["Y", "lengths_out_add"],
    padding_width=1
)

rmpad_op = core.CreateOperator(
    "RemovePadding",
    ["Y", "lengths_rm"],
    ["Z", "lengths_out_rm"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.randint(20, size=(3,5))))
workspace.FeedBlob("lengths_add", np.array([3]).astype(np.int32))
workspace.FeedBlob("lengths_rm", np.array([5]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(addpad_op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out_add:", workspace.FetchBlob("lengths_out_add"))

workspace.RunOperatorOnce(rmpad_op)
print("Z:", workspace.FetchBlob("Z"))
print("lengths_out_rm:", workspace.FetchBlob("lengths_out_rm"))
```

**Result**

```
X: [[17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]]
Y: [[ 0  0  0  0  0]
 [17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]
 [ 0  0  0  0  0]]
lengths_out_add: [5]
Z: [[17 19  1  9  1]
 [19  3  5 19  1]
 [16  0  0  0  4]]
lengths_out_rm: [3]
```

</details>

)DOC")
    .Arg(
        "padding_width",
        "*(type: int)* Outer-size of padding to remove around each range.")
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. "
        "If this is not set, will use same as `padding_width`.")
    .Input(
        0,
        "data_in",
        "*(type: Tensor)* Input tensor ($T<N, D_1, ..., D_n>$).")
    .Input(
        1,
        "lengths",
        "*(type: Tensor`<int>`)* Number of elements in each range. "
        "sum(lengths) = N. If not provided, considers all data as a single "
        "segment.")
    .Output(
        0,
        "data_out",
        "*(type: Tensor)* Unpadded data tensor "
        "($T<N - 2*padding_width, D_1, ..., D_n>$).")
    .Output(
        1,
        "lengths_out",
        "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each unpadded range.");

OPERATOR_SCHEMA(GatherPadding)
    .NumInputs(2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Gather the sum of the start and end paddings in a padded input sequence. Used
to compute the gradients of AddPadding w.r.t. the padding tensors.
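
A minimal sketch of typical usage (input values chosen by hand; the expected
result follows from the GatherPadding implementation above):

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "GatherPadding",
    ["Y", "lengths"],
    ["padding_sum", "end_padding_sum"],
    padding_width=1
)

# one padded range of 5 rows: 1 start-padding row, 3 payload rows,
# 1 end-padding row
workspace.FeedBlob("Y", np.array(
    [[1., 2.], [3., 4.], [5., 6.], [7., 8.], [9., 10.]]).astype(np.float32))
workspace.FeedBlob("lengths", np.array([5]).astype(np.int32))

workspace.RunOperatorOnce(op)
print("padding_sum:", workspace.FetchBlob("padding_sum"))
print("end_padding_sum:", workspace.FetchBlob("end_padding_sum"))
```

Expected result (derived by hand: the single start-padding row and the single
end-padding row):

```
padding_sum: [1. 2.]
end_padding_sum: [ 9. 10.]
```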
)DOC")
    .Arg("padding_width", "Outer-size of padding present around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Padded input data")
    .Input(
        1,
        "lengths",
        "(i32) Number of elements in each range. sum(lengths) = N.")
    .Output(
        0,
        "padding_sum",
        "Sum of all start paddings, or of all "
        "paddings if end_padding_sum is not provided.")
    .Output(
        1,
        "end_padding_sum",
        "T<D1..., Dn> Sum of all end paddings, if provided.");

OPERATOR_SCHEMA(PadEmptySamples)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Pad empty samples given lengths and feature data.

Input(0) is a blob holding the lengths of the samples in one batch;
[Input(1), ..., Input(num_fields)] is a list of tensors containing the data
for each field of the features. Each sample of length zero is replaced by a
single zero-filled row in every feature tensor, and its length becomes 1.

PadEmptySamples is thread safe.
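
A minimal sketch of typical usage (input values chosen by hand; the expected
result follows from the RunOnDevice implementation above):

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "PadEmptySamples",
    ["lengths", "features"],
    ["out_lengths", "out_features"],
)

# samples 0 and 2 are empty; sample 1 has two rows
workspace.FeedBlob("lengths", np.array([0, 2, 0]).astype(np.int32))
workspace.FeedBlob("features", np.array([[1., 2.], [3., 4.]]).astype(np.float32))

workspace.RunOperatorOnce(op)
print("out_lengths:", workspace.FetchBlob("out_lengths"))
print("out_features:", workspace.FetchBlob("out_features"))
```

Expected result (empty samples become single zero rows):

```
out_lengths: [1 2 1]
out_features: [[0. 0.]
 [1. 2.]
 [3. 4.]
 [0. 0.]]
```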
)DOC")
    .Input(0, "lengths", "A 1-D int32 tensor containing the sample lengths.")
    .Output(
        0,
        "out_lengths",
        "Tensor containing lengths with empty samples padded.");

} // namespace caffe2