Caffe2 - C++ API
A deep learning, cross-platform ML framework
sequence_ops.cc
#include "caffe2/operators/sequence_ops.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/tensor.h"

namespace caffe2 {

template <>
template <typename T>
void GatherPaddingOp<CPUContext>::GatherPadding(
    const int outer_size,
    const int lengths_size,
    const int block_size,
    const int pad_width,
    const T* in_ptr,
    const int* lengths_ptr,
    T* padding_start_ptr,
    T* padding_end_ptr) {
  CAFFE_ENFORCE(
      (!std::is_same<bool, T>::value),
      "GatherPadding should not be executed on an input of type bool, as "
      "addition is not properly defined with booleans.");
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check total length consistency
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // accumulate start paddings
    for (int j = 0; j < startPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        // Note: MSVC warns about unsafe use of type bool in operation.
        // This is now guarded by a CAFFE_ENFORCE so we can suppress it.
        #pragma warning(suppress: 4804)
        padding_start_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
    in_ptr += block_size * (length - pad_width);
    // accumulate end paddings
    for (int j = 0; j < endPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
        #pragma warning(suppress: 4804)
        padding_end_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
  }
}
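
// Worked example (illustrative, not part of the original source): with
// block_size = 2, startPaddingWidth_ = endPaddingWidth_ = 1 and a single
// padded range of length 4, the rows
//   [s0 s1] [x0 x1] [y0 y1] [e0 e1]
// contribute [s0 s1] to padding_start_ptr and [e0 e1] to padding_end_ptr;
// with multiple ranges the per-range contributions are summed element-wise.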

template <>
template <typename T>
bool RemovePaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.ndim(), 1);
  const int32_t outer_size = in.dims()[0];
  const auto block_size = std::accumulate(
      in.dims().begin() + 1, in.dims().end(), 1, std::multiplies<TIndex>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;

  // if no lengths tensor is provided, assume it is a single full-span entry
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.size();
  }

  auto* out = Output(0);
  {
    auto out_dims = in.dims();
    out_dims[0] -= pad_width * lengths_size;
    out->Resize(std::move(out_dims));
  }
  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    std::copy(
        in_ptr + block_size * startPaddingWidth_,
        in_ptr + block_size * (length - endPaddingWidth_),
        out_ptr);
    in_ptr += block_size * length;
    out_ptr += block_size * (length - pad_width);
  }
  if (OutputSize() == 1) {
    return true;
  }
  auto* lengths_out = Output(1);
  lengths_out->Resize(lengths_size);
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x - pad_width; });
  return true;
}
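
// Shape sketch (illustrative, not part of the original source): with
// startPaddingWidth_ = endPaddingWidth_ = 1, input shape (7, D) and padded
// lengths = {3, 4}, pad_width is 2, the output is resized to
// (7 - 2 * 2, D) = (3, D), and lengths_out becomes {1, 2}.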

template <>
template <typename T>
bool AddPaddingOp<CPUContext>::MakePadding(
    const T* in_ptr,
    T* out_ptr,
    const int32_t* lengths_ptr,
    int32_t lengths_size,
    int32_t outer_size,
    const T* padding_start_ptr,
    const T* padding_end_ptr,
    int64_t block_size) {
  if (!lengths_ptr) {
    lengths_ptr = &outer_size;
  }

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that total length is consistent
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy padding before
    if (!padding_start_ptr) {
      memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T));
      out_ptr += block_size * startPaddingWidth_;
    } else {
      for (int j = 0; j < startPaddingWidth_; ++j) {
        std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
    // copy payload
    const auto num_elems = block_size * length;
    std::copy(in_ptr, in_ptr + num_elems, out_ptr);
    in_ptr += num_elems;
    out_ptr += num_elems;
    // copy padding after
    if (!padding_end_ptr) {
      memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T));
      out_ptr += block_size * endPaddingWidth_;
    } else {
      for (int j = 0; j < endPaddingWidth_; ++j) {
        std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
  }
  if (OutputSize() == 1) {
    return true;
  }
  auto* lengths_out = Output(1);
  lengths_out->Resize(lengths_size);
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x + pad_width; });
  return true;
}

template <>
bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
  auto& lengths = Input(0);
  auto* lengthsPtr = lengths.template data<int32_t>();
  CAFFE_ENFORCE(lengths.ndim() == 1, "LENGTH should be 1-D");
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");

  auto* out_lengths = Output(0);
  int needPadding = 0;
  int sumLen = 0;
  for (int i = 0; i < lengths.size(); ++i) {
    if (lengthsPtr[i] == 0) {
      needPadding++;
    }
    sumLen += lengthsPtr[i];
  }

  out_lengths->Resize(lengths.size());
  auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>();
  for (int i = 0; i < lengths.size(); ++i) {
    if (lengthsPtr[i] == 0) {
      outLengthsPtr[i] = 1;
    } else {
      outLengthsPtr[i] = lengthsPtr[i];
    }
  }

  for (int k = 0; k < InputSize() - 1; k++) {
    auto& features = Input(1 + k);
    CAFFE_ENFORCE(features.ndim() >= 1, "FEATURE should be at least 1-D");
    CAFFE_ENFORCE(
        features.dim(0) == sumLen, "FEATURE and LENGTH should be consistent");
    const auto block_size = features.size_from_dim(1);

    auto* out_features = Output(1 + k);
    auto outDim = features.dims();
    outDim.at(0) += needPadding;
    out_features->Resize(outDim);
    auto dst =
        static_cast<char*>(out_features->raw_mutable_data(features.meta()));
    auto src_base = static_cast<const char*>(features.raw_data());
    // copy data and add padding index as zero
    Tensor<CPUContext> zero;
    zero.Resize(block_size);
    auto zeroPtr =
        static_cast<const char*>(zero.raw_mutable_data(features.meta()));
    int start_dest = 0;
    int start_src = 0;
    for (int i = 0; i < lengths.size(); ++i) {
      if (lengthsPtr[i] == 0) {
        context_.template CopyItems<CPUContext, CPUContext>(
            features.meta(),
            block_size,
            zeroPtr,
            dst + start_dest * features.meta().itemsize());
        start_dest += block_size;
      } else {
        auto src = src_base + start_src * features.meta().itemsize();
        context_.template CopyItems<CPUContext, CPUContext>(
            features.meta(),
            lengthsPtr[i] * block_size,
            src,
            dst + start_dest * features.meta().itemsize());
        start_src += lengthsPtr[i] * block_size;
        start_dest += lengthsPtr[i] * block_size;
      }
    }
  }
  return true;
}

REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>);

struct GetAddPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    vector<OperatorDef> ops;
    // gradient on the data
    ops.push_back(CreateOperatorDef(
        "RemovePadding", "", g_inputs, vector<string>{GI(0)}));
    // gradient on the start_padding (and end_padding)
    if (Def().input_size() >= 3) {
      std::vector<string> padding_grads{GI(2)};
      if (Def().input_size() == 4) {
        padding_grads.push_back(GI(3));
      }
      auto g_inputs2 = g_inputs;
      ops.push_back(
          CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads));
    }
    return ops;
  }
};
REGISTER_GRADIENT(AddPadding, GetAddPaddingGradient);
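
// Gradient wiring sketch (illustrative, not part of the original source): for
// AddPadding(data, lengths, start_padding) -> (out, out_lengths), the data
// gradient is RemovePadding(d_out, out_lengths) and the padding gradient is
// GatherPadding(d_out, out_lengths), as assembled by GetGradientDefs() above.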

struct GetRemovePaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // whether to provide lengths as input to gradient
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    return SingleGradientDef("AddPadding", "", g_inputs, vector<string>{GI(0)});
  }
};
REGISTER_GRADIENT(RemovePadding, GetRemovePaddingGradient);

OPERATOR_SCHEMA(AddPadding)
    .NumInputs(1, 4)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Given a partitioned tensor T<N, D1..., Dn>, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension N,
with given range lengths, return a tensor T<N + 2*padding_width, D1..., Dn>
with paddings added to the start and end of each range.
Optionally, different paddings can be provided for the beginning and the end.
Each provided padding must be a tensor T<D1..., Dn>.

If no padding is provided, zero padding is added.
If no lengths vector is provided, padding is added only once,
at the start and end of the data.
)DOC")
    .Arg(
        "padding_width",
        "Number of copies of padding to add around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "(T<N, D1..., Dn>) Input data")
    .Input(
        1,
        "lengths",
        "(i64) Num of elements in each range. sum(lengths) = N.")
    .Input(2, "start_padding", "T<D1..., Dn> Padding data for range start.")
    .Input(
        3,
        "end_padding",
        "T<D1..., Dn> (optional) Padding for range end. "
        "If not provided, start_padding is used as end_padding as well.")
    .Output(0, "data_out", "(T<N + 2*padding_width, D1..., Dn>) Padded data.")
    .Output(1, "lengths_out", "(i64, optional) Lengths for each padded range.");
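
// Worked example (illustrative, not part of the original source): with
// padding_width = 1, data_in of shape (5, 2), lengths = {2, 3} and no
// explicit padding tensors, each range is wrapped in one row of zeros:
//   [a] [b]       ->  [0] [a] [b] [0]
//   [c] [d] [e]   ->  [0] [c] [d] [e] [0]
// so data_out has shape (5 + 2 * 2, 2) = (9, 2) and lengths_out = {4, 5}.
//
// Usage sketch (hypothetical; the blob names and helper calls below are
// assumptions, not taken from this file):
//   auto def = CreateOperatorDef(
//       "AddPadding",
//       "",
//       std::vector<string>{"data_in", "lengths"},
//       std::vector<string>{"data_out", "lengths_out"},
//       std::vector<Argument>{MakeArgument<int>("padding_width", 1)});
//   CreateOperator(def, &workspace)->Run();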

OPERATOR_SCHEMA(RemovePadding)
    .NumInputs(1, 2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Remove padding around the edges of each segment of the input data. This is
the reverse operation of AddPadding, and uses the same arguments and
conventions for input and output data format.
)DOC")
    .Arg("padding_width", "Outer-size of padding to remove around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Input data")
    .Input(
        1,
        "lengths",
        "(i64) Num of elements in each range. sum(lengths) = N. "
        "If not provided, considers all data as a single segment.")
    .Output(0, "data_out", "(T<N - 2*padding_width, D1..., Dn>) Unpadded data.")
    .Output(
        1,
        "lengths_out",
        "(i64, optional) Lengths for each unpadded range.");
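
// Worked example (illustrative, not part of the original source): applying
// RemovePadding with padding_width = 1 to the AddPadding output above
// (shape (9, 2), lengths = {4, 5}) strips one leading and one trailing row
// from each range, recovering the original (5, 2) data with
// lengths_out = {2, 3}.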

OPERATOR_SCHEMA(GatherPadding)
    .NumInputs(2)
    .NumOutputs(1, 2)
    .SetDoc(R"DOC(
Gather the sum of start and end paddings in a padded input sequence. Used in
order to compute the gradients of AddPadding w.r.t. the padding tensors.
)DOC")
    .Arg("padding_width", "Outer-size of padding present around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Padded input data")
    .Input(
        1,
        "lengths",
        "(i64) Num of elements in each range. sum(lengths) = N. "
        "If not provided, considers all data as a single segment.")
    .Output(
        0,
        "padding_sum",
        "Sum of all start paddings, or of all "
        "paddings if end_padding_sum is not provided.")
    .Output(
        1,
        "end_padding_sum",
        "T<D1..., Dn> Sum of all end paddings, if provided.");
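
// Worked example (illustrative, not part of the original source): for a
// padded (9, 2) input with lengths = {4, 5} and padding_width = 1,
// padding_sum is the element-wise sum of the first row of each range, and
// end_padding_sum (if requested) is the sum of the last row of each range;
// both have shape (2). This is the accumulation performed by GatherPadding()
// above.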

OPERATOR_SCHEMA(PadEmptySamples)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Pad empty fields given the lengths and index features.

Input(0) is a blob holding the lengths of the samples in one batch;
[Input(1), ..., Input(num_fields)] is a list of tensors containing the data
for each field of the features.

PadEmptySamples is thread safe.
)DOC")
    .Input(0, "lengths", "A blob containing the lengths of the samples in the batch.")
    .Output(
        0,
        "out_lengths",
        "Tensor containing lengths with empty samples padded.");
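
// Worked example (illustrative, not part of the original source): with
// lengths = {2, 0, 1} and a feature tensor of shape (3, D), the empty middle
// sample receives one all-zero row, giving out_lengths = {2, 1, 1} and an
// output feature tensor of shape (4, D).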

} // namespace caffe2