1 #include "caffe2/operators/sequence_ops.h" 2 #include "caffe2/core/operator.h" 3 #include "caffe2/core/tensor.h" 9 void GatherPaddingOp<CPUContext>::GatherPadding(
11 const int lengths_size,
15 const int* lengths_ptr,
19 (!std::is_same<bool, T>::value),
20 "GatherPadding should not be executed on an input of type bool, as " 21 "addition is not properly defined with booleans.");
  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    // check that the sum of lengths stays within the outer dimension
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // accumulate the start-padding blocks of this range
    for (int j = 0; j < startPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
#pragma warning(suppress : 4804)
        padding_start_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
    // skip the payload, then accumulate the end-padding blocks
    in_ptr += block_size * (length - pad_width);
    for (int j = 0; j < endPaddingWidth_; ++j) {
      for (int k = 0; k < block_size; ++k) {
#pragma warning(suppress : 4804)
        padding_end_ptr[k] += in_ptr[k];
      }
      in_ptr += block_size;
    }
  }
}
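For orientation, a hedged NumPy sketch of what this kernel computes; `gather_padding` and its argument names are illustrative, not part of the Caffe2 API. For each range it sums the first `startPaddingWidth_` and last `endPaddingWidth_` blocks, accumulating across all ranges:

```
import numpy as np

def gather_padding(data, lengths, start_width, end_width):
    # data: (N, block_size), partitioned along axis 0 by `lengths`
    block_size = data.shape[1]
    start_sum = np.zeros(block_size, dtype=data.dtype)
    end_sum = np.zeros(block_size, dtype=data.dtype)
    offset = 0
    for length in lengths:
        rng = data[offset:offset + length]
        start_sum += rng[:start_width].sum(axis=0)
        end_sum += rng[length - end_width:].sum(axis=0)
        offset += length
    return start_sum, end_sum
```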
template <>
template <typename T>
bool RemovePaddingOp<CPUContext>::DoRunWithType() {
  const auto& in = Input(0);
  CAFFE_ENFORCE_GE(in.dim(), 1);
  const int32_t outer_size = in.sizes()[0];
  const auto block_size = std::accumulate(
      in.sizes().begin() + 1, in.sizes().end(), 1, std::multiplies<int64_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  // if no lengths vector is provided, treat the whole input as a single range
  const int32_t* lengths_ptr = &outer_size;
  int64_t lengths_size = 1;
  if (InputSize() > 1) {
    const auto& lengths = Input(1);
    lengths_ptr = lengths.data<int32_t>();
    lengths_size = lengths.numel();
  }

  auto out_dims = in.sizes().vec();
  out_dims[0] -= pad_width * lengths_size;
  auto* out = Output(0, std::move(out_dims), at::dtype<T>());
  const auto* in_ptr = in.template data<T>();
  auto* out_ptr = out->template mutable_data<T>();

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // copy only the payload of each range, skipping the padding blocks
    std::copy(
        in_ptr + block_size * startPaddingWidth_,
        in_ptr + block_size * (length - endPaddingWidth_),
        out_ptr);
    in_ptr += block_size * length;
    out_ptr += block_size * (length - pad_width);
  }
  if (OutputSize() == 1) {
    return true;
  }
  // shrink each output length by the removed padding
  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x - pad_width; });
  return true;
}
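A hedged NumPy sketch of the same transformation for a 2-D input; `remove_padding` and its arguments are illustrative names, assuming every range carries `start_width + end_width` rows of padding:

```
import numpy as np

def remove_padding(data, lengths, start_width, end_width):
    pieces, out_lengths = [], []
    offset = 0
    for length in lengths:
        rng = data[offset:offset + length]
        pieces.append(rng[start_width:length - end_width])
        out_lengths.append(length - start_width - end_width)
        offset += length
    return np.concatenate(pieces), np.array(out_lengths, dtype=np.int32)
```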
template <>
template <typename T>
bool AddPaddingOp<CPUContext>::MakePadding(
    const T* in_ptr,
    T* out_ptr,
    const int32_t* lengths_ptr,
    int32_t lengths_size,
    int32_t outer_size,
    const T* padding_start_ptr,
    const T* padding_end_ptr,
    int64_t block_size) {
  // if no lengths vector is provided, treat the whole input as a single range
  if (!lengths_ptr) {
    lengths_ptr = &outer_size;
  }

  int64_t total_length = 0;
  for (int i = 0; i < lengths_size; ++i) {
    const auto length = lengths_ptr[i];
    total_length += length;
    CAFFE_ENFORCE_LE(total_length, outer_size);
    // write the start padding (zero-filled if no padding block was provided)
    if (!padding_start_ptr) {
      memset(out_ptr, 0, block_size * startPaddingWidth_ * sizeof(T));
      out_ptr += block_size * startPaddingWidth_;
    } else {
      for (int j = 0; j < startPaddingWidth_; ++j) {
        std::copy(padding_start_ptr, padding_start_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
    // copy the payload of this range
    const auto num_elems = block_size * length;
    std::copy(in_ptr, in_ptr + num_elems, out_ptr);
    in_ptr += num_elems;
    out_ptr += num_elems;
    // write the end padding
    if (!padding_end_ptr) {
      memset(out_ptr, 0, block_size * endPaddingWidth_ * sizeof(T));
      out_ptr += block_size * endPaddingWidth_;
    } else {
      for (int j = 0; j < endPaddingWidth_; ++j) {
        std::copy(padding_end_ptr, padding_end_ptr + block_size, out_ptr);
        out_ptr += block_size;
      }
    }
  }
  if (OutputSize() == 1) {
    return true;
  }
  // grow each output length by the added padding
  auto* lengths_out = Output(1, {lengths_size}, at::dtype<int32_t>());
  const auto pad_width = startPaddingWidth_ + endPaddingWidth_;
  std::transform(
      lengths_ptr,
      lengths_ptr + lengths_size,
      lengths_out->template mutable_data<int32_t>(),
      [pad_width](int32_t x) { return x + pad_width; });
  return true;
}
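The mirror image of the RemovePadding sketch above, again hedged and using illustrative names only; padding rows default to zeros when no explicit padding block is given, matching the memset branches in the kernel:

```
import numpy as np

def add_padding(data, lengths, start_width, end_width,
                start_pad=None, end_pad=None):
    block_size = data.shape[1]
    if start_pad is None:
        start_pad = np.zeros(block_size, dtype=data.dtype)
    if end_pad is None:
        end_pad = np.zeros(block_size, dtype=data.dtype)
    pieces, out_lengths = [], []
    offset = 0
    for length in lengths:
        pieces.append(np.tile(start_pad, (start_width, 1)))
        pieces.append(data[offset:offset + length])
        pieces.append(np.tile(end_pad, (end_width, 1)))
        out_lengths.append(length + start_width + end_width)
        offset += length
    return np.concatenate(pieces), np.array(out_lengths, dtype=np.int32)
```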
template <>
bool PadEmptySamplesOp<CPUContext>::RunOnDevice() {
  auto& lengths = Input(0);
  auto* lengthsPtr = lengths.template data<int32_t>();
  CAFFE_ENFORCE(lengths.dim() == 1, "LENGTH should be 1-D");
  CAFFE_ENFORCE(InputSize() >= 1, "Input size must be no less than 1");
  // count empty samples and the total number of feature rows
  int needPadding = 0;
  int sumLen = 0;
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      needPadding++;
    }
    sumLen += lengthsPtr[i];
  }

  // empty samples get an output length of 1; all others are passed through
  auto* out_lengths = Output(0, {lengths.numel()}, at::dtype<int32_t>());
  auto* outLengthsPtr = out_lengths->template mutable_data<int32_t>();
  for (int i = 0; i < lengths.numel(); ++i) {
    if (lengthsPtr[i] == 0) {
      outLengthsPtr[i] = 1;
    } else {
      outLengthsPtr[i] = lengthsPtr[i];
    }
  }
  for (int k = 0; k < InputSize() - 1; k++) {
    auto& features = Input(1 + k);
    CAFFE_ENFORCE(features.dim() >= 1, "FEATURE should be at least 1-D");
    CAFFE_ENFORCE(
        features.size(0) == sumLen, "FEATURE and LENGTH should be consistent");
    const auto block_size = features.size_from_dim(1);

    // the output grows by one block per empty sample
    auto* out_features = Output(1 + k);
    auto outDim = features.sizes().vec();
    outDim.at(0) += needPadding;
    out_features->Resize(outDim);
    auto dst =
        static_cast<char*>(out_features->raw_mutable_data(features.dtype()));
    auto src_base = static_cast<const char*>(features.raw_data());

    // a zero-filled block used to pad empty samples
    Tensor zero{CPU};
    zero.Resize(block_size);
    auto zeroPtr = static_cast<char*>(zero.raw_mutable_data(features.dtype()));
    memset(zeroPtr, 0, zero.nbytes());
    int start_dest = 0;
    int start_src = 0;
    for (int i = 0; i < lengths.numel(); ++i) {
      if (lengthsPtr[i] == 0) {
        // insert a single zero block for an empty sample
        context_.CopyItemsSameDevice(
            features.dtype(),
            block_size,
            zeroPtr,
            dst + start_dest * features.dtype().itemsize());
        start_dest += block_size;
      } else {
        // copy the sample's feature rows unchanged
        auto src = src_base + start_src * features.dtype().itemsize();
        context_.CopyItemsSameDevice(
            features.dtype(),
            lengthsPtr[i] * block_size,
            src,
            dst + start_dest * features.dtype().itemsize());
        start_src += lengthsPtr[i] * block_size;
        start_dest += lengthsPtr[i] * block_size;
      }
    }
  }
  return true;
}
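A hedged NumPy sketch of the effect on a single feature tensor (`pad_empty_samples` and its arguments are illustrative names): samples with length 0 receive one zero-filled row, and their output length becomes 1.

```
import numpy as np

def pad_empty_samples(lengths, features):
    out_lengths = np.where(lengths == 0, 1, lengths).astype(np.int32)
    padded, offset = [], 0
    for length in lengths:
        if length == 0:
            padded.append(np.zeros((1,) + features.shape[1:], features.dtype))
        else:
            padded.append(features[offset:offset + length])
            offset += length
    return out_lengths, np.concatenate(padded)
```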
REGISTER_CPU_OPERATOR(AddPadding, AddPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(RemovePadding, RemovePaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(GatherPadding, GatherPaddingOp<CPUContext>);
REGISTER_CPU_OPERATOR(PadEmptySamples, PadEmptySamplesOp<CPUContext>);
// Gradient maker for AddPadding: the data gradient is the output gradient
// with its padding removed; the padding gradients are gathered padding sums.
struct GetAddPaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    // forward the lengths input to the gradient ops, if present
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }

    vector<OperatorDef> ops;
    // gradient w.r.t. the data input
    ops.push_back(CreateOperatorDef(
        "RemovePadding", "", g_inputs, vector<string>{GI(0)}));
    // gradients w.r.t. the start (and optional end) padding inputs
    if (Def().input_size() >= 3) {
      std::vector<string> padding_grads{GI(2)};
      if (Def().input_size() == 4) {
        padding_grads.push_back(GI(3));
      }
      auto g_inputs2 = g_inputs;
      ops.push_back(
          CreateOperatorDef("GatherPadding", "", g_inputs2, padding_grads));
    }
    return ops;
  }
};
// Gradient maker for RemovePadding: re-pad the output gradient.
struct GetRemovePaddingGradient : public GradientMakerBase {
  using GradientMakerBase::GradientMakerBase;
  vector<OperatorDef> GetGradientDefs() override {
    vector<std::string> g_inputs{GO(0)};
    if (Def().input_size() > 1) {
      CAFFE_ENFORCE(Def().output_size() > 1);
      g_inputs.push_back(O(1));
    }
    return SingleGradientDef(
        "AddPadding", "", g_inputs, vector<string>{GI(0)});
  }
};
OPERATOR_SCHEMA(AddPadding)
    .SetDoc(R"DOC(
Given a partitioned tensor $T<N, D_1, ..., D_n>$, where the partitions are
defined as ranges on its outer-most (slowest varying) dimension $N$,
return a tensor $T<(N + 2 * padding\_width), D_1, ..., D_n>$ with paddings
added to the start and end of each range.

Optionally, different paddings can be provided for beginning and end.
Paddings provided must be a tensor $T<D_1, ..., D_n>$. If no padding is
provided, add zero padding. If no lengths vector is provided, add padding
only once, at the start and end of data.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths"],
    ["Y", "lengths_out"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.rand(3,2,2).astype(np.float32)))
workspace.FeedBlob("lengths", np.array([3]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out:", workspace.FetchBlob("lengths_out"))
```

**Result**

```
X: [[[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]]
Y: [[[0.         0.        ]
  [0.         0.        ]]

 [[0.2531572  0.4588472 ]
  [0.45140603 0.61161053]]

 [[0.92500854 0.8045306 ]
  [0.03356671 0.30233648]]

 [[0.4660227  0.6287745 ]
  [0.79372746 0.08609265]]

 [[0.         0.        ]
  [0.         0.        ]]]
lengths_out: [5]
```

</details>

)DOC")
    .Arg(
        "padding_width",
        "*(type: int)* Number of copies of padding to add around each range.")
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. If "
        "this is not set, will use same as `padding_width`.")
    .Input(
        0,
        "data_in",
        "*(type: Tensor)* Input data ($T<N, D_1, ..., D_n>$).")
    .Input(
        1,
        "lengths",
        "*(type: Tensor`<int>`)* Number of elements in each range. "
        "sum(lengths) = N.")
    .Input(
        2,
        "start_padding",
        "*(type: Tensor`<int>`)* [OPTIONAL] Padding data for range start "
        "($T<D_1, ..., D_n>$).")
    .Input(
        3,
        "end_padding",
        "*(type: Tensor`<int>`)* [OPTIONAL] Padding for range end. If not "
        "provided, `start_padding` is used ($T<D_1, ..., D_n>$).")
    .Output(
        0,
        "data_out",
        "*(type: Tensor)* Padded data tensor ($T<N + 2*padding_width, "
        "D_1, ..., D_n>$).")
    .Output(
        1,
        "lengths_out",
        "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each padded range.");
OPERATOR_SCHEMA(RemovePadding)
    .SetDoc(R"DOC(
Remove padding around the edges of each segment of the input data. This is the
reverse operation of **AddPadding**, and uses the same arguments and conventions
for input and output data format.

Github Links:

- https://github.com/pytorch/pytorch/blob/master/caffe2/operators/sequence_ops.cc

<details>

<summary> <b>Example</b> </summary>

**Code**

```
workspace.ResetWorkspace()

addpad_op = core.CreateOperator(
    "AddPadding",
    ["X", "lengths_add"],
    ["Y", "lengths_out_add"],
    padding_width=1
)

rmpad_op = core.CreateOperator(
    "RemovePadding",
    ["Y", "lengths_rm"],
    ["Z", "lengths_out_rm"],
    padding_width=1
)

workspace.FeedBlob("X", (np.random.randint(20, size=(3,5))))
workspace.FeedBlob("lengths_add", np.array([3]).astype(np.int32))
workspace.FeedBlob("lengths_rm", np.array([5]).astype(np.int32))

print("X:", workspace.FetchBlob("X"))
workspace.RunOperatorOnce(addpad_op)
print("Y:", workspace.FetchBlob("Y"))
print("lengths_out_add:", workspace.FetchBlob("lengths_out_add"))

workspace.RunOperatorOnce(rmpad_op)
print("Z:", workspace.FetchBlob("Z"))
print("lengths_out_rm:", workspace.FetchBlob("lengths_out_rm"))
```

</details>

)DOC")
    .Arg(
        "padding_width",
        "*(type: int)* Outer-size of padding to remove around each range.")
    .Arg(
        "end_padding_width",
        "*(type: int)* [OPTIONAL] Specifies a different end-padding width. "
        "If this is not set, will use same as `padding_width`.")
    .Input(
        0,
        "data_in",
        "Input tensor ($T<N, D_1, ..., D_n>$).")
    .Input(
        1,
        "lengths",
        "*(type: Tensor`<int>`)* Number of elements in each range. "
        "sum(lengths) = N. If not provided, considers all data as a single "
        "segment.")
    .Output(
        0,
        "data_out",
        "*(type: Tensor)* Unpadded data tensor "
        "($T<N - 2*padding_width, D_1, ..., D_n>$).")
    .Output(
        1,
        "lengths_out",
        "*(type: Tensor`<int>`)* [OPTIONAL] Lengths for each unpadded range.");
OPERATOR_SCHEMA(GatherPadding)
    .SetDoc(R"DOC(
Gather the sum of start and end paddings in a padded input sequence. Used in
order to compute the gradients of AddPadding w.r.t the padding tensors.
)DOC")
    .Arg("padding_width", "Outer-size of padding present around each range.")
    .Arg(
        "end_padding_width",
        "(Optional) Specifies a different end-padding width.")
    .Input(0, "data_in", "T<N, D1..., Dn> Padded input data")
    .Input(
        1,
        "lengths",
        "(i64) Num of elements in each range. sum(lengths) = N. "
        "If not provided, considers all data as a single segment.")
    .Output(
        0,
        "padding_sum",
        "Sum of all start paddings, or of all "
        "paddings if end_padding_sum is not provided.")
    .Output(
        1,
        "end_padding_sum",
        "T<D1..., Dn> Sum of all end paddings, if provided.");
OPERATOR_SCHEMA(PadEmptySamples)
    .NumInputs(1, INT_MAX)
    .NumOutputs(1, INT_MAX)
    .SetDoc(R"DOC(
Pad empty samples given lengths and index features.

Input(0) is a blob pointing to the lengths of samples in one batch;
[Input(1), ..., Input(num_fields)] is a list of tensors containing the data
for each field of the features.

PadEmptySamples is thread safe.
)DOC")
    .Input(0, "lengths", "A blob containing a pointer to the lengths.")
    .Output(
        0,
        "out_lengths",
        "Tensor containing lengths with empty samples padded.");