1 #include "caffe2/operators/string_ops.h" 2 #include "caffe2/core/operator.h" 8 bool StringJoinOp<CPUContext>::DoRunWithType() {
// Joins the elements of Input(0) into strings along axis_ and writes them to
// Output(0) as a 1-D tensor of std::string. Each element is written followed
// by delimiter_, so every joined string ends with the delimiter.
// NOTE(review): some lines of this function are not visible in this view;
// the comments below describe only the statements that are shown.
9 const auto& input = Input(0);
// Reject empty inputs and inputs with more than two dimensions.
11 CAFFE_ENFORCE_GT(input.numel(), 0);
12 CAFFE_ENFORCE_LE(input.dim(), 2,
"Only 1-D and 2-D tensors are supported");
14 const auto* inputData = input.data<
T>();
// Number of elements per row; a 1-D input is handled as rows of one element.
15 int rowSize = (input.dim() == 2) ? input.size(1) : 1;
16 if (this->axis_ == 0) {
// axis 0: one output string per row, i.e. input.size(0) outputs.
17 auto* output = Output(0, {input.size(0)}, at::dtype<std::string>());
18 auto* outputData = output->template mutable_data<std::string>();
21 for (
int i = 0; i < input.size(0); ++i) {
22 std::stringstream stream;
// The row is streamed through an ostream_iterator, which emits delimiter_
// after every element it writes (the last one included).
25 inputData + offset + rowSize,
26 std::ostream_iterator<T>(stream, delimiter_.c_str()));
27 outputData[i] = stream.str();
30 }
else if (this->axis_ == 1) {
// axis 1: one output string per column, i.e. input.size(1) outputs.
// NOTE(review): input.size(1) is queried here even though 1-D inputs pass
// the dimension check above — confirm how a 1-D input with axis 1 behaves.
31 auto* output = Output(0, {input.size(1)}, at::dtype<std::string>());
32 auto* outputData = output->template mutable_data<std::string>();
34 for (
int j = 0; j < input.size(1); ++j) {
35 std::stringstream stream;
36 for (
int i = 0; i < input.size(0); ++i) {
// Element (i, j) of the row-major input, followed by the delimiter.
37 stream << inputData[i * rowSize + j] << delimiter_;
39 outputData[j] = stream.str();
// Unconditional failure; presumably the branch taken for any axis value
// other than 0 or 1 (the enclosing else is not visible here).
42 CAFFE_ENFORCE(
false,
"Not supported");
51 explicit StartsWith(OperatorBase& op)
52 : prefix_(op.GetSingleArgument<
std::string>(
"prefix",
"")) {}
53 bool operator()(
const std::string& str) {
54 return std::mismatch(prefix_.begin(), prefix_.end(), str.begin()).first ==
63 explicit EndsWith(OperatorBase& op)
64 : suffix_(op.GetSingleArgument<
std::string>(
"suffix",
"")) {}
65 bool operator()(
const std::string& str) {
66 return std::mismatch(suffix_.rbegin(), suffix_.rend(), str.rbegin())
67 .first == suffix_.rend();
75 explicit Prefix(OperatorBase& op)
76 : length_(op.GetSingleArgument<int>(
"length", 3)) {}
77 std::string operator()(
const std::string& str) {
78 return std::string(str.begin(), std::min(str.end(), str.begin() + length_));
86 explicit Suffix(OperatorBase& op)
87 : length_(op.GetSingleArgument<int>(
"length", 3)) {}
88 std::string operator()(
const std::string& str) {
89 return std::string(std::max(str.begin(), str.end() - length_), str.end());
// Alias for a UnaryElementwiseWithArgsOp instantiation that accepts
// std::string tensors and wraps ScalarFunctor in ForEach. TypeMap defaults to
// FixedType<std::string>; the registrations of the boolean checks pass
// FixedType<bool> instead.
// NOTE(review): part of the template argument list is not visible here.
96 template <
typename ScalarFunctor,
typename TypeMap = FixedType<std::
string>>
97 using StringElementwiseOp = UnaryElementwiseWithArgsOp<
98 TensorTypes<std::string>,
100 ForEach<ScalarFunctor>,
// CPU operator registrations. StringPrefix and StringSuffix use the alias's
// default FixedType<std::string> type map; the StartsWith and EndsWith
// functors are registered with FixedType<bool>. StringJoin uses its own
// operator class rather than the element-wise alias.
103 REGISTER_CPU_OPERATOR(StringPrefix, StringElementwiseOp<Prefix>);
104 REGISTER_CPU_OPERATOR(StringSuffix, StringElementwiseOp<Suffix>);
105 REGISTER_CPU_OPERATOR(
107 StringElementwiseOp<StartsWith, FixedType<bool>>);
108 REGISTER_CPU_OPERATOR(
110 StringElementwiseOp<EndsWith, FixedType<bool>>);
111 REGISTER_CPU_OPERATOR(StringJoin, StringJoinOp<CPUContext>);
// Schema for StringPrefix: documents the "length" argument (maximum prefix
// size in bytes), input 0 ("strings") and the output holding one prefix per
// input string.
113 OPERATOR_SCHEMA(StringPrefix)
117 Computes the element-wise string prefix of the string tensor. 118 Input strings that are shorter than prefix length will be returned unchanged. 119 NOTE: Prefix is computed on number of bytes, which may lead to wrong behavior 120 and potentially invalid strings for variable-length encodings such as utf-8. 122 .Arg("length",
"Maximum size of the prefix, in bytes.")
123 .Input(0,
"strings",
"Tensor of std::string.")
127 "Tensor of std::string containing prefixes for each input.");
129 OPERATOR_SCHEMA(StringSuffix)
133 Computes the element-wise string suffix of the string tensor. 134 Input strings that are shorter than suffix length will be returned unchanged. 135 NOTE: Suffix is computed on number of bytes, which may lead to wrong behavior 136 and potentially invalid strings for variable-length encodings such as utf-8. 138 .Input(0, "strings",
"Tensor of std::string.")
142 "Tensor of std::string containing suffixes for each input.")
143 .Arg(
"length",
"Maximum size of the suffix, in bytes.");
// Schema for StringStartsWith: documents the "prefix" argument, input 0
// ("strings") and output 0 ("bools"), a boolean tensor with the input's
// shape.
145 OPERATOR_SCHEMA(StringStartsWith)
149 Performs the starts-with check on each string in the input tensor. 150 Returns tensor of boolean of the same dimension of input. 152 .Arg("prefix",
"The prefix to check input strings against.")
153 .Input(0,
"strings",
"Tensor of std::string.")
154 .Output(0,
"bools",
"Tensor of bools of same shape as input.");
// Schema for StringEndsWith: documents the "suffix" argument, input 0
// ("strings") and output 0 ("bools"), a boolean tensor with the input's
// shape.
156 OPERATOR_SCHEMA(StringEndsWith)
160 Performs the ends-with check on each string in the input tensor. 161 Returns tensor of boolean of the same dimension of input. 163 .Arg("suffix",
"The suffix to check input strings against.")
164 .Input(0,
"strings",
"Tensor of std::string.")
165 .Output(0,
"bools",
"Tensor of bools of same shape as input.");
167 OPERATOR_SCHEMA(StringJoin)
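171 Takes a 1-D or a 2-D tensor as input and joins elements in each row with the 172 provided delimiter. Output is a 1-D tensor of size equal to the first dimension 173 of the input. Each element in the output tensor is a string of concatenated 174 elements corresponding to each row in the input tensor. For 1-D input, each 175 element is treated as a row. When axis is 1, the elements of each column are joined instead and the output size equals the second dimension of the input. Every joined element, including the last one, is followed by the delimiter. 177 .Arg("delimiter",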
"Delimiter for join (Default: \",\").")
178 .Arg(
"axis",
"Axis for the join (either 0 or 1)")
179 .Input(0,
"input",
"1-D or 2-D tensor")
183 "1-D tensor of strings created by joining row elements from the " 186 SHOULD_NOT_DO_GRADIENT(StringPrefix);
// No-gradient declarations for StringSuffix, StringStartsWith,
// StringEndsWith and StringJoin (presumably because string outputs are not
// differentiable — confirm against the macro's definition).
187 SHOULD_NOT_DO_GRADIENT(StringSuffix);
188 SHOULD_NOT_DO_GRADIENT(StringStartsWith);
189 SHOULD_NOT_DO_GRADIENT(StringEndsWith);
190 SHOULD_NOT_DO_GRADIENT(StringJoin);
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...