Caffe2 - C++ API
A deep learning, cross platform ML framework
string_utils.cc
1 
17 #include "caffe2/utils/string_utils.h"
18 
19 #include <algorithm>
20 #include <sstream>
21 #include <vector>
22 
23 namespace caffe2 {
24 
25 std::vector<std::string> split(char separator, const std::string& string) {
26  std::vector<std::string> pieces;
27  std::stringstream ss(string);
28  std::string item;
29  while (getline(ss, item, separator)) {
30  pieces.push_back(std::move(item));
31  }
32  return pieces;
33 }
34 
35 size_t editDistance(
36  const std::string& s1, const std::string& s2, size_t max_distance)
37  {
38  std::vector<size_t> current(s1.length() + 1);
39  std::vector<size_t> previous(s1.length() + 1);
40  std::vector<size_t> previous1(s1.length() + 1);
41 
42  return editDistanceHelper(
43  s1.c_str(),
44  s1.length(),
45  s2.c_str(),
46  s2.length(),
47  current,
48  previous,
49  previous1,
50  max_distance
51  );
52  }
53  #define NEXT_UNSAFE(s, i, c) { \
54  (c)=(uint8_t)(s)[(i)++]; \
55  }
56 
57 int32_t editDistanceHelper(const char* s1,
58  size_t s1_len,
59  const char* s2,
60  size_t s2_len,
61  std::vector<size_t> &current,
62  std::vector<size_t> &previous,
63  std::vector<size_t> &previous1,
64  size_t max_distance) {
65  if (max_distance) {
66  if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) {
67  return max_distance+1;
68  }
69  }
70 
71  for (size_t j = 0; j <= s1_len; ++j) {
72  current[j] = j;
73  }
74 
75  int32_t str2_offset = 0;
76  char prev2 = 0;
77  for (size_t i = 1; i <= s2_len; ++i) {
78  swap(previous1, previous);
79  swap(current, previous);
80  current[0] = i;
81 
82  char c2 = s2[str2_offset];
83  char prev1 = 0;
84  int32_t str1_offset = 0;
85 
86  NEXT_UNSAFE(s2, str2_offset, c2);
87 
88  size_t current_min = s1_len;
89  for (size_t j = 1; j <= s1_len; ++j) {
90  size_t insertion = previous[j] + 1;
91  size_t deletion = current[j - 1] + 1;
92  size_t substitution = previous[j - 1];
93  size_t transposition = insertion;
94  char c1 = s1[str1_offset];
95 
96  NEXT_UNSAFE(s1, str1_offset, c1);
97 
98  if (c1 != c2) {
99  substitution += 1;
100  }
101 
102 
103  if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) {
104  transposition = previous1[j - 2] + 1;
105  }
106  prev1 = c1;
107 
108  current[j] = std::min(std::min(insertion, deletion),
109  std::min(substitution, transposition));
110  current_min = std::min(current_min, current[j]);
111  }
112 
113 
114  if (max_distance != 0 && current_min > max_distance) {
115  return max_distance+1;
116  }
117 
118  prev2 = c2;
119  }
120 
121  return current[s1_len];
122  }
123 
124 } // namespace caffe2
Copyright (c) 2016-present, Facebook, Inc.