Caffe2 - C++ API
A deep learning, cross platform ML framework
edit_distance.cpp
1 #include <torch/csrc/jit/script/edit_distance.h>
2 #include <algorithm>
3 #include <cstring>
4 #include <memory>
5 
6 namespace torch {
7 namespace jit {
8 namespace script {
9 
10 // computes levenshtein edit distance between two words
11 // returns maxEditDistance + 1 if the edit distance exceeds MaxEditDistance
12 // reference: http://llvm.org/doxygen/edit__distance_8h_source.html
13 size_t ComputeEditDistance(
14  const char* word1,
15  const char* word2,
16  size_t maxEditDistance) {
17 
18  size_t m = strlen(word1);
19  size_t n = strlen(word2);
20 
21  const unsigned small_buffer_size = 64;
22  unsigned small_buffer[small_buffer_size];
23  std::unique_ptr<unsigned[]> allocated;
24  unsigned* row = small_buffer;
25  if (n + 1 > small_buffer_size) {
26  row = new unsigned[n + 1];
27  allocated.reset(row);
28  }
29 
30  for (unsigned i = 1; i <= n; ++i)
31  row[i] = i;
32 
33  for (size_t y = 1; y <= m; ++y) {
34  row[0] = y;
35  unsigned best_this_row = row[0];
36 
37  unsigned previous = y - 1;
38  for (size_t x = 1; x <= n; ++x) {
39  int old_row = row[x];
40  row[x] = std::min(
41  previous + (word1[y - 1] == word2[x - 1] ? 0u : 1u),
42  std::min(row[x - 1], row[x]) + 1);
43  previous = old_row;
44  best_this_row = std::min(best_this_row, row[x]);
45  }
46 
47  if (maxEditDistance && best_this_row > maxEditDistance)
48  return maxEditDistance + 1;
49  }
50 
51  unsigned result = row[n];
52  return result;
53 }
54 
55 } // namespace script
56 } // namespace jit
57 } // namespace torch
Definition: jit_type.h:17