似乎在LeetCode里面也有,不过这里是为了解决另外的问题而做的。假设两个string,有三种不同的操作:添加,删除,替换。如何通过最少步骤让一个字符串变换成为另一个字符串?这个最少步骤数即为Levenshtein edit distance。做法是用动态规划,见http://en.wikipedia.org/wiki/Edit_distance。
#include <cassert>
#include <iostream>
#include <memory>
#include <string>
int edit_distance(const std::string& s1, const std::string& s2) {
const int s1_length = s1.length();
const int s2_length = s2.length();
const int s1_elems = s1_length + 1;
const int s2_elems = s2_length + 1;
const int total_elems = s1_elems * s2_elems;
std::unique_ptr<int[]> distance_matrix(new int[total_elems]);
auto access = [&distance_matrix, s1_elems, s2_elems](int index_s1, int index_s2) -> int& {
assert(index_s1 >= 0 && index_s1 < s1_elems);
assert(index_s2 >= 0 && index_s2 < s2_elems);
return distance_matrix[index_s2 * s1_elems + index_s1];
};
std::fill(distance_matrix.get(), distance_matrix.get() + total_elems, 0);
for(auto i = 0; i < s1_elems; ++i)
access(i, 0) = i;
for(auto i = 0; i < s2_elems; ++i)
access(0, i) = i;
// Note: the iterator starts from 0, but when we map it to the matrix, the
// index should be increased by 1
for(auto s1_iter = 0; s1_iter < s1_length; ++s1_iter)
for(auto s2_iter = 0; s2_iter < s2_length; ++s2_iter) {
const char s1_ch = s1[s1_iter];
const char s2_ch = s2[s2_iter];
if (s1_ch == s2_ch) {
access(s1_iter + 1, s2_iter + 1) = access(s1_iter, s2_iter);
}
else {
access(s1_iter + 1, s2_iter + 1) = std::min(std::min(
access(s1_iter + 1, s2_iter),
access(s1_iter, s2_iter + 1)),
access(s1_iter, s2_iter)) + 1;
}
}
return access(s1_length, s2_length);
}
int main(int argc, char* argv[]) {
if (argc != 3) {
std::cerr<<"Usage: edit_distance [string1] [string2]"<<std::endl;
return -1;
}
std::cout<<edit_distance(argv[1], argv[2])<<std::endl;
return 0;
}