动态规划求编辑距离的思路,参考视频:《编辑距离 - 动态规划解法 Edit Distance - Dynamic Programming》(哔哩哔哩 bilibili)。
我的改动:忽略识别结果中多出来的字符
思路:如果最小值来自同列的上面一行(即 d[i - 1][j]),则对应的是删除操作。此时不再加一,即为忽略删除操作。
核心改动:d[i][j] = min(d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]) + min(np.argmin([d[i - 1][j], d[i][j - 1], d[i - 1][j - 1]]), 1)
其中 np.argmin 返回最小值第一次出现的下标:下标为 0 表示最小值来自 d[i - 1][j](删除),此时加 0;否则加 1。
def damerau_levenshtein_distance_no_del(string1, string2):
    """Return an edit distance from string1 to string2 that ignores deletions.

    This is the Levenshtein dynamic programme with one change: on a
    mismatch, when the cheapest neighbouring cell is the one directly above
    (i.e. the step drops the current character of ``string1``), nothing is
    added to the cost.  Insertions and substitutions still cost 1, so
    surplus characters in ``string1`` do not increase the distance.

    Args:
        string1: Source sequence whose extra characters are ignored
            (presumably the recognition result -- confirm against callers).
        string2: Target sequence.

    Returns:
        The distance as a plain ``int``.  A non-empty ``string1`` against an
        empty ``string2`` yields 1; an empty ``string1`` yields
        ``len(string2)``.

    Note:
        Despite the name, no transposition (Damerau) step is performed.
    """
    m = len(string1)
    n = len(string2)

    # (m + 1) rows by (n + 1) columns: d[i][j] is the cost of turning the
    # first i characters of string1 into the first j characters of string2.
    d = [[0] * (n + 1) for _ in range(m + 1)]

    # Row 0: building a prefix of string2 from nothing takes j insertions.
    for j in range(n + 1):
        d[0][j] = j

    # Column 0: every non-empty prefix of string1 is pinned at cost 1, which
    # is what the seed value 1 followed by min(i, d[i - 1][0]) always
    # produced.  Looping from 1 also avoids indexing d[1] when string1 is
    # empty.
    for i in range(1, m + 1):
        d[i][0] = 1

    # Fill the table row by row.
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if string1[i - 1] == string2[j - 1]:
                d[i][j] = d[i - 1][j - 1]
                continue
            above = d[i - 1][j]
            cheapest_other = min(d[i][j - 1], d[i - 1][j - 1])
            if above <= cheapest_other:
                # The minimum comes from the row above (a deletion; ties
                # also count as deletion): carry it over at no extra cost.
                d[i][j] = above
            else:
                # Insertion or substitution costs one step.
                d[i][j] = cheapest_other + 1
    return d[m][n]
下面是普通的编辑距离(标准 Levenshtein 距离,未启用相邻字符交换)的实现:
def damerau_levenshtein_distance(string1, string2):
    """Return the edit distance between string1 and string2.

    Insertions, deletions and substitutions each cost 1; matching
    characters cost nothing.

    Args:
        string1: Source sequence.
        string2: Target sequence.

    Returns:
        The minimum number of single-character edits that turn ``string1``
        into ``string2``.

    Note:
        Despite the name, no transposition step is performed, so this is
        the plain Levenshtein distance.
    """
    rows = len(string1)
    cols = len(string2)

    # Row 0: building each prefix of string2 from nothing takes c insertions.
    table = [list(range(cols + 1))]

    # Build the remaining rows one at a time, each from the row above it.
    for r in range(1, rows + 1):
        above = table[r - 1]
        # Column 0: emptying the first r characters of string1 takes r
        # deletions.
        current = [r]
        for c in range(1, cols + 1):
            if string1[r - 1] == string2[c - 1]:
                cost = above[c - 1]
            else:
                cost = 1 + min(above[c], current[c - 1], above[c - 1])
            current.append(cost)
        table.append(current)

    return table[rows][cols]