Levenshtein 距离即编辑距离,指把一个字符串变成另一个字符串所需的最少单字符插入、删除或替换次数,在自然语言处理中很常见。
# coding=utf-8
def Levenshtein(s, t):
    """Compute, print and return the Levenshtein (edit) distance of s and t.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``s`` into ``t``.

    Printed output, in order:
      1. the distance divided by the length of the longer input
         (0.0 when both inputs are empty),
      2. every row of the dynamic-programming matrix,
      3. the raw distance.

    Args:
        s: First sequence (any indexable object supporting ``len``).
        t: Second sequence.

    Returns:
        The edit distance as an int.
    """
    len_s = len(s)
    len_t = len(t)

    # dist[i][j] holds the edit distance between s[:i] and t[:j].
    dist = [[0] * (len_t + 1) for _ in range(len_s + 1)]

    # First column / first row: turning a prefix into the empty string
    # (or the reverse) costs one edit per element.
    for i in range(len_s + 1):
        dist[i][0] = i
    for j in range(len_t + 1):
        dist[0][j] = j

    # Fill the rest of the matrix row by row.
    for i in range(1, len_s + 1):
        for j in range(1, len_t + 1):
            cost = 0 if s[i - 1] == t[j - 1] else 1
            dist[i][j] = min(
                dist[i - 1][j - 1] + cost,  # substitute (or match)
                dist[i][j - 1] + 1,         # insert
                dist[i - 1][j] + 1,         # delete
            )

    distance = dist[len_s][len_t]
    longest = max(len_s, len_t)
    # Guard the normalisation: two empty inputs would otherwise divide by
    # zero. They are identical, so the normalised distance is 0.
    normalized = distance / longest if longest else 0.0

    print(normalized)
    for row in dist:
        print(row)
    print(distance)
    return distance
if __name__ == "__main__":
    # The two strings to compare.
    first, second = "ddddsssss", 'dddsssss'
    Levenshtein(first, second)
d d d s s s s s
d [0, 1, 2, 3, 4, 5, 6, 7]
d [1, 0, 1, 2, 3, 4, 5, 6]
d [2, 1, 0, 1, 2, 3, 4, 5]
d [3, 2, 1, 0, 1, 2, 3, 4]
s [4, 3, 2, 1, 1, 2, 3, 4]
s [5, 4, 3, 2, 1, 1, 2, 3]
s [6, 5, 4, 3, 2, 1, 1, 2]
s [7, 6, 5, 4, 3, 2, 1, 1]
s [8, 7, 6, 5, 4, 3, 2, 1]
以上是 Python 实现。