# Python动态规划以及编辑距离——莱文斯坦距离小记

ps：《算法图解》这本书一定得看完

import numpy

def wer2(r, h, *, verbose=True):
    """Return the Levenshtein (edit) distance between two sequences.

    This function was originally written by Martin Thoma
    (https://martin-thoma.com/word-error-rate-calculation/).

    The distance is the minimum number of substitutions, insertions and
    deletions needed to turn ``r`` into ``h``.  It is computed with a
    dynamic-programming table of shape ``(len(r) + 1, len(h) + 1)``, so
    both time and space are O(n * m).

    The table holds 64-bit integers, so inputs longer than 254 elements
    no longer overflow (the earlier version used ``uint8`` cells).

    Parameters
    ----------
    r : sequence
        Reference sequence (e.g. a list of words or characters).
    h : sequence
        Hypothesis sequence compared against ``r``.
    verbose : bool, optional
        When True (the default) the table is printed after each stage and
        the three candidate costs are printed for every mismatching cell.
        Pass False to compute silently.

    Returns
    -------
    int
        The edit distance between ``r`` and ``h``.

    Examples
    --------
    >>> wer2("who is there".split(), "is there".split(), verbose=False)
    1
    >>> wer2("who is there".split(), "".split(), verbose=False)
    3
    >>> wer2("".split(), "who is there".split(), verbose=False)
    3
    """
    rows = len(r) + 1
    cols = len(h) + 1

    # Initialization: start from a flat, all-zero array ...
    d = numpy.zeros(rows * cols, dtype=numpy.int64)
    if verbose:
        print(d)  # e.g. [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

    # ... and view it as a rows x cols table.
    d = d.reshape((rows, cols))
    if verbose:
        print(d)

    # Borders: turning a prefix of length k into the empty sequence (or the
    # reverse) costs exactly k edits.
    d[0, :] = numpy.arange(cols)
    d[:, 0] = numpy.arange(rows)
    if verbose:
        print(d)
        # For two 3-element inputs the table now looks like:
        # [[0 1 2 3]
        #  [1 0 0 0]
        #  [2 0 0 0]
        #  [3 0 0 0]]

    # Computation: fill the interior of the table row by row.
    for i in range(1, rows):
        for j in range(1, cols):
            if r[i - 1] == h[j - 1]:
                # Matching elements cost nothing extra.
                d[i, j] = d[i - 1, j - 1]
            else:
                substitution = d[i - 1, j - 1] + 1
                insertion = d[i, j - 1] + 1
                deletion = d[i - 1, j] + 1
                if verbose:
                    print(substitution, insertion, deletion)
                d[i, j] = min(substitution, insertion, deletion)
    if verbose:
        print(d)

    # The bottom-right cell holds the distance between the full sequences;
    # convert the NumPy scalar to a plain Python int for callers.
    return int(d[rows - 1, cols - 1])
if __name__ == '__main__':
    # Demo: compare two three-element sequences and print their edit distance.
    res = wer2(['c', 'b', 'c'], ['a', 'b', 'd'])
    print(res)

运行结果（程序输出）：

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[[0 0 0 0]
[0 0 0 0]
[0 0 0 0]
[0 0 0 0]]
[[0 1 2 3]
[1 0 0 0]
[2 0 0 0]
[3 0 0 0]]
1 2 2
2 2 3
3 3 4
2 3 2
3 2 4
3 4 3
3 4 2
2 3 3
[[0 1 2 3]
[1 1 2 3]
[2 2 1 2]
[3 3 2 2]]
2

05-16 6122

05-23 4211
01-12 1512
08-14 1227
10-20 3094
05-07 1万+
11-12 274
09-26 3579
03-30 1242
05-21 3487
06-13 1174