import Levenshtein as lvst
# Similarity based on edit distance
# (Levenshtein distance)
def leven_distance(s1, s2) -> float:
    """Return a similarity score derived from the Levenshtein edit distance.

    The score is ``1 - distance / max(len(s1), len(s2))``: 1.0 means the
    strings are identical and 0.0 means every position has to be edited.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        The normalized similarity as a float. Two empty strings are treated
        as identical and score 1.0.
    """
    longest = max(len(s1), len(s2))
    # Both strings empty: nothing to edit, and dividing by the zero length
    # below would raise ZeroDivisionError.
    if longest == 0:
        return 1.0
    dis = lvst.distance(s1, s2)
    # 1 - (edit distance / length of the longer string)
    return 1 - dis / longest
# Similarity based on the Dice coefficient
def dice_distance(s1, s2) -> float:
    """Return the Dice coefficient of the element sets of ``s1`` and ``s2``.

    Both inputs are converted to sets, so for strings the comparison is over
    distinct characters; order and repetition are ignored. The score is
    ``2 * |A & B| / (|A| + |B|)``.

    Args:
        s1: First string (or other iterable of hashable items).
        s2: Second string (or other iterable of hashable items).

    Returns:
        The coefficient as a float. Two empty inputs are treated as
        identical and score 1.0.
    """
    first = set(s1)
    second = set(s2)
    total = len(first) + len(second)
    # Both sets empty: avoid dividing by zero.
    if total == 0:
        return 1.0
    overlap = len(first & second)
    return overlap * 2.0 / total
# Similarity based on the Jaccard coefficient
def jaccard_distance(s1, s2) -> float:
    """Return the Jaccard coefficient of the element sets of ``s1`` and ``s2``.

    Both inputs are converted to sets, so for strings the comparison is over
    distinct characters; order and repetition are ignored. The score is
    ``|A & B| / |A | B|``.

    Args:
        s1: First string (or other iterable of hashable items).
        s2: Second string (or other iterable of hashable items).

    Returns:
        The coefficient as a float. Two empty inputs are treated as
        identical and score 1.0.
    """
    first = set(s1)
    second = set(s2)
    union = first | second
    # Empty union means both inputs were empty: avoid dividing by zero.
    if not union:
        return 1.0
    return len(first & second) / len(union)
# Demo: runs only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    # Sample inputs: one English pair and one Chinese pair.
    s1 = 'kitten'
    s2 = 'sitting'
    s3 = '年纪'
    s4 = '年龄'
    # The Levenshtein score is computed but only the Jaccard score is printed.
    res1 = leven_distance(s3, s4)
    res = jaccard_distance(s3, s4)
    print(res)