def leven_shtein_score(self, query1, query2):
    """Return the edit distance between two queries relative to the longer one.

    A smaller value means the queries are more similar.

    Args:
        query1: First query (anything accepted by ``Levenshtein.distance``
            and ``len``; presumably a string -- confirm against callers).
        query2: Second query, same kind as ``query1``.

    Returns:
        ``Levenshtein.distance(query1, query2)`` divided by the length of the
        longer query plus a small constant.
    """
    # The tiny constant keeps the denominator non-zero when both queries
    # are empty.
    smoothing = 0.0001
    edit_distance = Levenshtein.distance(query1, query2)
    longest = max(len(query1), len(query2))
    return edit_distance / (longest + smoothing)
def repeat_score(self, query1, query2):
    """Return how much of ``query1`` is repeated in ``query2``.

    Counts every element of ``query1`` (duplicates included) that also
    occurs in ``query2`` and divides that count by the length of ``query2``
    plus a small constant. The measure is not symmetric, and it can exceed 1
    when ``query1`` is longer than ``query2``.

    Args:
        query1: Sequence whose elements are looked up in ``query2``.
        query2: Sequence providing the lookup set and the normalising length.

    Returns:
        The overlap count divided by ``len(query2) + 0.0001``.
    """
    # The tiny constant keeps the denominator non-zero for an empty query2.
    smoothing = 0.0001
    known = set(query2)
    shared = sum(1 for item in query1 if item in known)
    return shared / (len(query2) + smoothing)
def similar_score(self, querys):
    """Build the pairwise similarity matrix for a sequence of queries.

    The diagonal is 1. Every other cell ``[i][j]`` is
    ``repeat_score(querys[i], querys[j]) - leven_shtein_score(querys[i], querys[j])``
    clamped so it never goes below 0.

    Args:
        querys: Indexable sequence of queries.

    Returns:
        A ``len(querys)`` x ``len(querys)`` list of lists of similarities.
    """
    size = len(querys)
    matrix = []
    for row_idx in range(size):
        row = []
        for col_idx in range(size):
            if row_idx == col_idx:
                # A query is always fully similar to itself.
                row.append(1)
                continue
            overlap = self.repeat_score(querys[row_idx], querys[col_idx])
            distance = self.leven_shtein_score(querys[row_idx], querys[col_idx])
            # Overlap minus relative edit distance, floored at zero.
            row.append(max(overlap - distance, 0))
        matrix.append(row)
    return matrix
def MMR(self, recScoreDict, similarityMatrix, lambdaConstant=0.5, topN=20):
    """Greedily re-rank candidates with Maximal Marginal Relevance (MMR).

    In each round the remaining candidate ``i`` with the highest value of
    ``lambdaConstant * recScoreDict[i] - (1 - lambdaConstant) * max_sim``
    is selected, where ``max_sim`` is the largest similarity between ``i``
    and any already selected candidate (never below 0).

    Args:
        recScoreDict: Relevance scores indexable by ``0 .. len - 1`` (a list,
            or a dict keyed by those integers).
        similarityMatrix: ``similarityMatrix[i][j]`` is the similarity
            between candidates ``i`` and ``j``; values must be convertible
            to ``float``.
        lambdaConstant: Trade-off weight; higher values favour relevance,
            lower values favour diversity.
        topN: Maximum number of candidates to return.

    Returns:
        A list of candidate indices in selection order, holding at most
        ``topN`` entries (empty when ``topN`` is not positive).
    """
    selected = []
    remaining = list(range(len(recScoreDict)))

    # Selection is greedy, so later rounds never change earlier picks and
    # the loop can stop as soon as topN candidates have been chosen.
    while remaining and len(selected) < topN:
        # Start from -inf so candidates with zero or negative MMR scores
        # still compete on their actual score.
        best_score = float("-inf")
        best_idx = None
        for i in remaining:
            relevance = recScoreDict[i]
            # Redundancy penalty: highest similarity to anything already
            # selected, floored at 0.
            redundancy = max(
                (float(similarityMatrix[i][j]) for j in selected),
                default=0.0,
            )
            redundancy = max(redundancy, 0.0)
            mmr_score = (
                lambdaConstant * relevance
                - (1 - lambdaConstant) * redundancy
            )
            if mmr_score > best_score:
                best_score = mmr_score
                best_idx = i
        if best_idx is None:
            # Only reachable when no score compares greater than -inf
            # (e.g. NaN scores); fall back to the input order.
            best_idx = remaining[0]
        remaining.remove(best_idx)
        selected.append(best_idx)
    return selected
# MMR重排(相似度通过编辑距离和重复度计算)
# 最新推荐文章于 2024-03-22 14:22:01 发布