from math import sqrt
"""
余弦相似度
"""
def sim_distance_cos(p1, p2):
    """Return the cosine similarity of two sparse score mappings.

    Args:
        p1: Mapping of item key to numeric score.
        p2: Mapping of item key to numeric score.

    Returns:
        The dot product taken over the keys present in both mappings,
        divided by the product of the Euclidean norms of each mapping's
        full set of values. Returns 0 when the mappings share no keys or
        when either norm is zero.
    """
    common = set(p1) & set(p2)
    if not common:
        return 0
    dot = sum(p1[key] * p2[key] for key in common)
    # The norms are taken over every value of each mapping. The values must
    # be squared directly: looking them up again as if they were keys (as
    # the previous code did) yields None or an unrelated entry.
    norm1 = sqrt(sum(value ** 2 for value in p1.values()))
    norm2 = sqrt(sum(value ** 2 for value in p2.values()))
    denominator = norm1 * norm2
    if denominator == 0:
        # An all-zero vector has no direction; avoid dividing by zero.
        return 0
    return float(dot) / denominator
from math import sqrt
"""
皮尔逊相关度
"""
def sim_distance_pearson(p1, p2):
    """Return the Pearson correlation of two score mappings.

    Only the keys present in both mappings take part in the computation.

    Args:
        p1: Mapping of item key to numeric score.
        p2: Mapping of item key to numeric score.

    Returns:
        The correlation coefficient computed over the shared keys. Returns
        0 when the mappings share no keys or when the denominator is zero
        (at least one side has no variance over the shared keys).
    """
    common = set(p1) & set(p2)
    if not common:
        return 0
    # Use a float count so the divisions below never truncate.
    n = float(len(common))
    sum1 = sum(p1[key] for key in common)
    sum2 = sum(p2[key] for key in common)
    sum_sq1 = sum(p1[key] ** 2 for key in common)
    sum_sq2 = sum(p2[key] ** 2 for key in common)
    sum_products = sum(p1[key] * p2[key] for key in common)
    numerator = sum_products - sum1 * sum2 / n
    # Both factors square their own sum; the previous code called
    # pow(s2-2) with a single argument, which raises TypeError.
    variance_product = (sum_sq1 - sum1 ** 2 / n) * (sum_sq2 - sum2 ** 2 / n)
    if variance_product <= 0:
        # Zero means no variance; a tiny negative value can only come from
        # floating-point rounding and would make sqrt raise ValueError.
        return 0
    return numerator / sqrt(variance_product)
from math import sqrt
"""
欧几里得空间法 计算相似度
"""
def sim_distance(p1, p2):
    """Return a similarity score derived from Euclidean distance.

    The distance is measured only over the keys present in both mappings
    and mapped to a score with 1 / (1 + distance), so identical scores on
    the shared keys give 1.

    Args:
        p1: Mapping of item key to numeric score.
        p2: Mapping of item key to numeric score.

    Returns:
        0 when the mappings share no keys, otherwise 1 / (1 + distance).
    """
    shared = set(p1) & set(p2)
    if len(shared) == 0:
        return 0
    squared_total = 0
    for key in shared:
        difference = p1.get(key) - p2.get(key)
        squared_total += pow(difference, 2)
    distance = sqrt(squared_total)
    return 1 / (1 + distance)
"""
Jaccard系数
"""
def sim_distance_jaccard(p1, p2):
    """Return the extended Jaccard (Tanimoto) coefficient of two mappings.

    Computed as dot / (sq1 + sq2 - dot), where dot is the dot product over
    the keys present in both mappings and sq1 / sq2 are the sums of squares
    of all values of each mapping.

    Args:
        p1: Mapping of item key to numeric score.
        p2: Mapping of item key to numeric score.

    Returns:
        The coefficient as a float. Returns 0 when the mappings share no
        keys or when the denominator is zero (all scores are zero).
    """
    common = set(p1) & set(p2)
    if not common:
        return 0
    dot = sum(p1[key] * p2[key] for key in common)
    sum_sq1 = sum(value ** 2 for value in p1.values())
    sum_sq2 = sum(value ** 2 for value in p2.values())
    denominator = sum_sq1 + sum_sq2 - dot
    if denominator == 0:
        # Only reachable when every score is zero; return 0 instead of
        # raising ZeroDivisionError.
        return 0
    return float(dot) / denominator
from math import sqrt"""余弦相似度"""def sim_distance_cos(p1,p2): c = set(p1.keys())&set(p2.keys()) if not c: return 0 ss = sum([p1.get(sk)*p2.get(sk) for sk in c]) sq1 = sq...