def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is ``1 / (1 + d)`` where ``d`` is the sum of squared rating
    differences over the items both people have rated. Identical ratings
    give 1.0; the score shrinks towards 0 as the ratings diverge.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        0 if the two people have no rated items in common, otherwise a
        float in the interval (0, 1].

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Compute the list of shared items once and reuse it below.
    shared_items = [item for item in ratings1 if item in ratings2]

    # If they have no ratings in common, return 0.
    if not shared_items:
        return 0

    # Add up the squares of all the differences.
    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared_items
    )

    # 1.0 forces true division even when every rating is an int.
    return 1.0 / (1 + sum_of_squares)
# Pearson similarity
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient for p1 and p2.

    The coefficient is computed only over the items that both people
    have rated.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        0 if there are no mutually rated items or if either person's
        shared ratings have no variance; otherwise the correlation as a
        float (nominally between -1 and 1).

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Local import keeps the function usable on its own.
    from math import sqrt

    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Get the list of mutually rated items.
    shared = [it for it in ratings1 if it in ratings2]

    # If there are no ratings in common, return 0.
    if not shared:
        return 0

    # Float count so the divisions below never truncate on int ratings.
    n = float(len(shared))

    # Sums of all the preferences.
    sum1 = sum(ratings1[it] for it in shared)
    sum2 = sum(ratings2[it] for it in shared)

    # Sums of the squares.
    sum1_sq = sum(ratings1[it] ** 2 for it in shared)
    sum2_sq = sum(ratings2[it] ** 2 for it in shared)

    # Sum of the products.
    p_sum = sum(ratings1[it] * ratings2[it] for it in shared)

    # Calculate r (Pearson score).
    num = p_sum - (sum1 * sum2 / n)
    radicand = (sum1_sq - sum1 ** 2 / n) * (sum2_sq - sum2 ** 2 / n)

    # A zero radicand means no variance. Rounding can also push it slightly
    # below zero, which would make sqrt() raise ValueError; treat both as
    # "no correlation".
    if radicand <= 0:
        return 0

    return num / sqrt(radicand)
# Tanimoto coefficient
def tanamoto(v1, v2):
    """Return the Tanimoto distance between two vectors.

    Each position counts as "present" when its value is non-zero. The
    Tanimoto coefficient is the ratio of positions present in both vectors
    (intersection) to positions present in either (union). This function
    returns ``1.0 - coefficient``, so 0.0 means the same positions are
    present in both and 1.0 means nothing is shared.

    Args:
        v1: Indexable sequence of numbers; its length sets how many
            positions are compared.
        v2: Indexable sequence of numbers, at least as long as ``v1``.

    Returns:
        A float between 0.0 and 1.0. Two vectors with no non-zero entries
        have an empty union and are reported as distance 0.0.

    Raises:
        IndexError: If ``v2`` is shorter than ``v1``.
    """
    count1, count2, shared = 0, 0, 0

    for i, value1 in enumerate(v1):
        value2 = v2[i]
        if value1 != 0:
            count1 += 1  # in v1
        if value2 != 0:
            count2 += 1  # in v2
        if value1 != 0 and value2 != 0:
            shared += 1  # in both

    union = count1 + count2 - shared

    # Empty union: neither vector has a non-zero entry. Return 0.0 rather
    # than dividing by zero.
    if union == 0:
        return 0.0

    return 1.0 - (float(shared) / union)