# -*- coding: utf-8 -*-
from math import sqrt
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is ``1 / (1 + d)``, where ``d`` is the sum of the squared
    rating differences over the items rated by both people.  Identical
    ratings therefore score 1 and larger disagreements push the score
    towards 0.  If the two people have no rated item in common the
    function returns 0.
    """
    # Items rated by both people, in person1's iteration order.
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    squared_gap = sum(
        (prefs[person1][item] - prefs[person2][item]) ** 2 for item in shared
    )
    return 1 / (1 + squared_gap)
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation score between p1 and p2.

    The score is computed over the items rated by both p1 and p2:
    1 means perfectly positively correlated, -1 perfectly negatively
    correlated.

    Returns 0 when the two have no rated item in common, or when the
    correlation is undefined because at least one of them gave the same
    rating to every shared item (zero variance).
    """
    # Items rated by both, in p1's iteration order.
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    if not shared:
        return 0

    n = len(shared)
    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Sums of the ratings, of their squares, and of their products.
    sum1 = sum(ratings1[it] for it in shared)
    sum2 = sum(ratings2[it] for it in shared)
    sum1_sq = sum(ratings1[it] ** 2 for it in shared)
    sum2_sq = sum(ratings2[it] ** 2 for it in shared)
    p_sum = sum(ratings1[it] * ratings2[it] for it in shared)

    # Unnormalised variance of each rater over the shared items.
    var1 = sum1_sq - sum1 ** 2 / n
    var2 = sum2_sq - sum2 ** 2 / n

    # A constant rater has zero variance mathematically, but floating-point
    # rounding can leave the term slightly negative.  Checking each term
    # separately avoids both sqrt() of a negative product (ValueError) and
    # a bogus score when two negative terms multiply to a positive value.
    if var1 <= 0 or var2 <= 0:
        return 0

    den = sqrt(var1 * var2)
    if den == 0:
        # The product of two tiny positive terms can underflow to zero.
        return 0

    num = p_sum - (sum1 * sum2 / n)
    return num / den
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the n best matches for person as (score, other) tuples.

    Every other key of prefs is scored against person with the given
    similarity function; the result is ordered from the highest score to
    the lowest and truncated to at most n entries.
    """
    ranked = []
    for other in prefs:
        if other != person:
            ranked.append((similarity(prefs, person, other), other))

    # Sort ascending, then flip, so the best scores come first.
    ranked = sorted(ranked)
    ranked.reverse()
    return ranked[:n]
def transformPrefs(prefs):
    """Transpose the preference matrix.

    Turns ``{person: {item: rating}}`` into ``{item: {person: rating}}``.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            # Swap the roles of item and person.
            flipped.setdefault(item, {})[person] = rating
    return flipped
def calculateSimilarItems(prefs, n=10):
    """Compute the similarity between items.

    Returns a dictionary mapping every item to the list of (score, item)
    tuples produced by topMatches for it, i.e. the up-to-n items it is most
    similar to according to sim_distance.
    """
    # Flip the person-centric preferences so they are keyed by item.
    item_prefs = transformPrefs(prefs)
    total = len(item_prefs)

    similar = {}
    for count, item in enumerate(item_prefs, start=1):
        # Progress report for large datasets.
        if count % 100 == 0:
            print("%d / %d" % (count, total))
        # Items most similar to the current one.
        similar[item] = topMatches(item_prefs, item, n=n, similarity=sim_distance)
    return similar
def getRecommendedItems(prefs, itemMatch, user):
    """Recommend unrated items to user from an item-similarity table.

    prefs maps each person to a ``{item: rating}`` dictionary and itemMatch
    maps each item to a list of ``(similarity, other_item)`` tuples.

    For every item the user has not rated, the predicted rating is the
    similarity-weighted average of the user's ratings for the items that
    list it as a neighbour.  Returns a list of ``(predicted_rating, item)``
    tuples ordered from the highest prediction to the lowest.

    Raises KeyError if user is not present in prefs.
    """
    userRatings = prefs[user]
    weighted_totals = {}    # candidate -> sum of similarity * rating
    similarity_totals = {}  # candidate -> sum of similarity

    for item, rating in userRatings.items():
        # The similarity table may have been computed before this item was
        # rated by anyone; such an item simply has no neighbours.
        for similarity, candidate in itemMatch.get(item, ()):
            # Never recommend something the user has already rated.
            if candidate in userRatings:
                continue
            weighted_totals[candidate] = (
                weighted_totals.get(candidate, 0) + similarity * rating
            )
            similarity_totals[candidate] = (
                similarity_totals.get(candidate, 0) + similarity
            )

    # Normalise by the total weight.  Candidates whose neighbours all have
    # similarity 0 carry no information and would divide by zero, so they
    # are left out.
    rankings = [
        (total / similarity_totals[candidate], candidate)
        for candidate, total in weighted_totals.items()
        if similarity_totals[candidate] != 0
    ]
    rankings.sort(reverse=True)
    return rankings
if __name__ == "__main__":
    # Raw data: ratings range from 1 (dislike) to 5 (like very much).
    data = {
        'user1': {
            "Dota2": 1,
            "League of Legends": 2,
            "Hearthstone": 3,
            "World of Warcraft": 4,
        },
        'user2': {
            "Dota2": 1,
            "League of Legends": 3,
            "Hearthstone": 2,
            "World of Warcraft": 1,
        },
        'user3': {
            "Dota2": 2,
            "League of Legends": 2,
            "Hearthstone": 1,
            "World of Warcraft": 4,
        },
    }

    # Add the ratings of the user who should receive recommendations.
    target_user = 'ns2250225'
    data[target_user] = {'Dota2': 1, 'League of Legends': 2}

    # Compute the similarity between all games.  The result could be
    # persisted locally with the pickle module and simply loaded next time
    # instead of being recomputed.
    item_similarities = calculateSimilarItems(data)

    # Build and show the list of recommendations.
    recommendations = getRecommendedItems(data, item_similarities, target_user)
    print(recommendations)
    # [(1.6363636363636362, 'Hearthstone'), (1.5185185185185186, 'World of Warcraft')]