最近在看《推荐系统实践》这本书。
整体来看,这本书比较偏向于工程实践。
但是我发现了代码有很多错误。
这里提供给大家一个可以运行的 Python 版本。
UserCF 中用户相似度的计算程序:
def usersimilarity(train):
    """Compute the user-user cosine similarity used by UserCF.

    For users ``u`` and ``v`` the similarity is
    ``|N(u) & N(v)| / sqrt(|N(u)| * |N(v)|)``, where ``N(x)`` is the set of
    distinct items that user ``x`` has interacted with.

    Args:
        train: Mapping from user to an iterable of items (a set, a list, or
            a dict keyed by item all work, since only iteration is used).

    Returns:
        A dict of dicts ``w`` where ``w[u][v]`` is the similarity between
        ``u`` and ``v``. Only pairs that share at least one item appear;
        a user with no such neighbour has no entry in ``w`` at all.
    """
    # Local imports keep the function self-contained: the surrounding file
    # does not show a module-level ``import math``.
    import math
    from collections import defaultdict

    # Build the inverse table: item -> set of users who interacted with it.
    item_users = defaultdict(set)
    for user, items in train.items():
        for item in items:
            item_users[item].add(user)

    # co_counts[(u, v)] = number of items shared by u and v (both orders are
    # stored); item_counts[u] = number of distinct items of user u.
    co_counts = defaultdict(int)
    item_counts = defaultdict(int)
    for users in item_users.values():
        for u in users:
            item_counts[u] += 1
            for v in users:
                if u == v:
                    continue
                co_counts[u, v] += 1

    # Normalise the co-occurrence counts into the final similarity matrix.
    w = {}
    for (u, v), shared in co_counts.items():
        w.setdefault(u, {})[v] = shared / math.sqrt(
            item_counts[u] * item_counts[v]
        )
    return w
ItemCF的相似度计算:
def itemsimilarity(train):
#the num