# 1. Algorithms based on UserCF (user-based collaborative filtering)
import math
import operator
def UserSimilarity(train):  # P45: the most basic UserCF
    """Compute the cosine similarity of every ordered pair of distinct users.

    Args:
        train: Mapping ``user -> {item: rating}``. Only the item keys are
            used here; the rating values are ignored.

    Returns:
        Nested dict ``W`` where ``W[u][v]`` is
        ``|items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)`` rounded to
        two decimals. Pairs with no common item are stored as ``0.0``. With
        fewer than two users the result is an empty dict.
    """
    W = dict()
    for u, u_items in train.items():
        for v, v_items in train.items():
            if u == v:
                continue
            common = len(u_items.keys() & v_items.keys())
            norm = math.sqrt(len(u_items) * len(v_items))
            # A user with no items makes the norm zero; define the similarity
            # as 0.0 in that case instead of raising ZeroDivisionError.
            similarity = round(common / norm, 2) if norm else 0.0
            # Entries are inserted in ``train`` order; Recommend's stable sort
            # relies on insertion order to break ties.
            W.setdefault(u, {})[v] = similarity
    return W
def UserSimilarity2(train):  # P46: compute similarity through an inverted index
    """Compute user-user cosine similarity using an item -> users table.

    Only pairs of users that share at least one item are visited, so users
    without any such neighbour do not appear in the result at all.

    Args:
        train: Mapping ``user -> {item: rating}``. Only the item keys are
            used here; the rating values are ignored.

    Returns:
        Nested dict ``W`` where ``W[u][v]`` is
        ``|items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)`` rounded to
        two decimals, present only for pairs with at least one common item.
    """
    # Build the inverted table: item -> set of users who have that item.
    item_users = dict()
    for u, items in train.items():
        for i in items.keys():
            if i not in item_users:
                item_users[i] = set()
            item_users[i].add(u)

    C = dict()  # C[u][v]: number of items shared by u and v
    N = dict()  # N[u]: number of items u has
    for users in item_users.values():
        for u in users:
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                co_counts = C.setdefault(u, {})
                co_counts[v] = co_counts.get(v, 0) + 1

    # Normalise the co-occurrence counts into cosine similarities.
    W = dict()
    for u, related_users in C.items():
        W[u] = dict()
        for v, cuv in related_users.items():
            W[u][v] = round(cuv / math.sqrt(N[u] * N[v]), 2)
    return W
def UseSimilarity3(train):  # P48: improved UserCF
    """Compute user-user similarity with a penalty on popular items.

    Works like the inverted-index cosine similarity, except that each shared
    item contributes ``1 / log(1 + number_of_users_of_that_item)`` instead of
    ``1``, so an item held by many users counts for less.

    NOTE: the function name (``Use...`` rather than ``User...``) is kept
    unchanged because callers reference it by this name.

    Args:
        train: Mapping ``user -> {item: rating}``. Only the item keys are
            used here; the rating values are ignored.

    Returns:
        Nested dict ``W`` where ``W[u][v]`` is the weighted co-occurrence of
        ``u`` and ``v`` divided by ``sqrt(|items(u)| * |items(v)|)``, rounded
        to two decimals. Only pairs sharing at least one item are present.
    """
    # Build the inverted table: item -> set of users who have that item.
    item_users = dict()
    for u, items in train.items():
        for i in items.keys():
            if i not in item_users:
                item_users[i] = set()
            item_users[i].add(u)

    C = dict()  # C[u][v]: popularity-weighted count of items shared by u and v
    N = dict()  # N[u]: number of items u has
    for users in item_users.values():
        # The weight depends only on the item, so compute it once per item.
        # ``users`` is never empty, hence log(1 + len(users)) >= log(2) > 0.
        weight = 1 / math.log(1 + len(users))
        for u in users:
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                co_weights = C.setdefault(u, {})
                co_weights[v] = co_weights.get(v, 0) + weight

    # Normalise the weighted co-occurrences by the users' item counts.
    W = dict()
    for u, related_users in C.items():
        W[u] = dict()
        for v, cuv in related_users.items():
            W[u][v] = round(cuv / math.sqrt(N[u] * N[v]), 2)
    return W
def Recommend(train, user, W, K):  # P47: simple recommendation
    """Score items for ``user`` from the items of the K most similar users.

    Args:
        train: Mapping ``user -> {item: rating}``.
        user: The user to recommend for; must be a key of ``train``.
        W: Similarity mapping ``u -> {v: similarity}``. A user missing from
            ``W`` is treated as having no similar users.
        K: Number of most-similar users to take into account.

    Returns:
        Dict ``item -> score`` over items ``user`` has not interacted with,
        where the score is the sum of ``similarity * rating`` across the
        selected neighbours that have the item.

    Raises:
        KeyError: If ``user`` (or a selected neighbour) is not in ``train``.
    """
    rank = dict()
    interacted_items = train[user]
    # The inverted-index similarity functions omit users that share no item
    # with anyone, so fall back to an empty neighbourhood instead of raising.
    neighbours = sorted(
        W.get(user, {}).items(), key=operator.itemgetter(1), reverse=True
    )[0:K]
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank
# Toy data set: user -> {item: rating}.
train = {
    'A': {'a': 1, 'b': 1, 'd': 1},
    'B': {'a': 1, 'c': 1},
    'C': {'b': 1, 'e': 1},
    'D': {'c': 1, 'd': 1, 'e': 1},
}

# Similarity tables produced by each of the three variants.
W = UserSimilarity(train)
W2 = UserSimilarity2(train)
W3 = UseSimilarity3(train)

# Item scores for user 'A', using the 3 most similar users according to W.
result = Recommend(train, 'A', W, 3)

# Uncomment to inspect the intermediate tables and the final scores.
# print(W)
# print(W2)
# print(W3)
# print(result)