# coding=utf-8
'''
Recommender Systems in Practice, Chapter 8: the rating prediction problem.

Rating prediction algorithms.
'''
import math
import random
from operator import itemgetter
class Cluster:
    """Base grouping strategy: every key belongs to group 0.

    Subclasses fill ``self.group`` (key -> group id) in ``__init__`` and
    override ``GetGroup`` to look keys up in it.
    """

    def __init__(self, records):
        # ``records`` is not used by the base class; it is accepted so that
        # all cluster strategies share one constructor signature.
        self.group = dict()

    def GetGroup(self, i):
        """Return the group id of ``i``; always 0 for the base strategy."""
        return 0


class IdCluster(Cluster):
    """Identity grouping: every key forms its own group."""

    def __init__(self, records):
        Cluster.__init__(self, records)

    def GetGroup(self, i):
        """Return ``i`` itself as its group id."""
        return i
class UserActivityCluster(Cluster):
    """Group users into equal-sized buckets ordered by how many ratings they gave.

    Only records with ``test == 0`` are counted. Users are ranked by rating
    count in ascending order and the ranking is cut into ``NUM_GROUPS``
    buckets, so bucket 0 holds the least active users.
    """

    # Number of buckets the ranking is split into.
    NUM_GROUPS = 5

    def __init__(self, records):
        Cluster.__init__(self, records)
        activity = dict()
        for r in records:
            if r.test != 0:
                continue
            activity[r.user] = activity.get(r.user, 0) + 1
        # sorted() is stable, so users with equal counts keep first-seen order.
        ranked = sorted(activity, key=activity.get)
        for rank, user in enumerate(ranked):
            self.group[user] = (rank * self.NUM_GROUPS) // len(ranked)

    def GetGroup(self, uid):
        """Return the activity bucket of ``uid``, or -1 for an unseen user."""
        return self.group.get(uid, -1)
class ItemPopularityCluster(Cluster):
    """Group items into equal-sized buckets ordered by how many ratings they got.

    Only records with ``test == 0`` are counted. Items are ranked by rating
    count in ascending order and the ranking is cut into ``NUM_GROUPS``
    buckets, so bucket 0 holds the least popular items.
    """

    # Number of buckets the ranking is split into.
    NUM_GROUPS = 5

    def __init__(self, records):
        Cluster.__init__(self, records)
        popularity = dict()
        for r in records:
            if r.test != 0:
                continue
            popularity[r.item] = popularity.get(r.item, 0) + 1
        # sorted() is stable, so items with equal counts keep first-seen order.
        ranked = sorted(popularity, key=popularity.get)
        for rank, item in enumerate(ranked):
            self.group[item] = (rank * self.NUM_GROUPS) // len(ranked)

    def GetGroup(self, item):
        """Return the popularity bucket of ``item``, or -1 for an unseen item."""
        return self.group.get(item, -1)
class UserVoteCluster(Cluster):
    """Group users by their average rating, in half-point steps.

    Only records with ``test == 0`` contribute. A user's group id is
    ``int(average_vote * 2)``.
    """

    def __init__(self, records):
        Cluster.__init__(self, records)
        vote = dict()
        count = dict()
        for r in records:
            if r.test != 0:
                continue
            vote[r.user] = vote.get(r.user, 0) + r.vote
            count[r.user] = count.get(r.user, 0) + 1
        for user, total in vote.items():
            # float() keeps true division even under Python 2 integer rules.
            average = total / float(count[user])
            self.group[user] = int(average * 2)

    def GetGroup(self, uid):
        """Return the rating bucket of ``uid``, or -1 for an unseen user."""
        return self.group.get(uid, -1)
class ItemVoteCluster(Cluster):
    """Group items by their average rating, in half-point steps.

    Only records with ``test == 0`` contribute. An item's group id is
    ``int(average_vote * 2)``.
    """

    def __init__(self, records):
        Cluster.__init__(self, records)
        vote = dict()
        count = dict()
        for r in records:
            if r.test != 0:
                continue
            vote[r.item] = vote.get(r.item, 0) + r.vote
            count[r.item] = count.get(r.item, 0) + 1
        for item, total in vote.items():
            # float() keeps true division even under Python 2 integer rules.
            average = total / float(count[item])
            self.group[item] = int(average * 2)

    def GetGroup(self, item):
        """Return the rating bucket of ``item``, or -1 for an unseen item."""
        return self.group.get(item, -1)
# Prediction from (user group, item group) cluster averages.
# NOTE(review): a second function named PredictAll is defined further down in
# this module and replaces this one at import time -- confirm which is wanted.
def PredictAll(records, user_clouster, item_cluster):
    """Set ``r.predict`` on every record to the average of its cluster cell.

    Records with ``test == 0`` are accumulated per (user group, item group)
    cell. Every record, whatever its ``test`` value, then receives
    ``total / (count + 1.0)`` of its cell as ``r.predict``.

    Args:
        records: iterable of objects with ``user``, ``item``, ``vote`` and
            ``test`` attributes; it is iterated twice and mutated in place.
        user_clouster: object whose ``GetGroup(user)`` returns a hashable id.
        item_cluster: object whose ``GetGroup(item)`` returns a hashable id.
    """
    total = dict()
    count = dict()
    for r in records:
        if r.test != 0:
            continue
        cell = (user_clouster.GetGroup(r.user), item_cluster.GetGroup(r.item))
        total[cell] = total.get(cell, 0) + r.vote
        count[cell] = count.get(cell, 0) + 1
    for r in records:
        cell = (user_clouster.GetGroup(r.user), item_cluster.GetGroup(r.item))
        # A cell with no accumulated data yields 0 / (0 + 1.0) = 0.0 instead
        # of a KeyError; the +1.0 in the denominator is kept from the original.
        r.predict = total.get(cell, 0) / (1.0 * count.get(cell, 0) + 1.0)
# Neighbourhood-based method: user-user similarity.
def UserSimilarity(records):
    """Return a user-user similarity matrix built from mean-centred ratings.

    For users ``u`` and ``v`` the numerator sums
    ``(r_ui - mean_u) * (r_vi - mean_v)`` over the items both rated. The
    denominator is ``sqrt(nu[u] * nu[v])``, where ``nu[x]`` is the sum of
    squared deviations over all items ``x`` rated.

    Args:
        records: iterable of objects with ``user``, ``item`` and ``value``
            attributes.

    Returns:
        dict of dict: ``W[u][v]`` for every pair of distinct users sharing at
        least one item. Pairs where either user has zero rating variance get
        0.0 rather than raising ZeroDivisionError.
    """
    item_users = dict()
    vote_sum = dict()
    activity = dict()
    for r in records:
        raters = item_users.setdefault(r.item, dict())
        raters[r.user] = raters.get(r.user, 0) + r.value
        vote_sum[r.user] = vote_sum.get(r.user, 0) + r.value
        activity[r.user] = activity.get(r.user, 0) + 1
    # float() keeps true division even under Python 2 integer rules.
    ave_vote = {u: s / float(activity[u]) for u, s in vote_sum.items()}

    nu = dict()
    W = dict()
    for ri in item_users.values():
        for u, rui in ri.items():
            dev_u = rui - ave_vote[u]
            nu[u] = nu.get(u, 0) + dev_u * dev_u
            for v, rvi in ri.items():
                if u == v:
                    continue
                row = W.setdefault(u, dict())
                # The neighbour's rating is centred on the neighbour's own
                # mean, not on u's.
                row[v] = row.get(v, 0) + dev_u * (rvi - ave_vote[v])

    for u in W:
        normalised = dict()
        for v, covariance in W[u].items():
            denom = math.sqrt(nu[v] * nu[u])
            normalised[v] = covariance / denom if denom > 0 else 0.0
        W[u] = normalised
    return W
# NOTE(review): this definition shares its name with the cluster-average
# PredictAll defined earlier in this module and replaces it at import time.
def PredictAll(records, test, ave_vote, W, K):
    """Set ``r.predict`` on each test record from its K most similar users.

    The K neighbours with the largest weight in ``W[r.user]`` are selected
    first. Those among them that rated ``r.item`` contribute
    ``w * (rating - neighbour_mean)``. The sum is divided by the sum of
    ``|w|`` when that is positive, then added to the user's own mean.

    Args:
        records: iterable of training objects with ``user``, ``item`` and
            ``value`` attributes.
        test: iterable of objects with ``user`` and ``item``; mutated in place.
        ave_vote: mapping user -> mean rating.
        W: mapping user -> {neighbour: similarity}.
        K: number of neighbours to consider.

    Raises:
        KeyError: if a test user or a contributing neighbour is missing from
            ``ave_vote``.
    """
    user_items = dict()
    for r in records:
        rated = user_items.setdefault(r.user, dict())
        rated[r.item] = rated.get(r.item, 0) + r.value

    for r in test:
        deviation = 0.0
        norm = 0.0
        # A user with no row in W has no neighbours and falls back to the
        # plain user mean below.
        neighbours = sorted(W.get(r.user, {}).items(),
                            key=itemgetter(1), reverse=True)[:K]
        for v, wuv in neighbours:
            rated = user_items.get(v, {})
            if r.item in rated:
                deviation += wuv * (rated[r.item] - ave_vote[v])
                norm += abs(wuv)
        if norm > 0:
            deviation /= norm
        r.predict = deviation + ave_vote[r.user]
# Latent factor model (LFM): shared helpers, initialisation and SGD training.
def _iter_ratings(train):
    """Yield ``(user, item, rating)`` triples from ``train``.

    Accepted layouts: a mapping ``{(user, item): rating}``, a nested mapping
    ``{user: {item: rating}}``, an object whose ``items()`` already yields
    triples, or a plain iterable of triples.
    """
    entries = train.items() if hasattr(train, "items") else train
    for entry in entries:
        if len(entry) == 3:
            yield tuple(entry)
            continue
        key, value = entry
        if isinstance(value, dict):
            for item, rating in value.items():
                yield key, item, rating
        else:
            user, item = key
            yield user, item, value


def _predict_rating(u, i, p, q, bu=None, bi=None, mu=0.0):
    """Return ``mu + bu[u] + bi[i] + dot(p[u], q[i])``; biases are optional."""
    estimate = sum(puf * qif for puf, qif in zip(p[u], q[i]))
    if bu is not None and bi is not None:
        estimate += mu + bu[u] + bi[i]
    return estimate


# LFM training iterations.
def LearningLFM(train, F, n, alpha, lamda):
    """Fit user and item factor vectors by stochastic gradient descent.

    Args:
        train: ratings in any layout accepted by ``_iter_ratings``.
        F: number of latent factors.
        n: number of passes over the ratings.
        alpha: initial learning rate; multiplied by 0.9 after every pass.
        lamda: L2 regularisation weight.

    Returns:
        list: ``[p, q]`` where ``p[user]`` and ``q[item]`` are lists of ``F``
        floats.
    """
    p, q = InitLFM(train, F)
    ratings = list(_iter_ratings(train))
    for _ in range(n):
        for u, i, rui in ratings:
            eui = rui - _predict_rating(u, i, p, q)
            pu, qi = p[u], q[i]
            for f in range(F):
                # Both updates use the pre-update values of the other factor.
                puf, qif = pu[f], qi[f]
                pu[f] += alpha * (qif * eui - lamda * puf)
                qi[f] += alpha * (puf * eui - lamda * qif)
        alpha *= 0.9
    return [p, q]


# Initialise p and q.
def InitLFM(train, F):
    """Create random factor vectors for every user and item in ``train``.

    Each component is drawn as ``random.random() / sqrt(F)``.

    Returns:
        list: ``[p, q]`` mapping users / items to lists of ``F`` floats.
    """
    p = dict()
    q = dict()
    scale = math.sqrt(F)
    for u, i, _ in _iter_ratings(train):
        if u not in p:
            p[u] = [random.random() / scale for _ in range(F)]
        if i not in q:
            q[i] = [random.random() / scale for _ in range(F)]
    return [p, q]


# LFM with bias terms -> BiasLFM.
def LearningBiasLFM(train, F, n, alpha, lamda, mu):
    """Fit factor vectors plus per-user and per-item biases by SGD.

    Args:
        train: ratings in any layout accepted by ``_iter_ratings``.
        F: number of latent factors.
        n: number of passes over the ratings.
        alpha: initial learning rate; multiplied by 0.9 after every pass.
        lamda: L2 regularisation weight.
        mu: constant offset added to every prediction (presumably the global
            mean rating -- confirm with callers).

    Returns:
        list: ``[bu, bi, p, q]``; biases start at 0.0.
    """
    p, q = InitLFM(train, F)
    bu = {u: 0.0 for u in p}
    bi = {i: 0.0 for i in q}
    ratings = list(_iter_ratings(train))
    for _ in range(n):
        for u, i, rui in ratings:
            eui = rui - _predict_rating(u, i, p, q, bu, bi, mu)
            bu[u] += alpha * (eui - lamda * bu[u])
            bi[i] += alpha * (eui - lamda * bi[i])
            pu, qi = p[u], q[i]
            for f in range(F):
                # Both updates use the pre-update values of the other factor.
                puf, qif = pu[f], qi[f]
                pu[f] += alpha * (qif * eui - lamda * puf)
                qi[f] += alpha * (puf * eui - lamda * qif)
        alpha *= 0.9
    return [bu, bi, p, q]
if __name__ == '__main__':
    # Nothing is executed when the module is run as a script.
    pass