Custom LFM
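LFM (latent factor model) approximates the sparse user-item rating matrix R with the product of two low-rank factor matrices, P (users × F) and Q (F × items), so that an observed rating is predicted as \hat{r}_{ui} = p_u^\top q_i. Stochastic gradient descent over the observed entries minimizes a regularized squared error; the objective and updates below are what the code in this section implements:

$$\min_{P,Q} \sum_{(u,i):\, r_{ui} > 0} \left( r_{ui} - p_u^\top q_i \right)^2 + \lambda \left( \lVert p_u \rVert^2 + \lVert q_i \rVert^2 \right)$$

With the error defined as $e_{ui} = r_{ui} - \hat{r}_{ui}$, each observed rating triggers the updates

$$p_u \leftarrow p_u + \alpha \left( 2 e_{ui} q_i - 2\lambda p_u \right), \qquad q_i \leftarrow q_i + \alpha \left( 2 e_{ui} p_u - 2\lambda q_i \right)$$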
import numpy as np

class SVD():
    def __init__(self, rating_data, F=5, alpha=0.1, lmbda=0.1, max_iter=100):
        self.F = F
        self.P = []
        self.Q = []
        # self.bu = []
        # self.bi = []
        # self.mu = 0.0
        self.alpha = alpha
        self.lmbda = lmbda
        self.max_iter = max_iter
        self.rating_data = rating_data
        # m: number of users, n: number of items
        m, n = rating_data.shape
        # Initialize the P and Q matrices
        # self.P = np.full((m, F), 1 / math.sqrt(F))
        # self.Q = np.full((n, F), 1 / math.sqrt(F))
        self.P = np.random.rand(m, F)
        self.Q = np.random.rand(n, F)
        self.Q = self.Q.T
        # self.bu = np.zeros(m)
        # self.bi = np.zeros(n)
        # self.mu = rating_data[np.nonzero(rating_data)].sum() / np.count_nonzero(rating_data)
    def train(self):
        # Iterate only over the observed (nonzero) ratings
        ratings_index = np.transpose(np.nonzero(self.rating_data))
        for step in range(self.max_iter):
            sum_e_ui = 0
            for rating_index in ratings_index:
                u, i = rating_index
                rhat_ui = self.predict(u, i)
                r_ui = self.rating_data[u, i]
                e_ui = r_ui - rhat_ui
                sum_e_ui += abs(e_ui)
                # self.bu[u] += self.alpha * (e_ui - self.lmbda * self.bu[u])
                # self.bi[i] += self.alpha * (e_ui - self.lmbda * self.bi[i])
                for f in range(self.F):
                    # e_ui = r_ui - rhat_ui, so step in the +e_ui direction
                    self.P[u][f] += self.alpha * (2 * e_ui * self.Q[f][i] - 2 * self.lmbda * self.P[u][f])
                    self.Q[f][i] += self.alpha * (2 * e_ui * self.P[u][f] - 2 * self.lmbda * self.Q[f][i])
            # self.alpha *= 0.5  # optionally shrink the step size each iteration
            print("Iteration " + str(step + 1) + " total absolute error: " + str(sum_e_ui))
        print(self.P[0])
        print(self.Q.T[0])
        # rating_data_11 = np.dot(self.P[0], self.Q[:, 0])
        # print(rating_data_11)
        # pd.DataFrame(train_rating_data).to_csv("./after_train_rating_data.csv")
    def predict(self, u, i):
        # rhat_ui = np.dot(self.P[u, :], self.Q[:, i]) + self.bu[u] + self.bi[i] + self.mu
        return np.dot(self.P[u, :], self.Q[:, i])
def loadData():
    ratings_data = np.array([[4, 0, 2, 0, 1],
                             [0, 2, 3, 0, 0],
                             [1, 0, 2, 4, 0],
                             [5, 0, 0, 3, 1],
                             [0, 0, 1, 5, 1],
                             [0, 3, 2, 4, 1]])
    return ratings_data
rating_data = loadData()
basicsvd = SVD(rating_data, F=10)
basicsvd.train()
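Once train() finishes, the fitted model can be queried for the entries that were 0 (unrated) in the input. A minimal sketch, reusing the rating_data and basicsvd objects created above:

# Predict the ratings the training data left empty (the zero entries)
for u, i in np.transpose(np.nonzero(rating_data == 0)):
    print("user %d, item %d -> predicted rating %.3f" % (u, i, basicsvd.predict(u, i)))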
Comparison
import numpy as np
R = np.array([[4, 0, 2, 0, 1],
              [0, 2, 3, 0, 0],
              [1, 0, 2, 4, 0],
              [5, 0, 0, 3, 1],
              [0, 0, 1, 5, 1],
              [0, 3, 2, 4, 1]])
K = 5
max_iter = 5000
alpha = 0.0002
lamda = 0.004
# Core algorithm
def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    # Basic dimension parameters
    M = len(R)
    N = len(R[0])
    # Random initial values for P and Q
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T
    # Start iterating
    for step in range(max_iter):
        # Traverse every user u and item i; gradient-descend on the
        # corresponding feature vectors Pu and Qi
        for u in range(M):
            for i in range(N):
                # For every rating greater than 0, compute the prediction error
                if R[u][i] > 0:
                    eui = np.dot(P[u, :], Q[:, i]) - R[u][i]
                    # Plug into the update formulas: gradient descent on Pu and Qi
                    for k in range(K):
                        P[u][k] = P[u][k] - alpha * (2 * eui * Q[k][i] + 2 * lamda * P[u][k])
                        Q[k][i] = Q[k][i] - alpha * (2 * eui * P[u][k] + 2 * lamda * Q[k][i])
        # u and i fully traversed, all feature vectors updated; P and Q now
        # give the predicted rating matrix
        predR = np.dot(P, Q)
        # Compute the current loss
        cost = 0
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
                    # Add the regularization term
                    for k in range(K):
                        cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
        if cost < 0.0001:
            break
    return P, Q.T, cost
P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)
print(P)
print(Q)
print(cost)
predR = P.dot(Q.T)
print(R)
print(predR)
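To compare R and predR quantitatively rather than by reading the printed matrices side by side, one can compute the RMSE restricted to the observed entries. A minimal sketch, reusing the R and predR from the run above:

# RMSE over the observed (nonzero) ratings only
mask = R > 0
rmse = np.sqrt(np.mean((predR - R)[mask] ** 2))
print("RMSE on observed entries:", rmse)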
Another version
import numpy as np
R = np.array([[4, 0, 2, 0, 1],
              [0, 2, 3, 0, 0],
              [1, 0, 2, 4, 0],
              [5, 0, 0, 3, 1],
              [0, 0, 1, 5, 1],
              [0, 3, 2, 4, 1]])
K = 5
max_iter = 5000
alpha = 0.0002
lamda = 0.004
def lfm_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    M = len(R)
    N = len(R[0])
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T
    # Precompute the indices of the observed (nonzero) ratings
    index_list = np.transpose(np.nonzero(R))
    for step in range(max_iter):
        # for u in range(M):
        #     for i in range(N):
        #         # For every rating greater than 0, compute the prediction error
        #         if R[u][i] > 0:
        for u, i in index_list:
            eui = np.dot(P[u, :], Q[:, i]) - R[u][i]
            for k in range(K):
                P[u][k] = P[u][k] - alpha * (2 * eui * Q[k][i] + 2 * lamda * P[u][k])
                Q[k][i] = Q[k][i] - alpha * (2 * eui * P[u][k] + 2 * lamda * Q[k][i])
        cost = 0
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
                    # Add the regularization term
                    for k in range(K):
                        cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
        if cost < 0.0001:
            break
    return P, Q.T, cost
P, Q, cost = lfm_grad_desc(R, K, max_iter, alpha, lamda)
print(P)
print(Q)
print(cost)
predR = P.dot(Q.T)
print(R)
print(predR)
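For reference, the same masked objective can also be optimized without the per-entry Python loops, using full-batch gradient descent with NumPy broadcasting. This is a sketch of an alternative formulation, not a drop-in replacement: lfm_batch_grad_desc is a hypothetical name, it takes one gradient step per iteration over all observed entries at once (so it typically needs a different alpha than the SGD versions above), and its regularizer penalizes each factor entry once rather than once per observed rating as the loop versions do.

def lfm_batch_grad_desc(R, K=5, max_iter=5000, alpha=0.0002, lamda=0.004):
    M, N = R.shape
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    mask = (R > 0).astype(float)       # 1 where a rating is observed, 0 elsewhere
    for step in range(max_iter):
        E = mask * (P.dot(Q.T) - R)    # prediction errors on observed entries only
        # Full-batch gradients of the regularized squared error
        P -= alpha * (2 * E.dot(Q) + 2 * lamda * P)
        Q -= alpha * (2 * E.T.dot(P) + 2 * lamda * Q)
    E = mask * (P.dot(Q.T) - R)
    cost = (E ** 2).sum() + lamda * ((P ** 2).sum() + (Q ** 2).sum())
    return P, Q, cost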