LFM算法

自定义LFM

import numpy as np


class SVD():
    """Latent-factor model (FunkSVD-style) trained with SGD.

    Factorizes the m x n rating matrix into P (m x F) and Q (F x n) so that
    rating_data ≈ P @ Q, fitting only the observed (non-zero) entries with
    an L2-regularized squared-error loss.
    """

    def __init__(self, rating_data, F=5, alpha=0.1, lmbda=0.1, max_iter=100):
        # F: number of latent factors
        # alpha: SGD learning rate
        # lmbda: L2 regularization strength
        # max_iter: number of training epochs
        self.F = F
        self.alpha = alpha
        self.lmbda = lmbda
        self.max_iter = max_iter
        self.rating_data = rating_data

        # m users, n items.
        m, n = rating_data.shape
        # Random initialization of the factor matrices. Q is kept transposed
        # (F x n) so predict() can compute P[u, :] @ Q[:, i] directly.
        self.P = np.random.rand(m, F)
        self.Q = np.random.rand(n, F).T

    def train(self):
        """Run SGD over all observed (non-zero) ratings for max_iter epochs."""
        # (u, i) coordinates of every observed rating, computed once.
        ratings_index = np.transpose(np.nonzero(self.rating_data))
        for step in range(self.max_iter):
            sum_e_ui = 0.0  # accumulated absolute error for this epoch
            for u, i in ratings_index:
                e_ui = self.rating_data[u, i] - self.predict(u, i)
                sum_e_ui += abs(e_ui)
                # Gradient step on the regularized squared error.
                # With e_ui = r - r_hat, the descent direction for P is
                # +2*e_ui*Q - 2*lmbda*P (the original line did
                # `P[u][f] += P[u][f] - alpha*(...)`, which doubles the
                # parameter each step AND uses the wrong sign — it diverges).
                for f in range(self.F):
                    self.P[u][f] += self.alpha * (2 * e_ui * self.Q[f][i] - 2 * self.lmbda * self.P[u][f])
                    self.Q[f][i] += self.alpha * (2 * e_ui * self.P[u][f] - 2 * self.lmbda * self.Q[f][i])

            print("第" + str(step + 1) + "轮迭代的误差为:" + str(sum_e_ui))

    def predict(self, u, i):
        """Return the predicted rating of user u for item i: P[u] · Q[:, i]."""
        return np.dot(self.P[u, :], self.Q[:, i])

def loadData():
    """Return the toy 6x5 user-item rating matrix (0 means 'not rated')."""
    ratings = [
        [4, 0, 2, 0, 1],
        [0, 2, 3, 0, 0],
        [1, 0, 2, 4, 0],
        [5, 0, 0, 3, 1],
        [0, 0, 1, 5, 1],
        [0, 3, 2, 4, 1],
    ]
    return np.array(ratings)


# Build the toy rating matrix and fit the custom SVD latent-factor model.
# F=10 latent factors; other hyperparameters use the class defaults.
rating_data = loadData()
basicsvd = SVD(rating_data, F=10)
basicsvd.train()

对比

import numpy as np
# Toy user-item rating matrix; 0 means "not rated".
R = np.array([[4, 0, 2, 0, 1]
            , [0, 2, 3, 0, 0]
            , [1, 0, 2, 4, 0]
            , [5, 0, 0, 3, 1]
            , [0, 0, 1, 5, 1]
            , [0, 3, 2, 4, 1]])
# Hyperparameters for the gradient-descent factorization below.
K = 5  # number of latent factors
max_iter = 5000  # training epochs
alpha = 0.0002  # learning rate
lamda = 0.004  # L2 regularization coefficient


# 核心算法
def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    """Factorize R ≈ P @ Q.T by SGD over the observed (non-zero) entries.

    Args:
        R: (M, N) rating matrix; zeros mark missing entries.
        K: number of latent factors.
        max_iter: maximum number of training epochs.
        alpha: learning rate.
        lamda: L2 regularization coefficient.

    Returns:
        (P, Q, cost): P is (M, K), Q is (N, K), and cost is the final
        regularized squared error over the observed entries.
    """
    # Basic dimensions.
    M = len(R)
    N = len(R[0])

    # Random initialization; Q is held transposed (K x N) during training.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T

    cost = 0.0  # defined before the loop so max_iter == 0 cannot NameError
    for step in range(max_iter):
        # One SGD pass over every observed rating.
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    # Prediction error for this (user, item) pair.
                    eui = np.dot(P[u, :], Q[:, i]) - R[u][i]

                    # Gradient-descent update for each latent dimension.
                    for k in range(K):
                        P[u][k] = P[u][k] - alpha * (2 * eui * Q[k][i] + 2 * lamda * P[u][k])
                        Q[k][i] = Q[k][i] - alpha * (2 * eui * P[u][k] + 2 * lamda * Q[k][i])

        # Regularized squared error on the observed entries.
        # (The original also rebuilt the full predR = P @ Q every epoch but
        # never used it — removed as dead work.)
        cost = 0.0
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
                    # Add the L2 penalty for the vectors touched by this entry.
                    for k in range(K):
                        cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
        if cost < 0.0001:
            break

    return P, Q.T, cost


# Run the factorization, then show the learned factors, the final cost,
# and the reconstructed rating matrix next to the original.
P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)

print(P)
print(Q)
print(cost)

# Q comes back as (N, K), so the reconstruction is P @ Q.T.
predR = P.dot(Q.T)

print(R)
print(predR)

另一版本

import numpy as np

# Same toy rating matrix as above; 0 means "not rated".
R = np.array([[4, 0, 2, 0, 1]
            , [0, 2, 3, 0, 0]
            , [1, 0, 2, 4, 0]
            , [5, 0, 0, 3, 1]
            , [0, 0, 1, 5, 1]
            , [0, 3, 2, 4, 1]])
# Hyperparameters for the index-list variant of the factorization.
K = 5  # number of latent factors
max_iter = 5000  # training epochs
alpha = 0.0002  # learning rate
lamda = 0.004  # L2 regularization coefficient


def lfm_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002):
    """Factorize R ≈ P @ Q.T by SGD, iterating only the observed entries.

    Functionally equivalent to a full M x N scan that skips zeros, but the
    (u, i) coordinates of the non-zero ratings are precomputed once.

    Args:
        R: (M, N) rating matrix; zeros mark missing entries.
        K: number of latent factors.
        max_iter: maximum number of training epochs.
        alpha: learning rate.
        lamda: L2 regularization coefficient.

    Returns:
        (P, Q, cost): P is (M, K), Q is (N, K), and cost is the final
        regularized squared error over the observed entries.
    """
    M = len(R)
    N = len(R[0])

    # Random initialization; Q is held transposed (K x N) during training.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T

    # (u, i) coordinates of every observed rating, computed once up front.
    index_list = np.transpose(np.nonzero(R))
    cost = 0.0  # defined before the loop so max_iter == 0 cannot NameError
    for step in range(max_iter):
        # One SGD pass over the observed ratings only.
        for u, i in index_list:
            eui = np.dot(P[u, :], Q[:, i]) - R[u][i]

            # Gradient-descent update for each latent dimension.
            for k in range(K):
                P[u][k] = P[u][k] - alpha * (2 * eui * Q[k][i] + 2 * lamda * P[u][k])
                Q[k][i] = Q[k][i] - alpha * (2 * eui * P[u][k] + 2 * lamda * Q[k][i])

        # Regularized squared error over the same observed entries.
        cost = 0.0
        for u, i in index_list:
            cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
            # L2 penalty for the vectors touched by this entry.
            for k in range(K):
                cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
        if cost < 0.0001:
            break

    return P, Q.T, cost


# Run the index-list variant, then show the learned factors, the final
# cost, and the reconstruction next to the original matrix.
P, Q, cost = lfm_grad_desc(R, K, max_iter, alpha, lamda)

print(P)
print(Q)
print(cost)

# Q comes back as (N, K), so the reconstruction is P @ Q.T.
predR = P.dot(Q.T)

print(R)
print(predR)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值