Implementing the biasSVD Recommendation Algorithm Based on Matrix Factorization (MF)

Following the previous article, this post implements the biasSVD algorithm in the SVD series.

It is worth deriving the formulas by hand once to reinforce your understanding; the overall difficulty is low, and this implementation is a good base to modify further. The next post will continue the SVD series with the SVD++ algorithm, which adds a time factor implemented with a bucketing approach.
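The formulas referenced above (the figure from the original post is not reproduced here) can be summarised as follows; the notation is my reconstruction chosen to match the code below, not the author's exact derivation. The predicted rating adds a global mean and user/item bias terms to the usual latent-factor product, training minimises the regularised squared error over the set of observed pairs Ω, and the same regularisation coefficient a is used for every parameter group:

$$\hat{r}_{ij} = u + b_i + b_j + \mathbf{p}_i \cdot \mathbf{q}_j$$

$$\min_{P,\,Q,\,b_i,\,b_j} \sum_{(i,j)\in\Omega} \big(r_{ij} - \hat{r}_{ij}\big)^2 + a\big(\lVert\mathbf{p}_i\rVert^2 + \lVert\mathbf{q}_j\rVert^2 + b_i^2 + b_j^2\big)$$

$$\frac{\partial L}{\partial \mathbf{p}_i} = -2\,e_{ij}\,\mathbf{q}_j + 2a\,\mathbf{p}_i, \qquad e_{ij} = r_{ij} - \hat{r}_{ij}$$

The gradients for q_j, b_i and b_j have the same form, which is exactly what gradient_bias computes below.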

import numpy as np
import ray
import pandas as pd
import os
import progressbar
import matplotlib.pyplot as plt

np.seterr(divide='ignore', invalid='ignore')
'''
This script implements the biasSVD matrix-factorization model,
optionally distributed with the Ray framework (the Ray init call is commented out below).
name: kenny adelaide
email: kenny13141314@163.com
time: 2021/11/17
'''

# ray.init(address='192.168.0.219:6379', _redis_password="5241590000000000")

 

'''
=================================================common function area==============================================================
'''


def onloaddata():
    '''
    Load the rating data into memory.
    Returns: the original dataframe, the user count and the video count.
    '''
    directory = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(directory, 'data/00000005.csv')
    o_data = pd.read_csv(path)
    userno = o_data['userid'].max() + 1
    videono = o_data['videoid'].max() + 1
    return [o_data, userno, videono]


def build_score_matrix_R(data, userno, videono):
    '''
    Common function for all algorithm models.
    Build the true score matrix from the original data; unrated cells stay None.
    Args:
        data: original rating dataframe.
        userno: number of users (max user id + 1).
        videono: number of videos (max video id + 1).
    Returns: score matrix of shape (userno, videono).
    '''
    matrix = np.full((userno, videono), None, dtype=object)
    for index, row in data.iterrows():
        matrix[int(row['userid']), int(row['videoid'])] = float(row['score'])
    return matrix


def L2Norm(a, vector):
    # squared L2 norm of the vector, scaled by the regularization coefficient a.
    v = np.ravel(vector)
    return float(a * np.dot(v, v))



'''
===============================================bias svd=================================================================
'''


def init_P_Q_B_matrix_bias(user_disms=[3, 3], item_disms=[3, 3], init_method='quadrature'):
    '''
    Create the matrices needed for SGD training,
    drawn from a standard normal distribution.
    Args:
        user_disms: user matrix shape.
        item_disms: item matrix shape.
        init_method: matrix initialisation approach.
    Returns: four matrices. B_i and B_j are the bias vectors; P and Q are the
    low-dimensional matrices whose product approximates the original score matrix.
    '''

    if str(init_method) == str('quadrature'):
        P = np.random.randn(user_disms[0], user_disms[1])
        Q = np.random.randn(item_disms[1], item_disms[0])
        B_i = np.random.randn(user_disms[0], 1)
        B_j = np.random.randn(item_disms[0], 1)
        return [P, Q, B_i, B_j]
    return None


def cal_mean_rating_bias(y_matirx):
    '''
    Calculate the mean of the observed scores, used as the global bias u.
    Returns: a float.
    '''

    rows, cols = np.nonzero(y_matirx != None)
    u = np.sum(y_matirx[rows, cols]) / len(rows)
    return u


def gradient_bias(u, a, B_i, B_j, y_matrix, P, Q):
    '''
    Differentiate the regularised squared error f(x) to get the gradients of the
    four parameter groups: p_i, q_j, b_i, b_j.
    Returns: per-observation error and the four gradients.
    '''
    rows, cols = np.nonzero(y_matrix != None)
    # interaction term p_i . q_j for every observed (user, item) pair.
    R = np.sum(P[rows] * Q.T[cols], axis=1)
    # residual e = r - (u + b_i + b_j + p_i . q_j), shaped (n_obs, 1).
    error = (y_matrix[rows, cols].astype(float) - R - B_i[rows, 0] - B_j[cols, 0] - u).reshape(len(rows), 1)

    gradient_p_i = -2 * error * Q[:, cols].T + 2 * a * P[rows, :]
    gradient_q_j = -2 * error * P[rows, :] + 2 * a * Q[:, cols].T
    gradient_b_i = -2 * error + 2 * a * B_i[rows]
    gradient_b_j = -2 * error + 2 * a * B_j[cols]

    return [error, gradient_p_i, gradient_q_j, gradient_b_i, gradient_b_j]


def bias_svd():
    '''
    biasSVD adds bias terms to plain matrix factorization: a global mean u,
    a per-user bias b_i and a per-item bias b_j, capturing rating tendencies
    that are unrelated to the user-item interaction itself (e.g. an item may
    simply tend to receive lower ratings regardless of the user).
    Returns: cost history and iteration count.
    '''

    [o_data, userno, videono] = onloaddata()
    learning_rate = 0.01
    iters = 400
    a = 0.5
    cost_arr = []
    count = 0

    [P, Q, B_i, B_j] = init_P_Q_B_matrix_bias(user_disms=[userno, 2], item_disms=[videono, 2], init_method='quadrature')
    y_matirx = build_score_matrix_R(o_data, userno, videono)

    # round the initial parameters to 4 decimals.
    P = np.around(P, decimals=4)
    Q = np.around(Q, decimals=4)
    B_i = np.around(B_i, decimals=4)
    B_j = np.around(B_j, decimals=4)

    u = cal_mean_rating_bias(y_matirx)

    # fetch the positions (indices) of the observed entries in the score matrix.
    rows, cols = np.nonzero(y_matirx != None)

    bar = progressbar
    for i in bar.progressbar(range(iters)):
        [error, gradient_p_i, gradient_q_j, gradient_b_i, gradient_b_j] = gradient_bias(u, a, B_i, B_j, y_matirx, P, Q)

        # accumulate the per-observation gradients; np.subtract.at applies every
        # observation's contribution even when a user or item index repeats.
        np.subtract.at(P, rows, learning_rate * gradient_p_i)
        np.subtract.at(Q.T, cols, learning_rate * gradient_q_j)
        np.subtract.at(B_i, rows, learning_rate * gradient_b_i)
        np.subtract.at(B_j, cols, learning_rate * gradient_b_j)

        cost = np.sum(np.square(error))
        cost_arr.append(cost)
        count += 1
        if cost <= 0.001:
            break

    # interaction part of the reconstruction; the full prediction is u + B_i + B_j + P.dot(Q).
    print(np.dot(P, Q))

    return cost_arr, count
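A minimal usage sketch (my addition, not part of the original post): run the training and plot the cost curve with the matplotlib module already imported above; the data path and column names are whatever onloaddata() expects.

if __name__ == '__main__':
    # train biasSVD and inspect how the squared-error cost decreases per iteration.
    costs, n_iters = bias_svd()
    plt.plot(range(n_iters), costs)
    plt.xlabel('iteration')
    plt.ylabel('squared error cost')
    plt.title('biasSVD training cost')
    plt.show()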

 
