Implementing the biasSVD Recommendation Algorithm Based on Matrix Factorization (MF)

Following the previous article, this post implements the biasSVD algorithm in the SVD series.

It is worth deriving the formulas by hand once to reinforce your understanding; the overall difficulty is low, and this implementation is a good base to modify further. The next post will continue the SVD series with the SVD++ algorithm, which adds a time factor implemented with a bucketing approach.
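The formulas referenced above (the figure from the original post is not reproduced here) can be summarised as follows; the notation is my reconstruction chosen to match the code below, not the author's exact derivation. The predicted rating adds a global mean and user/item bias terms to the usual latent-factor product, training minimises the regularised squared error over the set of observed pairs Ω, and the same regularisation coefficient a is used for every parameter group:

$$\hat{r}_{ij} = u + b_i + b_j + \mathbf{p}_i \cdot \mathbf{q}_j$$

$$\min_{P,\,Q,\,b_i,\,b_j} \sum_{(i,j)\in\Omega} \big(r_{ij} - \hat{r}_{ij}\big)^2 + a\big(\lVert\mathbf{p}_i\rVert^2 + \lVert\mathbf{q}_j\rVert^2 + b_i^2 + b_j^2\big)$$

$$\frac{\partial L}{\partial \mathbf{p}_i} = -2\,e_{ij}\,\mathbf{q}_j + 2a\,\mathbf{p}_i, \qquad e_{ij} = r_{ij} - \hat{r}_{ij}$$

The gradients for q_j, b_i and b_j have the same form, which is exactly what gradient_bias computes below.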

import numpy as np
import ray
import pandas as pd
import os
import progressbar
import matplotlib.pyplot as plt

np.seterr(divide='ignore', invalid='ignore')
'''
This script implements the biasSVD matrix-factorization model,
optionally distributed with the Ray framework (the Ray init call is commented out below).
name: kenny adelaide
email: kenny13141314@163.com
time: 2021/11/17
'''

# ray.init(address='192.168.0.219:6379', _redis_password="5241590000000000")

 

'''
=================================================common function area==============================================================
'''


def onloaddata():
    '''
    Load the rating data into memory.
    Returns: the original dataframe, the user count and the video count.
    '''
    directory = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(directory, 'data/00000005.csv')
    o_data = pd.read_csv(path)
    userno = o_data['userid'].max() + 1
    videono = o_data['videoid'].max() + 1
    return [o_data, userno, videono]


def build_score_matrix_R(data, userno, videono):
    '''
    Common function for all algorithm models.
    Build the true score matrix from the original data; unrated cells stay None.
    Args:
        data: original rating dataframe.
        userno: number of users (max user id + 1).
        videono: number of videos (max video id + 1).
    Returns: score matrix of shape (userno, videono).
    '''
    matrix = np.full((userno, videono), None, dtype=object)
    for index, row in data.iterrows():
        matrix[int(row['userid']), int(row['videoid'])] = float(row['score'])
    return matrix


def L2Norm(a, vector):
    # squared L2 norm of the vector, scaled by the regularization coefficient a.
    v = np.ravel(vector)
    return float(a * np.dot(v, v))



'''
===============================================bias svd=================================================================
'''


def init_P_Q_B_matrix_bias(user_disms=[3, 3], item_disms=[3, 3], init_method='quadrature'):
    '''
    Create the matrices needed for SGD training,
    drawn from a standard normal distribution.
    Args:
        user_disms: user matrix shape.
        item_disms: item matrix shape.
        init_method: matrix initialisation approach.
    Returns: four matrices. B_i and B_j are the bias vectors; P and Q are the
    low-dimensional matrices whose product approximates the original score matrix.
    '''

    if str(init_method) == str('quadrature'):
        P = np.random.randn(user_disms[0], user_disms[1])
        Q = np.random.randn(item_disms[1], item_disms[0])
        B_i = np.random.randn(user_disms[0], 1)
        B_j = np.random.randn(item_disms[0], 1)
        return [P, Q, B_i, B_j]
    return None


def cal_mean_rating_bias(y_matirx):
    '''
    Calculate the mean of the observed scores, used as the global bias u.
    Returns: a float.
    '''

    rows, cols = np.nonzero(y_matirx != None)
    u = np.sum(y_matirx[rows, cols]) / len(rows)
    return u


def gradient_bias(u, a, B_i, B_j, y_matrix, P, Q):
    '''
    Differentiate the regularised squared error f(x) to get the gradients of the
    four parameter groups: p_i, q_j, b_i, b_j.
    Returns: per-observation error and the four gradients.
    '''
    rows, cols = np.nonzero(y_matrix != None)
    # interaction term p_i . q_j for every observed (user, item) pair.
    R = np.sum(P[rows] * Q.T[cols], axis=1)
    # residual e = r - (u + b_i + b_j + p_i . q_j), shaped (n_obs, 1).
    error = (y_matrix[rows, cols].astype(float) - R - B_i[rows, 0] - B_j[cols, 0] - u).reshape(len(rows), 1)

    gradient_p_i = -2 * error * Q[:, cols].T + 2 * a * P[rows, :]
    gradient_q_j = -2 * error * P[rows, :] + 2 * a * Q[:, cols].T
    gradient_b_i = -2 * error + 2 * a * B_i[rows]
    gradient_b_j = -2 * error + 2 * a * B_j[cols]

    return [error, gradient_p_i, gradient_q_j, gradient_b_i, gradient_b_j]


def bias_svd():
    '''
    biasSVD adds bias terms to plain matrix factorization: a global mean u,
    a per-user bias b_i and a per-item bias b_j, capturing rating tendencies
    that are unrelated to the user-item interaction itself (e.g. an item may
    simply tend to receive lower ratings regardless of the user).
    Returns: cost history and iteration count.
    '''

    [o_data, userno, videono] = onloaddata()
    learning_rate = 0.01
    iters = 400
    a = 0.5
    cost_arr = []
    count = 0

    [P, Q, B_i, B_j] = init_P_Q_B_matrix_bias(user_disms=[userno, 2], item_disms=[videono, 2], init_method='quadrature')
    y_matirx = build_score_matrix_R(o_data, userno, videono)

    # round the initial parameters to 4 decimals.
    P = np.around(P, decimals=4)
    Q = np.around(Q, decimals=4)
    B_i = np.around(B_i, decimals=4)
    B_j = np.around(B_j, decimals=4)

    u = cal_mean_rating_bias(y_matirx)

    # fetch the positions (indices) of the observed entries in the score matrix.
    rows, cols = np.nonzero(y_matirx != None)

    bar = progressbar
    for i in bar.progressbar(range(iters)):
        [error, gradient_p_i, gradient_q_j, gradient_b_i, gradient_b_j] = gradient_bias(u, a, B_i, B_j, y_matirx, P, Q)

        # accumulate the per-observation gradients; np.subtract.at applies every
        # observation's contribution even when a user or item index repeats.
        np.subtract.at(P, rows, learning_rate * gradient_p_i)
        np.subtract.at(Q.T, cols, learning_rate * gradient_q_j)
        np.subtract.at(B_i, rows, learning_rate * gradient_b_i)
        np.subtract.at(B_j, cols, learning_rate * gradient_b_j)

        cost = np.sum(np.square(error))
        cost_arr.append(cost)
        count += 1
        if cost <= 0.001:
            break

    # interaction part of the reconstruction; the full prediction is u + B_i + B_j + P.dot(Q).
    print(np.dot(P, Q))

    return cost_arr, count
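A minimal usage sketch (my addition, not part of the original post): run the training and plot the cost curve with the matplotlib module already imported above; the data path and column names are whatever onloaddata() expects.

if __name__ == '__main__':
    # train biasSVD and inspect how the squared-error cost decreases per iteration.
    costs, n_iters = bias_svd()
    plt.plot(range(n_iters), costs)
    plt.xlabel('iteration')
    plt.ylabel('squared error cost')
    plt.title('biasSVD training cost')
    plt.show()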

 
