http://blog.csdn.net/fjssharpsword/article/details/78015956
基于该篇文章中的代码优化,主要是在生成负样例上提高执行速度,代码参考如下:
# -*- coding: utf-8 -*-
'''
Created on 2017年10月16日
@author: Administrator
'''
import numpy as np
import pandas as pd
from math import exp
import time
import math
class LFM:
def __init__(self,lclass,iters,alpha,lamda,topk,ratio,traindata):
self.lclass = lclass#隐类数量,对性能有影响
self.iters = iters#迭代次数,收敛的最佳迭代次数未知
self.alpha =alpha#梯度下降步长
self.lamda = lamda#正则化参数
self.topk =topk #推荐top k项
self.ratio =ratio #正负样例比率,对性能最大影响
self.traindata=traindata
#初始化开始.....
def getUserPositiveItem(self, userid):#生成正样例
traindata=self.traindata
series = traindata[traindata['userid'] == userid]['itemid']
positiveItemList = list(series.values)
return positiveItemList
def getUserNegativeItem(self, userid):#生成负样例
traindata=self.traindata
itemLen=self.itemLen
ratio=self.ratio
userItemlist = list(set(traindata[traindata['userid'] == userid]['itemid'])) #用户评分过的物品
negativeItemList = []
count = ratio*len(userItemlist)#生成负样例的数量
for key,value in itemLen.iteritems():#itemLen.index
if count==0:
break
if key in userItemlist:
continue<