参考了 Yehuda Koren 2008 年的论文《Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model》。
代码如下:
'''
Version:1.0
Created on 2014-02-25
@Author:Dior
'''
import random
import math
import cPickle as pickle
class SVD():
def __init__(self, allfile, trainfile, testfile, factorNum=10,
             learningRate=0.01, regularization=0.05):
    """Store the data paths and hyper-parameters, then build the model.

    Args:
        allfile: path of the file holding all rating records; it is
            scanned to count the distinct users and items.
        trainfile: path of the training-set file.
        testfile: path of the testing-set file.
        factorNum: number of factors used by the model.
        learningRate: learning rate. Defaults to 0.01, the value that
            used to be hard-coded here.
        regularization: regularization lambda. Defaults to 0.05, the
            value that used to be hard-coded here.
    """
    # Data locations.
    self.allfile = allfile
    self.trainfile = trainfile
    self.testfile = testfile
    # Model size.
    self.factorNum = factorNum
    # Both counters read self.allfile, so it has to be assigned first.
    self.userNum = self.getUserNum()
    self.itemNum = self.getItemNum()
    # Hyper-parameters, now tunable by the caller.
    self.learningRate = learningRate
    self.regularization = regularization
    # Initialize the model and its parameters. initModel is defined
    # elsewhere in the class; it is called last so that every attribute
    # set above is already available to it.
    self.initModel()
#get user number function
def getUserNum(self):
    """Count the distinct users listed in ``self.allfile``.

    Every line is split on tab characters; its first field, with
    surrounding whitespace stripped, is taken as the user id.

    Returns:
        The number of distinct first-column values found in the file.
    """
    users = set()
    # The context manager closes the handle even if reading fails; the
    # previous version never closed the file it opened.
    with open(self.allfile) as records:
        for line in records:
            users.add(line.split('\t')[0].strip())
    # A set already de-duplicates, so its size is the user count.
    return len(users)
#get item number function
def getItemNum(self):
file=self.allfile
cnt=0
itemSet=set()
for line in open(file):
item=line.split('\t')[1].strip()
if item not in itemSe