# 【推荐系统代码实现】实现Funk SVD代码

Funk SVD的原链接

1. 读取数据，划分训练集和测试集
2. 对训练集使用SGD进行训练，得到两个矩阵P和Q：P的大小是(用户总数, factor)，Q的大小是(物品总数, factor)
3. 遍历测试集的每一个user-item对，得到预估的评分，然后与真实评分求RMSE
（仔细看看代码，挺简单的～～～）

#encoding:utf-8
'''
@author:kiki
@date:2019.03.27
'''

import pickle
import numpy as np
import matplotlib.pyplot as plt

class Funk_SVD(object):
    """Funk SVD (latent-factor matrix factorization) trained with SGD.

    A rating is predicted as the dot product of a user vector ``P[u]`` and
    an item vector ``Q[i]``.  ``P`` has shape ``(USER_NUM, FACTOR)`` and
    ``Q`` has shape ``(ITEM_NUM, FACTOR)``.

    The ratings file at ``path`` is read line by line.  Every line after the
    first must contain four separator-delimited fields
    ``user, item, rating, timestamp`` where user and item are 1-based
    integer ids; internally they are converted to 0-based row indices.
    """

    def __init__(self, path, USER_NUM, ITEM_NUM, FACTOR):
        """Store the configuration and randomly initialise ``P`` and ``Q``.

        path- location of the ratings file
        USER_NUM- number of users (rows of P)
        ITEM_NUM- number of items (rows of Q)
        FACTOR- number of latent factors (columns of P and Q)
        """
        super(Funk_SVD, self).__init__()
        self.path = path
        self.USER_NUM = USER_NUM
        self.ITEM_NUM = ITEM_NUM
        self.FACTOR = FACTOR
        self.init_model()

    def load_data(self, flag='train', sep='\t', random_state=0, size=0.8):
        """Lazily yield ``(user_index, item_index, rating)`` for one split.

        flag- 'train' yields the training split, anything else the test split
        sep- field separator of the data file
        random_state- seed of the random split
        size- fraction of the lines that goes to the training split

        Each data line gets one uniform random draw; it belongs to the
        training split when the draw is below ``size``.  Because the stream
        is re-seeded on every call, the same ``random_state`` and ``size``
        always produce the same, mutually exclusive train/test partition.

        NOTE(review): the default values above were reconstructed (the
        original ``def`` line was lost); confirm them against the data used.
        """
        # A private generator gives the same draws as seeding the global
        # NumPy RNG, without disturbing other users of np.random.
        rng = np.random.RandomState(random_state)
        with open(self.path, 'r') as f:
            for index, line in enumerate(f):
                # NOTE(review): the first line is always treated as a header
                # and skipped; a headerless file loses its first rating.
                if index == 0:
                    continue
                in_train = rng.rand() < size
                if in_train != (flag == 'train'):
                    continue
                u, i, r, _ = line.strip('\r\n').split(sep)
                yield (int(u) - 1, int(i) - 1, float(r))

    def init_model(self):
        """Initialise P and Q with uniform values in [0, 1/sqrt(FACTOR))."""
        scale = self.FACTOR ** 0.5
        self.P = np.random.rand(self.USER_NUM, self.FACTOR) / scale
        self.Q = np.random.rand(self.ITEM_NUM, self.FACTOR) / scale

    def train(self, epochs=5, theta=1e-4, alpha=0.02, beta=0.02):
        """Fit P and Q on the training split with stochastic gradient descent.

        epochs- maximum number of passes over the training split
        theta- stop early when the cost changes by less than this threshold
        alpha- initial learning rate (multiplied by 0.9 after every epoch)
        beta- weight of the L2 regularization term

        The cost of every completed epoch is appended to
        ``self.cost_of_epoch``.
        """
        old_e = 0.0
        self.cost_of_epoch = []
        for epoch in range(epochs):
            print("current epoch is {}".format(epoch))
            current_e = 0.0
            for u, i, r in self.load_data(flag='train'):
                # Snapshot both vectors so each gradient is evaluated at the
                # pre-update point; updating P[u] first and then reusing it
                # for Q[i] would mix old and new parameters.
                p_u = self.P[u].copy()
                q_i = self.Q[i].copy()
                err = r - np.dot(p_u, q_i)
                current_e += err ** 2  # squared-error term
                self.P[u] = p_u + alpha * (err * q_i - beta * p_u)
                self.Q[i] = q_i + alpha * (err * p_u - beta * q_i)
                # regularization term, measured on the updated vectors
                current_e += (beta / 2) * (
                    np.dot(self.P[u], self.P[u])
                    + np.dot(self.Q[i], self.Q[i])
                )
            self.cost_of_epoch.append(current_e)
            print('cost is {}'.format(current_e))
            if abs(current_e - old_e) < theta:
                break
            old_e = current_e
            alpha *= 0.9

    def predict_rating(self, user_id, item_id):
        """Return the predicted rating of one user for one item.

        user_id- 0-based user index (file user number minus 1)
        item_id- 0-based item index (file item number minus 1)
        """
        return np.dot(self.P[user_id], self.Q[item_id])

    def _items_rated_by(self, user_id):
        """Return the set of 0-based item indices ``user_id`` rated in training."""
        return {
            i for u, i, _ in self.load_data(flag='train') if u == user_id
        }

    def recommand_list(self, user, k=10):
        """Return the top ``k`` recommendations for a user.

        user- 1-based user number as it appears in the data file
        k- maximum number of items to return

        Returns a list of ``(item_index, predicted_rating)`` tuples with
        0-based item indices, sorted by predicted rating in descending order.

        NOTE(review): items the user already rated in the training split are
        excluded.  The original filter condition was lost (only a bare
        ``continue`` survived), so this is a reconstruction; confirm intent.
        """
        user_id = user - 1
        seen = self._items_rated_by(user_id)
        user_items = {
            item_id: self.predict_rating(user_id, item_id)
            for item_id in range(self.ITEM_NUM)
            if item_id not in seen
        }
        ranked = sorted(user_items.items(), key=lambda x: x[1], reverse=True)
        return ranked[:k]

    def test_rmse(self):
        """Return the root-mean-square error on the test split.

        Raises ValueError when the test split contains no ratings, instead
        of failing with a division by zero.
        """
        squared_error = 0.0
        num = 0
        for u, i, r in self.load_data(flag='test'):
            num += 1
            squared_error += (r - np.dot(self.P[u], self.Q[i])) ** 2
        if num == 0:
            raise ValueError('test split is empty; cannot compute RMSE')
        return (squared_error / num) ** 0.5

    def show(self):
        """Plot the training cost against the epoch number."""
        nums = range(len(self.cost_of_epoch))
        plt.plot(nums, self.cost_of_epoch, label='cost value')
        plt.xlabel('# of epoch')
        plt.ylabel('cost')
        plt.legend()
        plt.show()

    def save_model(self, filename='funk-svd.pkl'):
        """Pickle P and Q to ``filename`` as a dict with keys 'P' and 'Q'."""
        data_dict = {'P': self.P, 'Q': self.Q}
        # The context manager closes (and flushes) the file even on error.
        with open(filename, 'wb') as f:
            pickle.dump(data_dict, f)

    def load_model(self, filename='funk-svd.pkl'):
        """Reload P and Q from a file written by ``save_model``.

        NOTE: unpickling can execute arbitrary code; only load files from a
        trusted source.
        """
        with open(filename, 'rb') as f:
            model = pickle.load(f)
        self.P = model['P']
        self.Q = model['Q']

if __name__ == "__main__":
    # Arguments: path, user_num, item_num, factor.
    # NOTE(review): the path is a hard-coded local Windows location
    # (presumably the MovieLens 100k ratings file); adjust before running.
    mf = Funk_SVD(r'D:\code\ml-100k\u.data', 943, 1682, 50)
    mf.train()
    mf.save_model()
    rmse = mf.test_rmse()
    print("rmse:", rmse)
    # Top recommendations for user number 3 (1-based).
    user_items = mf.recommand_list(3)
    print(user_items)


©️2019 CSDN 皮肤主题: 游动-白 设计师: 上身试试