【推荐系统代码实现】实现Funk SVD代码

最新推荐文章于 2024-08-25 10:41:32 发布

_ dingding_

最新推荐文章于 2024-08-25 10:41:32 发布

阅读量4.3k

点赞数

分类专栏： # 推荐系统代码实现文章标签： funk svd 推荐系统

本文链接：https://blog.csdn.net/qiqi123i/article/details/88855620

版权

推荐系统代码实现专栏收录该内容

5 篇文章 4 订阅

订阅专栏

Funk SVD的原链接
主要优化函数是：
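$$\min_{P,Q}\ \sum_{(u,i)\in\mathcal{K}} \left[ \left(r_{ui} - p_u^{\top} q_i\right)^2 + \frac{\beta}{2}\left(\lVert p_u\rVert^2 + \lVert q_i\rVert^2\right) \right]$$
其中 $\mathcal{K}$ 是训练集中已有评分的 (user, item) 对，$p_u$、$q_i$ 分别是矩阵 P、Q 中对应用户和物品的行向量，$\beta$ 是正则化系数。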
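使用SGD进行优化，记预测误差 $e_{ui} = r_{ui} - p_u^{\top} q_i$，学习率为 $\alpha$，正则化系数为 $\beta$（梯度中的常数因子并入学习率），更新公式是：
$$p_u \leftarrow p_u + \alpha\,(e_{ui}\, q_i - \beta\, p_u), \qquad q_i \leftarrow q_i + \alpha\,(e_{ui}\, p_u - \beta\, q_i)$$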

简述一下代码过程：

读取数据，划分训练集和测试集
对训练集使用SGD进行训练，得到两个矩阵 P 和 Q：P 的大小是 (用户总数, factor)，Q 的大小是 (物品总数, factor)
遍历测试集的每一个user-item对，得到预估的评分，然后与真实评分求RMSE
（仔细看看代码，挺简单的～～～）

代码如下：

#encoding:utf-8
'''
@author:kiki
@date:2019.03.27
'''

import pickle
import numpy as np
import matplotlib.pyplot as plt


class Funk_SVD(object):
    """Funk SVD (latent-factor matrix factorization) trained with SGD.

    A rating is approximated as ``dot(P[u], Q[i])`` where ``P`` has shape
    ``(USER_NUM, FACTOR)`` and ``Q`` has shape ``(ITEM_NUM, FACTOR)``.
    The rating file is streamed from disk and split into train/test by one
    seeded random draw per line, so both splits are reproducible and
    disjoint as long as the same ``random_state`` and ``size`` are used.
    """

    def __init__(self, path, USER_NUM, ITEM_NUM, FACTOR):
        """Store the configuration and randomly initialise ``P`` and ``Q``.

        path- location of the rating file (one ``user item rating time``
              record per line)
        USER_NUM- number of users (rows of P)
        ITEM_NUM- number of items (rows of Q)
        FACTOR- number of latent factors (columns of P and Q)
        """
        super(Funk_SVD, self).__init__()
        self.path = path
        self.USER_NUM = USER_NUM
        self.ITEM_NUM = ITEM_NUM
        self.FACTOR = FACTOR
        # Defined up front so show() works even before train() was called.
        self.cost_of_epoch = []
        self.init_model()

    def load_data(self, flag='train', sep='\t', random_state=0, size=0.8):
        """Lazily yield ``(user_id, item_id, rating)`` for one split.

        flag- 'train' yields the training split, anything else the test split
        sep- field separator of the rating file
        random_state- seed of the per-line random draw that decides the split
        size- fraction of lines that goes to the training split

        The ids in the file are 1-based; the yielded ids are 0-based so they
        can index ``P`` and ``Q`` directly. The first line of the file is
        always skipped.
        NOTE(review): the skip assumes the file starts with a header line;
        confirm this for the data file actually used.
        """
        # A private generator produces the same sequence as seeding the
        # global NumPy RNG with the same value, but does not clobber the
        # global state for the rest of the program.
        rng = np.random.RandomState(random_state)
        want_train = (flag == 'train')
        with open(self.path, 'r') as f:
            for index, line in enumerate(f):
                if index == 0:
                    continue
                # Draw for every line, whichever split is requested, so the
                # train and test generators make identical decisions.
                in_train = rng.rand() < size
                if in_train != want_train:
                    continue
                u, i, r, _ = line.strip('\r\n').split(sep)
                yield (int(u) - 1, int(i) - 1, float(r))

    def init_model(self):
        """Initialise P and Q with uniform values in [0, 1/sqrt(FACTOR))."""
        scale = self.FACTOR ** 0.5
        self.P = np.random.rand(self.USER_NUM, self.FACTOR) / scale
        self.Q = np.random.rand(self.ITEM_NUM, self.FACTOR) / scale

    def train(self, epochs=5, theta=1e-4, alpha=0.02, beta=0.02):
        """Fit P and Q with stochastic gradient descent.

        epochs- maximum number of passes over the training split
        theta- stop early when the cost changes by less than this value
        alpha- learning rate (multiplied by 0.9 after every epoch)
        beta- weight of the L2 regularisation term

        The cost of every epoch is stored in ``self.cost_of_epoch``.
        """
        old_e = 0.0
        self.cost_of_epoch = []
        for epoch in range(epochs):
            print("current epoch is {}".format(epoch))
            current_e = 0.0
            # load_data returns a generator, so it is re-created per epoch.
            for u, i, r in self.load_data(flag='train'):
                # Snapshot the user vector: both updates of one SGD step
                # must use the values from *before* the step, otherwise the
                # item update would see the already-modified user vector.
                p_u = self.P[u].copy()
                q_i = self.Q[i]
                err = r - np.dot(p_u, q_i)
                current_e += err ** 2  # loss term
                self.P[u] += alpha * (err * q_i - beta * p_u)
                self.Q[i] += alpha * (err * p_u - beta * q_i)
                # regularisation term
                current_e += (beta / 2) * (np.dot(self.P[u], self.P[u])
                                           + np.dot(self.Q[i], self.Q[i]))
            self.cost_of_epoch.append(current_e)
            print('cost is {}'.format(current_e))
            if abs(current_e - old_e) < theta:
                break
            old_e = current_e
            alpha *= 0.9

    def predict_rating(self, user_id, item_id):
        """Return the predicted rating ``dot(P[user_id], Q[item_id])``.

        user_id- 0-based user index (number in the file minus 1)
        item_id- 0-based item index (number in the file minus 1)
        """
        return np.dot(self.P[user_id], self.Q[item_id])

    def recommand_list(self, user, k=10):
        """Return the top ``k`` unseen items for a user.

        user- 1-based user number, as it appears in the rating file
        k- maximum number of recommendations

        Returns a list of ``(item_id, predicted_rating)`` tuples with
        0-based item ids, sorted by predicted rating in descending order.
        Items the user rated in the training split are excluded.
        Raises ValueError when ``user`` is outside ``1..USER_NUM``.
        """
        if not 1 <= user <= self.USER_NUM:
            raise ValueError(
                "user must be in 1..{}, got {}".format(self.USER_NUM, user))
        user_id = user - 1
        # Read the training split once (not once per item) and look the
        # user up by the same 0-based id that load_data yields; a user
        # without any training rating simply has nothing to exclude.
        seen = self.user_had_look_in_train().get(user_id, {})
        user_items = {}
        for item_id in range(self.ITEM_NUM):
            if item_id in seen:
                continue
            user_items[item_id] = self.predict_rating(user_id, item_id)
        ranked = sorted(user_items.items(), key=lambda x: x[1], reverse=True)
        return ranked[:k]

    def user_had_look_in_train(self):
        """Return ``{user_id: {item_id: rating}}`` for the training split.

        All ids are 0-based, exactly as yielded by ``load_data``.
        """
        user_had_look = {}
        for u, i, r in self.load_data(flag='train'):
            user_had_look.setdefault(u, {})[i] = r
        return user_had_look

    def test_rmse(self):
        """Return the RMSE of the model on the test split.

        Returns ``nan`` when the test split is empty, because the RMSE is
        undefined without samples.
        """
        squared_error = 0.0
        num = 0
        for u, i, r in self.load_data(flag='test'):
            squared_error += (r - np.dot(self.P[u], self.Q[i])) ** 2
            num += 1
        if num == 0:
            return float('nan')
        return (squared_error / num) ** 0.5

    def show(self):
        """Plot the training cost against the epoch number."""
        nums = range(len(self.cost_of_epoch))
        plt.plot(nums, self.cost_of_epoch, label='cost value')
        plt.xlabel('# of epoch')
        plt.ylabel('cost')
        plt.legend()
        plt.show()

    def save_model(self, path='funk-svd.pkl'):
        """Pickle P and Q to ``path``.

        path- destination file; defaults to 'funk-svd.pkl' in the current
              working directory
        """
        data_dict = {'P': self.P, 'Q': self.Q}
        # The context manager guarantees the file is flushed and closed.
        with open(path, 'wb') as f:
            pickle.dump(data_dict, f)

    def read_model(self, path='funk-svd.pkl'):
        """Load P and Q from a file written by ``save_model``.

        path- source file; defaults to 'funk-svd.pkl' in the current
              working directory
        """
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(path, 'rb') as f:
            model = pickle.load(f)
        self.P = model['P']
        self.Q = model['Q']

if __name__ == "__main__":
    # Demo run on the MovieLens 100k ratings file.
    # Constructor arguments: path, user_num, item_num, factor.
    model = Funk_SVD(r'D:\code\ml-100k\u.data', 943, 1682, 50)

    # Fit the factors and persist them before evaluating.
    model.train()
    model.save_model()

    # Report the error on the held-out split.
    print("rmse:", model.test_rmse())

    # Show the top recommendations for user number 3.
    print(model.recommand_list(3))

使用的数据集是 MovieLens 100K（ml-100k）数据集中的 u.data 文件（代码中按 943 个用户、1682 个物品设置），链接在此