课程 Python代码:
__author__ = 'LiFeiteng(Email: lifeiteng0422@gmail.com)'
# -*- coding: utf-8 -*-
import numpy as np
class UserUserRec(object):
    """User-user collaborative-filtering recommender.

    Ratings are stored in a dense users x movies matrix in which 0 means
    "not rated".  Similarity between two users is the cosine of their
    mean-centered rating vectors, and a prediction is the target user's
    mean rating plus the similarity-weighted average of the neighbours'
    mean-centered ratings for the movie.

    The code is written to run unchanged on Python 2.7 and Python 3.
    """

    def __init__(self):
        self.U = 0  # number of distinct users seen so far
        self.M = 0  # number of distinct movies seen so far
        self.user_dict = {}    # user id (str) -> row index in user_ratings
        self.movie_dict = {}   # movie id (str) -> column index in user_ratings
        self.movie_title = {}  # movie id (str) -> movie title
        # Kept as np.matrix (not ndarray) so external code that reads this
        # attribute keeps seeing the type it always has; the methods below
        # convert to a plain ndarray before doing any arithmetic.
        self.user_ratings = np.matrix([])

    def GetRatingData(self, ratings_file):
        """Load ``user,movie,rating`` lines and build the rating matrix.

        Users and movies get row/column indices in order of first
        appearance.  The resulting counts are printed as ``"U M"``.

        Args:
            ratings_file: path of a comma-separated file with exactly three
                fields per line.

        Raises:
            ValueError: if a non-blank line does not have three fields or
                its rating is not numeric.
        """
        records = []
        with open(ratings_file) as handle:
            for line in handle:
                if not line.strip():
                    continue  # tolerate blank lines (e.g. at end of file)
                user, movie, rating = line.split(",")
                if user not in self.user_dict:
                    self.user_dict[user] = self.U
                    self.U += 1
                if movie not in self.movie_dict:
                    self.movie_dict[movie] = self.M
                    self.M += 1
                records.append(
                    (self.user_dict[user], self.movie_dict[movie], float(rating))
                )
        print("%d %d" % (self.U, self.M))
        # The matrix can only be allocated once the final U and M are known,
        # so the parsed records are buffered and written in afterwards.  This
        # also guarantees the values come from ratings_file itself.
        self.user_ratings = np.matrix(np.zeros([self.U, self.M]))
        for row, col, value in records:
            self.user_ratings[row, col] = value

    def GetMovieTitles(self, movie_titles_file):
        """Load ``movie,title`` lines into ``self.movie_title``.

        Only the first comma separates the id from the title, so titles may
        themselves contain commas.  The line terminator is stripped from the
        title.

        Args:
            movie_titles_file: path of the titles file.

        Raises:
            ValueError: if a non-blank line contains no comma.
        """
        with open(movie_titles_file) as handle:
            for line in handle:
                if not line.strip():
                    continue
                movie, title = line.split(",", 1)
                # Strip only the line ending: the last line of a file may
                # have none, and blindly dropping one char would eat a letter.
                self.movie_title[movie] = title.rstrip("\r\n")

    @staticmethod
    def _mean_rating(row):
        """Return the mean of the positive entries of *row* (0.0 if none)."""
        rated = row[row > 0.0]
        return float(rated.mean()) if rated.size else 0.0

    def _centered_row(self, user_index):
        """Return user's ratings minus their mean, with unrated entries 0."""
        row = np.asarray(self.user_ratings, dtype=float)[user_index, :]
        rated = row > 0.0
        centered = np.zeros_like(row)
        if rated.any():
            centered[rated] = row[rated] - row[rated].mean()
        return centered

    @staticmethod
    def _cosine(vec1, vec2):
        """Return the cosine of two 1-D vectors (0.0 if either is all-zero)."""
        denom = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denom == 0:
            return 0.0
        return np.dot(vec1, vec2) / denom

    def CosineUserSim(self, user1, user2):
        """Return the similarity of two users as ``np.double``.

        Args:
            user1: row index of the first user in the rating matrix.
            user2: row index of the second user in the rating matrix.

        Returns:
            Cosine of the two mean-centered rating vectors, or 0.0 when
            either vector has zero norm (no ratings, or all ratings equal).
        """
        return np.double(
            self._cosine(self._centered_row(user1), self._centered_row(user2))
        )

    def MovieScore4User(self, user, movie, num_neighbors=30):
        """Predict *user*'s rating of *movie* from the most similar raters.

        Every other user who rated the movie is ranked by similarity to
        *user*; the top ``num_neighbors`` contribute their mean-centered
        rating weighted by similarity.  The result is also printed as
        ``user,movie,score,title`` with the score to four decimals.

        Args:
            user: user id as it appears in the ratings file.
            movie: movie id as it appears in the ratings file.
            num_neighbors: maximum number of neighbours to use.

        Returns:
            The predicted rating as ``np.double``.  When no neighbour has a
            non-zero similarity the prediction falls back to the user's own
            mean rating (0.0 if the user has no positive ratings).

        Raises:
            KeyError: if the user or movie is unknown, or the movie has no
                title loaded.
        """
        target = self.user_dict[user]
        column = self.movie_dict[movie]
        ratings = np.asarray(self.user_ratings, dtype=float)
        target_vec = self._centered_row(target)  # hoisted: same for every rater

        candidates = []
        for other in np.flatnonzero(ratings[:, column] != 0.0):
            if other == target:
                continue  # a user is never their own neighbour
            sim = self._cosine(target_vec, self._centered_row(other))
            candidates.append((int(other), ratings[other, column], sim))
        # Stable sort: raters with equal similarity keep row order.
        candidates.sort(key=lambda item: item[2], reverse=True)

        weighted_sum = 0.0
        sim_total = 0.0
        for other, rating, sim in candidates[:max(0, num_neighbors)]:
            weighted_sum += (rating - self._mean_rating(ratings[other, :])) * sim
            sim_total += abs(sim)

        score = self._mean_rating(ratings[target, :])
        if sim_total > 0.0:
            # Guarded: with no usable neighbour the division is undefined.
            score += weighted_sum / sim_total
        score = np.double(score)
        print(",".join([user, movie, format(score, ".4f"), self.movie_title[movie]]))
        return score
# end of class UserUserRec
if __name__ == '__main__':
    # PA3 driver: load the ratings and titles, then score every "user:movie"
    # request listed in input.txt and write one
    # "user,movie,score,title" line per request to outfile.txt.
    user_user_rec = UserUserRec()
    user_user_rec.GetRatingData("ratings.csv")
    user_user_rec.GetMovieTitles("movie-titles.csv")
    # Context managers close both files even if a request fails midway.
    with open("outfile.txt", "w") as outfile, open("input.txt") as infile:
        for line in infile:
            if not line.strip():
                continue  # ignore blank lines, e.g. a trailing newline
            user, movie = line.split(":")
            # int() drops the line ending and surrounding whitespace so the
            # id matches the keys read from the ratings file.
            movie = str(int(movie))
            score = user_user_rec.MovieScore4User(user, movie)
            fields = [user, movie, format(score, ".4f"),
                      user_user_rec.movie_title[movie]]
            outfile.write(",".join(fields) + "\n")
代码数据链接:https://www.dropbox.com/s/78ifrycp9x1238i/UserUserRec.rar