# 基于MovieLens数据集的协同过滤推荐系统
from tkinter import *
import tkinter.font as tkFont
import threading
import os
from operator import itemgetter, attrgetter
from math import sqrt
import time
def load_data():
    """Load the MovieLens ratings and movie titles from the ``data`` directory.

    Reads ``data/u.data`` (tab-separated ``user id, item id, rating,
    timestamp`` records) and ``data/u.item`` (pipe-separated records whose
    first two fields are the movie id and the movie title). Both files are
    decoded as ISO-8859-1. Blank lines are skipped.

    Returns:
        A ``(user_movie, movies)`` tuple. ``user_movie`` maps
        ``user id -> {movie id -> rating as float}`` and ``movies`` maps
        ``movie id -> title``. All ids are kept as strings.

    Raises:
        OSError: if either file cannot be opened.
        ValueError: if a non-blank line does not have the expected number of
            fields or a rating is not a number.
    """
    filename_user_movie = 'data/u.data'
    filename_movieInfo = 'data/u.item'

    user_movie = {}
    # ``with`` guarantees the handle is closed even if a line fails to parse.
    with open(filename_user_movie, encoding='ISO-8859-1') as ratings_file:
        for line in ratings_file:
            line = line.strip()
            if not line:
                continue
            userId, itemId, rating, _timestamp = line.split('\t')
            user_movie.setdefault(userId, {})[itemId] = float(rating)

    movies = {}
    with open(filename_movieInfo, encoding='ISO-8859-1') as items_file:
        for line in items_file:
            if not line.strip():
                continue
            # Only the first two fields are used; the rest is ignored.
            movieId, movieTitle = line.split('|')[0:2]
            movies[movieId] = movieTitle
    return user_movie, movies
def average_rating(user, ratings_by_user=None):
    """Return the arithmetic mean of all ratings given by ``user``.

    Args:
        user: key of the user in the ratings mapping.
        ratings_by_user: optional ``{user: {movie: rating}}`` mapping to read
            from. When omitted, the module-level ``user_movie`` mapping is
            used, which is what the one-argument form has always done.

    Returns:
        The mean rating as a float.

    Raises:
        KeyError: if ``user`` is not in the mapping.
        ZeroDivisionError: if the user has no ratings.
    """
    if ratings_by_user is None:
        ratings_by_user = user_movie
    ratings = ratings_by_user[user]
    return sum(ratings.values()) / len(ratings)
def calUserSim(user_movie):
    """Compute pairwise user similarity from a ratings mapping.

    For every ordered pair of distinct users that rated at least one movie in
    common, the similarity is a Pearson-style correlation: deviations are
    taken from each user's mean over *all* of that user's ratings, but the
    sums run only over the co-rated movies.

    Args:
        user_movie: ``{user: {movie: rating}}`` mapping.

    Returns:
        ``{user: {other_user: similarity}}``. Users that share no movie with
        anyone do not appear as keys.
    """
    # One pass builds both the per-user mean and the movie -> users inverted
    # index. The means come from the mapping passed in (not from any module
    # global) and are computed once instead of once per user pair.
    averages = {}
    movie_user = {}
    for user, ratings in user_movie.items():
        if ratings:
            averages[user] = sum(ratings.values()) / len(ratings)
        for movie in ratings:
            movie_user.setdefault(movie, []).append(user)

    # For each ordered user pair, collect the movies both have rated.
    co_rated = {}
    for movie, users in movie_user.items():
        for u in users:
            neighbours = co_rated.setdefault(u, {})
            for n in users:
                if u != n:
                    neighbours.setdefault(n, []).append(movie)

    # Pearson correlation over the co-rated movies.
    userSim = {}
    for u, neighbours in co_rated.items():
        ratings_u = user_movie[u]
        mean_u = averages[u]
        for n, shared in neighbours.items():
            ratings_n = user_movie[n]
            mean_n = averages[n]
            covariance = 0.0
            sq_dev_u = 0.0
            sq_dev_n = 0.0
            for m in shared:
                dev_u = ratings_u[m] - mean_u
                dev_n = ratings_n[m] - mean_n
                covariance += dev_u * dev_n
                sq_dev_u += dev_u ** 2
                sq_dev_n += dev_n ** 2
            norm_u = sqrt(sq_dev_u)
            norm_n = sqrt(sq_dev_n)
            # A user with zero deviation on the shared movies would make the
            # denominator 0; substitute a small constant instead.
            if norm_u == 0:
                norm_u = 0.001
            if norm_n == 0:
                norm_n = 0.001
            userSim.setdefault(u, {})[n] = covariance / (norm_u * norm_n)
    return userSim
def getRecommendations(user, user_movie, movies, userSim, N, top_k=10):
    """Predict ratings for movies ``user`` has not rated and return the best.

    The prediction for a movie is the user's own mean rating plus the
    similarity-weighted mean-centred ratings of the ``N`` most similar users,
    divided by the sum of those similarities.

    Args:
        user: id of the user to recommend for.
        user_movie: ``{user: {movie: rating}}`` mapping.
        movies: ``{movie id: title}`` mapping; not used by the computation,
            kept for interface compatibility.
        userSim: ``{user: {other_user: similarity}}`` mapping.
        N: number of most-similar neighbours to use.
        top_k: number of predictions to return (default 10).

    Returns:
        A list of ``(movie id, predicted rating)`` tuples, highest prediction
        first, at most ``top_k`` long. Empty when the user has no neighbours.

    Raises:
        KeyError: if ``user`` (or a neighbour) is missing from ``user_movie``.
        ZeroDivisionError: if ``user`` or a neighbour has no ratings at all.
    """
    def mean_rating(uid):
        # Mean over the mapping that was passed in, not a module global.
        ratings = user_movie[uid]
        return sum(ratings.values()) / len(ratings)

    interacted_items = user_movie[user]
    average_u_rate = mean_rating(user)

    # A user without any co-rating neighbour has no entry in userSim.
    neighbours = sorted(
        userSim.get(user, {}).items(), key=itemgetter(1), reverse=True
    )[:N]

    pred = {}
    sumUserSim = 0
    for n, nuw in neighbours:
        average_n_rate = mean_rating(n)
        for i, nrating in user_movie[n].items():
            # Skip movies the user has already rated.
            if i in interacted_items:
                continue
            pred[i] = pred.get(i, 0) + nuw * (nrating - average_n_rate)
        # The normaliser counts each neighbour once.
        sumUserSim += nuw

    if sumUserSim:
        scores = {
            i: average_u_rate + deviation / sumUserSim
            for i, deviation in pred.items()
        }
    else:
        # Similarities cancel out or are all zero: there is no usable
        # neighbour signal, so fall back to the user's own mean.
        scores = dict.fromkeys(pred, average_u_rate)

    return sorted(scores.items(), key=itemgetter(1), reverse=True)[:top_k]
def thread_it(func, *args):
    """Run ``func(*args)`` on a new daemon thread and return immediately.

    Args:
        func: callable to execute on the new thread.
        *args: positional arguments forwarded to ``func``.

    Returns:
        The started ``threading.Thread``, so callers may ``join`` it if they
        want to wait for completion.
    """
    # ``daemon=True`` replaces the deprecated ``Thread.setDaemon(True)``; a
    # daemon thread does not keep the process alive on exit.
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
    return t
# Load the ratings and movie titles once at import time; average_rating() and
# star() read these module-level names.
user_movie, movies = load_data()
def star():
    """Button handler: recommend movies for the user ID typed into ``text1``.

    Reads the user ID from ``text1``, computes the recommendations from the
    module-level ``user_movie`` / ``movies`` data and writes the result (or
    an error message) into ``text2``.

    NOTE(review): this touches Tk widgets from whichever thread calls it; the
    button runs it through ``thread_it``. Confirm that is safe for the Tk
    build in use.
    """
    user_id = text1.get(1.0, END).strip()
    text2.delete(1.0, END)

    # Reject unknown or blank IDs up front instead of failing with a KeyError
    # after the expensive similarity computation.
    if user_id not in user_movie:
        text2.insert(END, '用户ID不存在,请重新输入')
        return

    text2.insert(1.0, '正在为该用户推荐中,请稍等...')

    # The similarity matrix depends only on the ratings loaded at start-up,
    # so compute it on first use and reuse it for later clicks.
    userSim = getattr(star, '_user_sim', None)
    if userSim is None:
        userSim = calUserSim(user_movie)
        star._user_sim = userSim

    try:
        pred = getRecommendations(user_id, user_movie, movies, userSim, 20)
    except (KeyError, ZeroDivisionError):
        # No usable neighbours for this user; report it rather than leaving
        # the "please wait" text on screen.
        text2.delete(1.0, END)
        text2.insert(END, '无法为该用户生成推荐结果')
        return

    # Append at END so the header stays on top and the best prediction is
    # listed first.
    text2.delete(1.0, END)
    text2.insert(END, '推荐结果如下:\n\n')
    for i, rating in pred:
        text2.insert(END, '电影名: %s, 评分: %s' % (movies.get(i, i), rating) + '\n\n')
# Create the root window.
init_window = Tk()

# Window settings.
init_window.title('电影推荐系统')  # window title
init_window.geometry('600x500+400+150')  # size and screen position
init_window["bg"] = "skyblue"  # background colour
init_window.attributes("-alpha", 0.8)  # window opacity
try:
    init_window.iconbitmap("1.ico")  # window icon
except TclError:
    # The icon is cosmetic: a missing or unsupported file must not stop the
    # application from starting.
    pass

# Labels.
label1 = Label(init_window, text="请输入用户ID", bg='skyblue', font=('隶书', 15))
label1.place(relx=0.36, rely=0.05)
label2 = Label(init_window, text="推荐结果", bg='skyblue', font=('隶书', 15))
label2.place(relx=0.4, rely=0.37)

# Text boxes: text1 takes the user ID, text2 shows the recommendations.
text1 = Text(init_window, width=60, height=4)
text1.place(relx=0.15, rely=0.12)
text2 = Text(init_window, width=70, height=15, fg='steelblue')
text2.place(relx=0.1, rely=0.42)

# Confirm button: runs star() on a background thread so the UI stays
# responsive while recommendations are computed.
button1 = Button(init_window, text="确定", bg="Cornsilk", width=8, height=1,
                 command=lambda: thread_it(star))
button1.place(relx=0.44, rely=0.25)

# Enter the Tk event loop.
init_window.mainloop()