用 Python 实现了基础的 Item-based Collaborative Filtering(基于物品的协同过滤,ItemCF)方法,
更多内容可以访问我的项目(原文此处的链接未填写,仅有编辑器占位文字"添加链接描述")。
import numpy as np
import pandas as pd
import json
from pandas import Series, DataFrame
import math
# --- Load the training ratings and build the user -> {item: rating} table ---
#
# The path contains non-ASCII characters. On Python 2 a plain literal is a
# byte string and must be decoded to text before being handed to pandas; on
# Python 3 the literal is already text. The previous code called the
# Python-2-only ``unicode`` builtin, which raises NameError on Python 3, so
# the decode is now done only when the literal really is a byte string.
train_path = r'D:/推荐系统数据集/训练集与测试集/traindata.csv'
if isinstance(train_path, bytes):
    train_path = train_path.decode('utf-8')
tdata = pd.read_csv(train_path)

# The timestamp column is dropped before the table is built.
tdata.drop(columns='timestamp', inplace=True)

# Ids are normalised to strings (they become dict / JSON keys below) and
# ratings to floats.
tdata['userId'] = tdata['userId'].astype('str')
tdata['movieId'] = tdata['movieId'].astype('str')
tdata['rating'] = tdata['rating'].astype('float')
# Optional rescaling of ratings by 1/5 (disabled):
# tdata['rating'] = tdata['rating'].apply(lambda x: x / 5)

# Group by a scalar key (not a one-element list) so that ``get_group`` can be
# called with a plain user id; recent pandas releases deprecate passing a
# scalar to ``get_group`` when the grouper is a length-1 list.
df1 = tdata.groupby(by='userId')
# User ids in order of first appearance in the file.
types = tdata['userId'].unique()

# info1[user_id] -> {value in column 1: value in column 2} for that user's
# rows. Columns are addressed by position exactly as before; with a
# userId / movieId / rating layout these are movieId and rating
# (NOTE(review): confirm the CSV column order). If a key repeats for one
# user, the last row wins, as with the original per-row assignment.
info1 = {}
# ``user_id`` replaces the old loop variable ``type``, which shadowed the
# builtin for the rest of the module.
for user_id in types:
    user_rows = df1.get_group(user_id)
    # Pair the two columns directly instead of indexing ``iloc`` once per row.
    info1[user_id] = dict(zip(user_rows.iloc[:, 1], user_rows.iloc[:, 2]))

# Pretty-printed JSON dump of the table, kept for manual inspection.
info_json = json.dumps(info1, indent=4)
# print(info_json)
#### Use ItemCF (item-based collaborative filtering) to compute the similarity between items
train1=info1
C1=dict()
N1=dict()
for u,items in train1.items():
for i,rui in items.items():
if i not in N1.keys():
N1[i] =0
N1[i] +=1
for j,ruj in items.items():
if i == j:
continue
if i not in C1.keys():
C1.update({
i:{
j:0}})
if j not in C1[i].keys():
C1[i].update({
j: