协同过滤推荐系统

本文介绍了基于《推荐系统实践》一书的协同过滤推荐系统,特别关注了书中UserCF部分的用户相似度计算程序,并提供了修正后的可运行Python代码,旨在帮助读者更好地理解和应用该算法。
摘要由CSDN通过智能技术生成

最近在看推荐系统实践这本书

整体来看,这本书比较偏向于工程吧

但是我发现了代码有很多错误。

这里提供给大家一份可以运行的 Python 代码。

UserCF程序中user相似度计算程序:

def usersimilarity(train):
    """Compute pairwise user similarity from a user -> items mapping.

    For every ordered pair of distinct users (u, v) that share at least one
    item, the similarity is

        shared(u, v) / sqrt(count(u) * count(v))

    where shared(u, v) is the number of items both users have and count(x)
    is the number of distinct items of user x.

    Args:
        train: dict mapping a user id to an iterable of item ids (a set, a
            list, or a dict keyed by item id).

    Returns:
        A dict of dicts ``w`` with ``w[u][v]`` holding the similarity of u
        and v. Users that share no item with any other user do not appear
        as keys of ``w``.
    """
    # Build the inverted index: item -> set of users that have the item.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    # c[(u, v)]: number of items shared by u and v (ordered pairs, u != v).
    # n[u]: number of distinct items of user u.
    c = dict()
    n = dict()
    for users in item_users.values():
        for u in users:
            n[u] = n.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                c[u, v] = c.get((u, v), 0) + 1

    # Normalise the co-occurrence counts into the final similarity matrix.
    # Writing straight into the nested dict avoids rebuilding the whole
    # neighbour dict for every pair.
    w = dict()
    for (a, b), shared in c.items():
        w.setdefault(a, {})[b] = shared / math.sqrt(n[a] * n[b])
    return w

ItemCF的相似度计算:

def itemsimilarity(train):
    """Compute pairwise item similarity from co-occurrence counts.

    Each value of ``train`` is treated as one group of items that occur
    together. For every ordered pair of distinct items (i, j) that appear in
    at least one common group, the similarity is

        together(i, j) / sqrt(count(i) * count(j))

    where together(i, j) is the number of groups containing both items and
    count(x) is the number of groups containing item x.

    Args:
        train: dict whose values are iterables of item ids (presumably keyed
            by user id, as in the user-based variant; the keys themselves
            are not used here).

    Returns:
        A dict of dicts ``w`` with ``w[i][j]`` holding the similarity of
        items i and j. Items that never co-occur with another item do not
        appear as keys of ``w``.
    """
    # c[(i, j)]: number of groups that contain both i and j (i != j).
    # n[i]: number of groups that contain item i.
    c = dict()
    n = dict()
    for items in train.values():
        for i in items:
            n[i] = n.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                c[i, j] = c.get((i, j), 0) + 1

    # Normalise the co-occurrence counts into the final similarity matrix.
    w = dict()
    for (a, b), together in c.items():
        w.setdefault(a, {})[b] = together / math.sqrt(n[a] * n[b])
    return w

UserCF的rank计算:

def recommend(user, train, w, train2):
    """Score unseen items for ``user`` with user-based collaborative filtering.

    Every neighbour v of ``user`` contributes ``w[user][v] * train2[v][i]``
    to the score of each item i that ``user`` has not interacted with yet.

    Args:
        user: id of the user to recommend for.
        train: dict mapping a user id to the container of items that user
            already interacted with (used for membership tests only).
        w: similarity matrix as a dict of dicts, ``w[u][v]`` -> similarity.
        train2: dict mapping a user id to a dict of item id -> rating.

    Returns:
        A dict mapping item id -> accumulated score, or ``None`` when the
        computation failed (for example because ``user`` is missing from
        ``train`` or ``w``). The failure is reported on stdout.
    """
    try:
        rank = {}
        interacted_items = train[user]
        # Visit neighbours from the most to the least similar one.
        neighbours = sorted(w[user].items(), key=itemgetter(1), reverse=True)
        for v, wuv in neighbours:
            for i, rvi in train2[v].items():
                if i in interacted_items:
                    continue
                rank[i] = rank.get(i, 0) + wuv * rvi
        return rank
    except Exception as ex:
        # Deliberately best-effort: callers treat None as "no result", so
        # the error is reported and swallowed instead of being propagated.
        print("%s : %s" % (Exception, ex))
        return None

ItemCF的rank计算:

def recommenditem(user, train, w, train2):
    """Score unseen items for ``user`` with item-based collaborative filtering.

    Every item i rated by ``user`` contributes ``train2[user][i] * w[i][j]``
    to the score of each similar item j that ``user`` has not rated yet.

    Args:
        user: id of the user to recommend for.
        train: unused; kept so the signature matches ``recommend``.
        w: item similarity matrix as a dict of dicts, ``w[i][j]`` ->
            similarity.
        train2: dict mapping a user id to a dict of item id -> rating.

    Returns:
        A dict mapping item id -> accumulated score (empty when nothing can
        be recommended).

    Raises:
        KeyError: if ``user`` is not a key of ``train2``.
    """
    rank = {}
    ru = train2[user]
    for i, pi in ru.items():
        # An item without any similar item has no row in w; it simply
        # contributes nothing instead of aborting with a KeyError.
        similar = w.get(i, {})
        for j, wj in sorted(similar.items(), key=itemgetter(1), reverse=True):
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + pi * wj
    return rank

最后给出RMSE的计算程序:

def rmes(testtrainfunny, rank):
    """Return the average per-user prediction error of ``rank``.

    For every user of ``rank`` whose id, converted with ``str``, is a key of
    ``testtrainfunny``, the error is

        sqrt(sum((predicted - actual) ** 2)) / number_of_predicted_items

    with ``actual`` taken as 0 for items missing from the user's test data.
    The per-user errors are summed, the sum is printed, and the sum divided
    by ``len(rank)`` is returned.

    NOTE(review): this is not the textbook RMSE, which would be
    sqrt(sum / n) per user, and users without test data still count in the
    final denominator. The original formula is kept so results stay
    comparable - confirm whether that is intended.

    Args:
        testtrainfunny: dict mapping ``str(user id)`` to a dict of
            item id -> actual rating.
        rank: dict mapping a user id to a dict of item id -> predicted score.

    Returns:
        The averaged error as a float; 0.0 when ``rank`` is empty.
    """
    total = 0.0
    for user, ranklist in rank.items():
        key = str(user)
        # Users without test data or without predictions add no error.
        if key not in testtrainfunny or not ranklist:
            continue
        source = testtrainfunny[key]
        squared = 0.0
        for item, value in ranklist.items():
            diff = float(value) - float(source.get(item, 0))
            squared += diff * diff
        total += math.sqrt(squared) / len(ranklist)
    print(total)
    if not rank:
        return 0.0
    return total / len(rank)


最后给大家一个完整的程序,这里集合了之前的程序,并且人工定义了一个例子:

帮助大家理解程序:

'''
Created on May 19, 2013

@author: Yang
'''
import TestCode
import BuilduseritemM
import math

# Hand-made toy data set with four users (0-3) and five items (1-5).

# Per-user item -> rating dictionaries.
train2 = {
    0: {1: 2, 2: 3, 4: 1},
    1: {1: 2, 3: 3},
    2: {2: 2, 5: 4},
    3: {3: 1, 4: 3, 5: 5},
}

# Per-user sets of item ids (the same items as in train2, without ratings).
train = {
    0: {1, 2, 4},
    1: {1, 3},
    2: {2, 5},
    3: {3, 4, 5},
}

# Per-user item -> rating dictionaries used as the reference data by rmes.
traintest = {
    0: {3: 2, 5: 3},
    1: {2: 2, 4: 3, 5: 3},
    2: {1: 2, 2: 4, 3: 2, 4: 3},
    3: {1: 1, 2: 3},
}
# 
# mapuser = dict()
# mapuser[1] = 1
# mapuser[2] = 2
# mapuser[3] = 3
# mapuser[4] = 4
# compute the prediction (rank) matrices for UserCF and ItemCF
def buildpredictmatrix(train, trainfunny):
    """Build the user-based prediction matrix for every user of ``train``.

    The similarity matrix is computed once with ``TestCode.usersimilarity``
    and then ``TestCode.recommend`` is called for each user.

    Args:
        train: dict keyed by user id; passed to ``TestCode.usersimilarity``
            and ``TestCode.recommend`` unchanged.
        trainfunny: passed to ``TestCode.recommend`` as its last argument
            (presumably the per-user item -> rating dicts; verify against
            TestCode).

    Returns:
        A dict mapping user id -> the result of ``TestCode.recommend``.
        Users whose result is ``None`` or empty are left out.
    """
    w = TestCode.usersimilarity(train)
    rank = {}
    # Iterate over the actual user ids rather than range(len(train)), so ids
    # do not have to be the consecutive integers 0..n-1.
    for user in train:
        result = TestCode.recommend(user, train, w, trainfunny)
        if result:
            rank[user] = result
    return rank

def buildpredictmatrixitem(train, trainfunny):
    """Build the item-based prediction matrix for every user of ``train``.

    The similarity matrix is computed once with ``TestCode.itemsimilarity``
    and then ``TestCode.recommenditem`` is called for each user.

    Args:
        train: dict keyed by user id; passed to ``TestCode.itemsimilarity``
            and ``TestCode.recommenditem`` unchanged.
        trainfunny: passed to ``TestCode.recommenditem`` as its last argument
            (presumably the per-user item -> rating dicts; verify against
            TestCode).

    Returns:
        A dict mapping user id -> the result of ``TestCode.recommenditem``.
        Users whose result is ``None`` or empty are left out.
    """
    w = TestCode.itemsimilarity(train)
    rank = {}
    # Iterate over the actual user ids rather than range(len(train)), so ids
    # do not have to be the consecutive integers 0..n-1.
    for user in train:
        result = TestCode.recommenditem(user, train, w, trainfunny)
        if result:
            rank[user] = result
    return rank


def rmes(testtrainfunny, rank):
    """Return the average per-user prediction error of ``rank``.

    For every user of ``rank`` that is also a key of ``testtrainfunny``, the
    error is

        sqrt(sum((predicted - actual) ** 2)) / number_of_predicted_items

    with ``actual`` taken as 0 for items missing from the user's test data.
    The per-user errors are summed and divided by ``len(rank)``.

    NOTE(review): this is not the textbook RMSE, which would be
    sqrt(sum / n) per user, and users without test data still count in the
    final denominator. The original formula is kept so results stay
    comparable - confirm whether that is intended.

    Args:
        testtrainfunny: dict mapping a user id to a dict of
            item id -> actual rating.
        rank: dict mapping a user id to a dict of item id -> predicted score.

    Returns:
        The averaged error as a float; 0.0 when ``rank`` is empty.
    """
    if not rank:
        return 0.0
    total = 0.0
    for user, ranklist in rank.items():
        # Users without test data or without predictions add no error.
        if user not in testtrainfunny or not ranklist:
            continue
        source = testtrainfunny[user]
        squared = 0.0
        for item, value in ranklist.items():
            diff = float(value) - float(source.get(item, 0))
            squared += diff * diff
        total += math.sqrt(squared) / len(ranklist)
    return total / len(rank)

# Run both recommenders on the toy data and score their predictions against
# traintest. The user-based result is printed first, then the item-based one.
rank1 = buildpredictmatrix(train, train2)
rank2 = buildpredictmatrixitem(train, train2)
result2 = rmes(traintest, rank2)
result = rmes(traintest, rank1)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(result)
print(result2)
# (train, mapuser, trainfunny) = BuilduseritemM.buildtrainmatrix()
# print 'build the user_item matrix.'
# rank = BuilduseritemM.buildpredictmatrix(train, mapuser, trainfunny)
# print 'predict the interesting of user.'
# (testmapuser, testtrain, testtrainfunny) = BuilduseritemM.buildtestmatrix()
# print 'build the user_item test matrix.'
# result = BuilduseritemM.rmes(testtrainfunny, rank)
# print 'the result Rmse:'
# print result

#rankitem = BuilduseritemM.buildpredictmatrixitem(train, mapuser, trainfunny)
#resultitem = BuilduseritemM.rmes(testtrainfunny, rankitem)



  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值