《推荐系统实践》用户协同过滤userCF测试代码

# -*- coding=utf-8 -*-

import math
from operator import itemgetter

# Sample interaction data: each user (upper-case key) maps to the tuple of
# items (lower-case letters) that user has interacted with.
dic = {
    'A': ('a', 'b', 'd'),
    'B': ('a', 'c'),
    'C': ('b', 'e'),
    'D': ('c', 'd', 'e'),
}


def Usersim(dicc):
    """Build the user-user cosine similarity matrix.

    For two users u and v the similarity is
    ``|items(u) & items(v)| / sqrt(|items(u)| * |items(v)|)``.

    Args:
        dicc: mapping of user -> sized iterable of hashable items the user
            has interacted with (e.g. ``{'A': ('a', 'b')}``).

    Returns:
        A dict ``W`` where ``W[u][v]`` is the similarity between ``u`` and
        ``v``. Every user in ``dicc`` has an entry; users that share no
        item with anybody (including users with no items) map to an empty
        dict. Pairs with no common item are omitted rather than stored as 0.
    """
    # Number of *distinct* items per user. Using the set size (instead of the
    # raw length) keeps repeated items from inflating the norm, consistent
    # with the set-based co-occurrence counting below.
    N = {}
    # Inverted index: item -> set of users who interacted with it.
    item_user = {}
    for u, items in dicc.items():
        N[u] = len(set(items))
        for i in items:
            item_user.setdefault(i, set()).add(u)

    # C[u][v] = number of items shared by u and v. Seed every user so that
    # users without any items still appear in the result.
    C = {u: {} for u in dicc}
    for users in item_user.values():
        for u in users:
            for v in users:
                if u != v:
                    C[u][v] = C[u].get(v, 0) + 1

    # Normalise the co-occurrence counts into cosine similarities. Both
    # counts are >= 1 here because u and v share at least one item.
    W = {}
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W


def Recommend(user,dicc,W,K):
    """Score unseen items for ``user`` from the K most similar users.

    Args:
        user: the user to recommend for; must be a key of ``dicc``.
        dicc: mapping of user -> iterable of hashable items that user has
            interacted with.
        W: similarity matrix, ``W[u][v]`` being the similarity of u and v.
        K: number of most similar neighbours to take into account.

    Returns:
        A dict mapping each item the user has not interacted with to the sum
        of the similarities of the selected neighbours who did. Empty when
        the user has no neighbours in ``W``.

    Raises:
        KeyError: if ``user`` is not present in ``dicc``.
    """
    rvi = 1  # every interaction is weighted equally (no explicit ratings)
    rank = {}
    # A set gives O(1) membership tests inside the loop below.
    interacted_items = set(dicc[user])
    # A user may be absent from W (e.g. no items, hence no similar users);
    # treat that as "no neighbours" instead of raising KeyError.
    neighbours = sorted(W.get(user, {}).items(), key=itemgetter(1), reverse=True)
    for v, wuv in neighbours[0:K]:
        for i in dicc[v]:
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank


if __name__ == '__main__':
    # Demo: build the similarity matrix for the sample data, then print the
    # item scores for user 'A' using the 3 most similar users.
    similarity = Usersim(dic)
    print(Recommend('A', dic, similarity, 3))

以上是参照书上和网上的例子修改、补充后的完整代码,可以直接运行。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
# -*- coding: utf-8 -*- import pandas as pd import numpy as np from math import sqrt critics={'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'You, Me and Dupree': 2.5, 'The Night Listener': 3.0}, 'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 3.5}, 'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 4.0}, 'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'The Night Listener': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 2.5}, 'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'The Night Listener': 3.0, 'You, Me and Dupree': 2.0}, 'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'The Night Listener': 3.0, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5}, 'Toby': {'Snakes on a Plane':4.5,'You, Me and Dupree':1.0,'Superman Returns':4.0}} df_critics=pd.DataFrame(critics) ##欧氏距离 def sim_distance(prefs,person1,person2): si={} for item in prefs[person1]: if item in prefs[person2]: si[item]=1 if len(si)==0: return 0 sum_of_squares=sum([pow(prefs[person1][item]-prefs[person2][item],2) for item in prefs[person1] if item in prefs[person2]]) return 1/(1+sqrt(sum_of_squares)) ##numpy pandas 方法 def sim_distance2(prefs,person1,person2): return 1/(1+np.linalg.norm(prefs[person1]-prefs[person2])) ##皮尔逊相关系数 def sim_pearson(prefs,p1,p2): si={} for item in prefs[p1]: if item in prefs[p2]: si[item]=1 n=len(si) if n==0: return 1 ##对所有偏好求和 sum1=sum([prefs[p1][it] for it in si]) sum2=sum([prefs[p2][it] for it in si]) ##求平方和 sum1Sq=sum([pow(prefs[p1][it]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值