黑马推荐系统项目实战【三】基于用户的协同过滤 UserCF

用户物品相似度计算

import pandas as pd

# Row labels (users) and column labels (items) of the interaction matrix.
users = [f"User{n}" for n in range(1, 6)]
items = [f"Item {letter}" for letter in "ABCDE"]

# User purchase records: one row per user, one column per item.
datasets = [
    [1, 0, 1, 1, 0],
    [1, 0, 0, 1, 1],
    [1, 0, 1, 0, 0],
    [0, 1, 0, 1, 1],
    [1, 1, 1, 0, 1],
]

# Wrap the raw records in a labelled DataFrame; the bare expression below
# displays it when the cell is run in a notebook.
df = pd.DataFrame(data=datasets, index=users, columns=items)
df
       Item A  Item B  Item C  Item D  Item E
User1       1       0       1       1       0
User2       1       0       0       1       1
User3       1       0       1       0       0
User4       0       1       0       1       1
User5       1       1       1       0       1
from sklearn.metrics.pairwise import pairwise_distances

# User-user similarity: Jaccard similarity = 1 - Jaccard distance, computed
# between the rows of df (one row per user).
# The 0/1 matrix is cast to bool before the call because the jaccard metric
# works on boolean vectors; passing integers makes scikit-learn convert the
# data itself and emit a DataConversionWarning. The resulting values are the
# same either way.
user_similar = 1 - pairwise_distances(df.values.astype(bool), metric='jaccard')
# Label rows and columns with the user names so pairs can be looked up by name.
user_similar = pd.DataFrame(user_similar, columns=users, index=users)
user_similar

D:\Apps\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py:1735: DataConversionWarning: Data was converted to boolean for metric jaccard
warnings.warn(msg, DataConversionWarning)

          User1  User2     User3  User4  User5
User1  1.000000   0.50  0.666667    0.2    0.4
User2  0.500000   1.00  0.250000    0.5    0.4
User3  0.666667   0.25  1.000000    0.0    0.5
User4  0.200000   0.50  0.000000    1.0    0.4
User5  0.400000   0.40  0.500000    0.4    1.0
# Item-item similarity: Jaccard similarity = 1 - Jaccard distance, computed
# between the rows of the transposed matrix (one row per item).
# The 0/1 matrix is cast to bool before the call because the jaccard metric
# works on boolean vectors; passing integers makes scikit-learn convert the
# data itself and emit a DataConversionWarning. The resulting values are the
# same either way.
item_similar = 1 - pairwise_distances(df.T.values.astype(bool), metric='jaccard')
# Label rows and columns with the item names so pairs can be looked up by name.
item_similar = pd.DataFrame(item_similar, columns=items, index=items)
item_similar

D:\Apps\Anaconda3\lib\site-packages\sklearn\metrics\pairwise.py:1735: DataConversionWarning: Data was converted to boolean for metric jaccard
warnings.warn(msg, DataConversionWarning)

        Item A    Item B  Item C  Item D    Item E
Item A    1.00  0.200000    0.75    0.40  0.400000
Item B    0.20  1.000000    0.25    0.25  0.666667
Item C    0.75  0.250000    1.00    0.20  0.200000
Item D    0.40  0.250000    0.20    1.00  0.500000
Item E    0.40  0.666667    0.20    0.50  1.000000

基于用户的协同过滤 UserCF

# For every user, find the 2 most similar other users.
# Per user: take that user's row of the similarity table, drop the user's own
# entry, sort the remaining similarities in descending order, and keep the
# labels of the first two.
topN_users = {
    uid: user_similar.loc[uid]
    .drop([uid])
    .sort_values(ascending=False)
    .index[:2]
    .tolist()
    for uid in user_similar.index
}
topN_users

{'User1': ['User3', 'User2'],
 'User2': ['User4', 'User1'],
 'User3': ['User1', 'User5'],
 'User4': ['User2', 'User5'],
 'User5': ['User3', 'User4']}

# Show the (user, similar-users) pairs stored in topN_users.
topN_users.items()

dict_items([('User1', ['User3', 'User2']), ('User2', ['User4', 'User1']), ('User3', ['User1', 'User5']), ('User4', ['User2', 'User5']), ('User5', ['User3', 'User4'])])

# Show the index labels of User1's row in df.
df.loc['User1'].index

Index(['Item A', 'Item B', 'Item C', 'Item D', 'Item E'], dtype='object')

import numpy as np


def _interacted_items(user_id):
    """Return the item labels left in the user's df row once zeros are removed."""
    # Zeros are replaced with NaN so that dropna() discards them; the labels
    # that survive are the items this user has interacted with.
    return set(df.loc[user_id].replace(0, np.nan).dropna().index)


# Build the recommendation results from each user's top-N similar users.
re_results = {}
for user, sim_users in topN_users.items():
    # Pool every item that any of the similar users has interacted with.
    candidates = set().union(*(_interacted_items(peer) for peer in sim_users))
    # Filter out the items the user has already interacted with.
    re_results[user] = candidates - _interacted_items(user)
re_results

{'User1': {'Item E'},
 'User2': {'Item B', 'Item C'},
 'User3': {'Item B', 'Item D', 'Item E'},
 'User4': {'Item A', 'Item C'},
 'User5': {'Item D'}}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值