#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import math
import operator
# 获得每个用户浏览过的物品集合
def get_user_click(rating_file):
    """Collect, for every user, the list of items that user has rated.

    The file is read as comma-separated text. The first line is treated as a
    header and skipped. Lines with fewer than four comma-separated fields are
    ignored. Only the first two fields (user id, item id) are used.

    Args:
        rating_file: Path to the ratings file.

    Returns:
        A dict mapping each user id (str) to the list of item ids (str) that
        appear for that user, in file order. An empty dict is returned when
        the file does not exist.
    """
    user_click = {}
    if not os.path.exists(rating_file):
        # Return the same shape as the normal path (a single dict) so callers
        # can use the result without special-casing a missing file.
        return user_click
    # The context manager closes the file even if parsing raises.
    with open(rating_file) as fp:
        next(fp, None)  # skip the header line (no-op on an empty file)
        for line in fp:
            fields = line.strip().split(',')
            if len(fields) < 4:
                continue
            user_id, item_id = fields[:2]
            user_click.setdefault(user_id, []).append(item_id)
    return user_click
# 计算物品之间的相似度
def item_similarity(user_click):
    """Compute item-to-item similarity from per-user click lists.

    For two items i and j the similarity is

        c_ij / sqrt(n_i * n_j)

    rounded to 4 decimal places, where n_i is the number of users who clicked
    item i and c_ij is the number of users who clicked both i and j.

    Args:
        user_click: Dict mapping user id to an iterable of item ids.

    Returns:
        A nested dict ``{i: {j: similarity}}``. Only pairs that co-occur for
        at least one user are present; an item that never co-occurs with
        another item has no entry at all.
    """
    item_users = {}  # item -> number of users who clicked it
    pair_users = {}  # item i -> {item j -> number of users who clicked both}
    for items_id in user_click.values():
        # Count each item once per user: repeated clicks by the same user must
        # not inflate the counts (otherwise similarity can exceed 1).
        unique_items = list(dict.fromkeys(items_id))
        for i in unique_items:
            item_users[i] = item_users.get(i, 0) + 1
            for j in unique_items:
                if i == j:
                    continue
                row = pair_users.setdefault(i, {})
                row[j] = row.get(j, 0) + 1

    similarity = {}
    for i, related_items in pair_users.items():
        similarity[i] = {
            j: round(cij / math.sqrt(item_users[i] * item_users[j]), 4)
            for j, cij in related_items.items()
        }
    return similarity
def item_recommendation(user_click, user, sim, K):
    """Score candidate items for a user with item-based collaborative filtering.

    For every item the user has clicked, the K most similar items (by
    descending similarity) are taken from ``sim``. Each such candidate that
    the user has not already clicked gets that similarity added to its score.

    Args:
        user_click: Dict mapping user id to the list of item ids clicked.
        user: Id of the user to recommend for.
        sim: Nested dict ``{item: {other_item: similarity}}``.
        K: Number of most-similar neighbours considered per clicked item.

    Returns:
        A dict mapping candidate item id to its accumulated score. Items the
        user already clicked are never included. The dict is empty when the
        user is unknown or no candidate is found.
    """
    rank = {}
    clicked = user_click.get(user, ())
    # A set makes the "already clicked" test O(1) per candidate.
    interacted_items = set(clicked)
    for item in clicked:
        # An item that never co-occurred with another one has no row in sim.
        neighbours = sim.get(item, {})
        top_k = sorted(neighbours.items(), key=operator.itemgetter(1), reverse=True)[0:K]
        for j, wj in top_k:
            if j in interacted_items:  # never recommend what was already clicked
                continue
            # Accumulate across all clicked items that point at candidate j.
            rank[j] = rank.get(j, 0) + wj
    return rank
if __name__ == "__main__":
    # Main flow of ItemCF: load the click data, build the item similarity
    # table, score candidates for user '100' using the 5 nearest neighbours of
    # each clicked item, and print the candidates from highest to lowest score.
    user_click = get_user_click("./data/ratings.txt")
    sim = item_similarity(user_click)
    last_rank = item_recommendation(user_click, '100', sim, 5)
    ranks = sorted(last_rank.items(), key=operator.itemgetter(1), reverse=True)
    print(ranks)
# 推荐系统基于物品的协同过滤代码
# 最新推荐文章于 2022-07-20 15:20:33 发布