推荐系统基于物品的协同过滤代码

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import os
import math
import operator


# 获得每个用户浏览过的物品集合
def get_user_click(rating_file):
    """Load the items each user has rated from a comma-separated ratings file.

    The first line of the file is treated as a header and skipped. Each
    remaining line must contain at least four comma-separated fields; the
    first two are taken as the user id and the item id. Shorter lines
    (including blank lines) are ignored.

    Args:
        rating_file: Path of the ratings file.

    Returns:
        A dict mapping each user id (str) to the list of item ids (str)
        recorded for that user, in file order. An empty dict is returned
        when the file does not exist.
    """
    if not os.path.exists(rating_file):
        # Return the same type as the normal path so callers can always
        # treat the result as a dict.
        return {}

    user_click = {}
    # The context manager closes the file even if parsing raises.
    with open(rating_file) as fp:
        next(fp, None)  # skip the header line (no-op on an empty file)
        for line in fp:
            fields = line.strip().split(',')
            if len(fields) < 4:
                continue
            user_id, item_id = fields[:2]
            user_click.setdefault(user_id, []).append(item_id)
    return user_click


# 计算物品之间的相似度
def item_similarity(user_click):
    """Compute item-to-item similarity from per-user item lists.

    For every pair of distinct items (i, j) that appear together in at least
    one user's list, the similarity is

        cij / sqrt(ni * nj)

    rounded to 4 decimal places, where ``cij`` is the number of users whose
    list contains both items and ``ni`` / ``nj`` are the numbers of users
    whose list contains each item.

    Args:
        user_click: Dict mapping a user id to an iterable of item ids.

    Returns:
        A nested dict ``{item_i: {item_j: similarity}}``. Items that never
        co-occur with another item have no entry.
    """
    item_user_count = {}  # item -> number of users who clicked it
    co_occurrence = {}    # item i -> {item j -> number of users with both}
    for items_id in user_click.values():
        # Count each item once per user (order preserved); a repeated item
        # would otherwise inflate both counters and push similarity above 1.
        unique_items = list(dict.fromkeys(items_id))
        for i in unique_items:
            item_user_count[i] = item_user_count.get(i, 0) + 1
            for j in unique_items:
                if i == j:
                    continue
                row = co_occurrence.setdefault(i, {})
                row[j] = row.get(j, 0) + 1

    similarity = {}
    for i, related_items in co_occurrence.items():
        similarity[i] = {
            j: round(cij / math.sqrt(item_user_count[i] * item_user_count[j]), 4)
            for j, cij in related_items.items()
        }
    return similarity


def item_recommendation(user_click, user, sim, K):
    """Score candidate items for a user with item-based collaborative filtering.

    For each distinct item the user has interacted with, the K most similar
    items are looked up in ``sim``. Each of those that the user has not
    interacted with has its similarity added to its running score.

    Args:
        user_click: Dict mapping a user id to the items that user interacted
            with.
        user: Id of the user to recommend for.
        sim: Nested dict ``{item_i: {item_j: similarity}}``.
        K: Number of most-similar neighbours considered per interacted item.

    Returns:
        A dict mapping each candidate item to its accumulated score. Items
        the user already interacted with are never included. The dict is
        empty when the user is unknown or no candidate is found.
    """
    rank = {}
    # De-duplicate while keeping order so a repeated click does not count
    # twice; the dict also gives O(1) membership tests below.
    interacted_items = dict.fromkeys(user_click.get(user, ()))
    for item in interacted_items:
        # An item that never co-occurred with another has no entry in sim.
        neighbours = sim.get(item, {})
        # Take the K neighbours with the highest similarity.
        top_k = sorted(neighbours.items(), key=operator.itemgetter(1), reverse=True)[0:K]
        for j, wj in top_k:
            if j in interacted_items:  # already clicked: do not recommend
                continue
            # Accumulate across all of the user's items instead of resetting
            # the score each time candidate j is seen.
            rank[j] = rank.get(j, 0) + wj
    return rank


if __name__ == "__main__":
    # Main flow of ItemCF: load the per-user item lists, build the item
    # similarity table, score candidates for user '100' using the 5 nearest
    # neighbours of each clicked item, then print them best-first.
    # The explicit "__main__" comparison keeps this from running on import.
    user_click = get_user_click("./data/ratings.txt")
    sim = item_similarity(user_click)
    Last_Rank = item_recommendation(user_click, '100', sim, 5)
    ranks = sorted(Last_Rank.items(), key=operator.itemgetter(1), reverse=True)
    print(ranks)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值