推荐系统之 ItemCF(基于物品的协同过滤)算法代码 —— Python 实现

#-*-coding:utf8-*-

import sys

import math

import operator

import os

def get_user_click(rating_file):
    """Collect each user's sequence of positively rated items.

    The file is read as comma-separated text whose first line is a header
    and whose remaining lines start with ``userid,itemid,rating,timestamp``.

    Args:
        rating_file: Path to the ratings file.

    Returns:
        A dict mapping userid (str) to the list of itemids (str), in file
        order, that the user rated 3.0 or higher. An empty dict is returned
        when ``rating_file`` does not exist.

    Raises:
        ValueError: If a rating column cannot be parsed as a float.
    """
    if not os.path.exists(rating_file):
        return {}

    user_click = {}
    # The context manager closes the file even if a malformed row raises.
    with open(rating_file, encoding='utf-8') as fp:
        # Skip the header line (no-op on an empty file).
        next(fp, None)
        for line in fp:
            fields = line.strip().split(',')
            # Skip rows that do not carry all four columns.
            if len(fields) < 4:
                continue
            # Only the first three columns are used; the timestamp and any
            # extra trailing columns are ignored.
            userid, itemid, rating = fields[:3]
            # Ratings below 3.0 are treated as "did not like" and dropped.
            if float(rating) < 3.0:
                continue
            user_click.setdefault(userid, []).append(itemid)
    return user_click

def get_item_info(item_file):
    """Load item metadata (title and genres) keyed by item id.

    The file is read as comma-separated text whose first line is a header
    and whose remaining lines look like ``itemid,title,genres``. A title may
    itself contain commas; in that case everything between the first and the
    last column is re-joined with ``","`` to rebuild the title.

    Args:
        item_file: Path to the item file.

    Returns:
        A dict mapping itemid (str) to ``[title, genres]``. When an itemid
        appears more than once, the first occurrence wins. An empty dict is
        returned when ``item_file`` does not exist.
    """
    if not os.path.exists(item_file):
        return {}

    item_info = {}
    # The context manager guarantees the file is closed on every exit path.
    with open(item_file, encoding='utf-8') as fp:
        # Skip the header line (no-op on an empty file).
        next(fp, None)
        for line in fp:
            fields = line.strip().split(',')
            # Skip rows that do not carry id, title and genres.
            if len(fields) < 3:
                continue
            itemid = fields[0]
            genres = fields[-1]
            # For exactly three columns this is just fields[1]; for more it
            # restores the commas that were part of the title.
            title = ",".join(fields[1:-1])
            if itemid not in item_info:
                item_info[itemid] = [title, genres]
    return item_info

def base_contribute_score():
    """Return the contribution of one co-click to an item pair's score.

    The baseline ItemCF formula counts every co-occurrence equally, so the
    contribution is the constant 1.
    """
    return 1

def cal_item_sim(user_click):
    """Compute item-to-item similarity from users' click lists.

    For two items i and j the score is::

        co_appear[i][j] / sqrt(click_num[i] * click_num[j])

    where ``co_appear[i][j]`` sums ``base_contribute_score()`` over every
    pair of positions in a user's list that hold i and j, and
    ``click_num[x]`` is the number of times x occurs across all lists.

    Args:
        user_click: dict mapping userid to a list of itemids.

    Returns:
        A dict mapping itemid_i to a list of ``(itemid_j, sim_score)``
        tuples sorted by score in descending order. Items that never
        co-occur with another item have no entry.
    """
    co_appear = {}
    item_user_click_num = {}

    for itemlist in user_click.values():
        for index_i, itemid_i in enumerate(itemlist):
            item_user_click_num[itemid_i] = item_user_click_num.get(itemid_i, 0) + 1
            # Pair item i with every item that follows it in this list, so
            # each pair of positions is visited once and credited both ways.
            for itemid_j in itemlist[index_i + 1:]:
                # Contribution of itemid_i to itemid_j.
                row_i = co_appear.setdefault(itemid_i, {})
                row_i[itemid_j] = row_i.get(itemid_j, 0) + base_contribute_score()
                # Contribution of itemid_j to itemid_i.
                row_j = co_appear.setdefault(itemid_j, {})
                row_j[itemid_i] = row_j.get(itemid_i, 0) + base_contribute_score()

    # Normalise the co-occurrence counts and sort each item's neighbours.
    # Every item present in co_appear was counted in item_user_click_num by
    # the loop above, so the lookups below cannot miss.
    item_sim_score_sorted = {}
    for itemid_i, relate_item in co_appear.items():
        scores = {}
        for itemid_j, co_time in relate_item.items():
            denominator = math.sqrt(
                item_user_click_num[itemid_i] * item_user_click_num[itemid_j]
            )
            scores[itemid_j] = co_time / denominator
        item_sim_score_sorted[itemid_i] = sorted(
            scores.items(), key=operator.itemgetter(1), reverse=True
        )
    return item_sim_score_sorted

def cal_recom_result(sim_info, user_click, recent_click_num=3, topk=5):
    """Build per-user recommendations from item similarities.

    For each user, the first ``recent_click_num`` items of the click list
    are looked up in ``sim_info`` and the first ``topk`` neighbours of each
    are recommended with their similarity score. If the same neighbour is
    reached through several clicked items, the score from the last one
    processed is kept.

    Args:
        sim_info: dict mapping itemid to a list of ``(itemid, score)``
            pairs; the leading entries of each list are used as-is.
        user_click: dict mapping userid to a list of itemids.
        recent_click_num: How many leading items of each click list to use.
        topk: How many neighbours to take per clicked item.

    Returns:
        A dict mapping every userid in ``user_click`` to its own dict of
        ``{itemid: recom_score}`` (empty if nothing could be recommended).
    """
    recom_info = {}
    for user, click_list in user_click.items():
        # A fresh dict per user: sharing one dict across users would hand
        # every user the union of everybody's recommendations.
        user_recom = {}
        for itemid in click_list[:recent_click_num]:
            # Items without similarity data contribute nothing.
            if itemid not in sim_info:
                continue
            for itemsimid, itemsimscore in sim_info[itemid][:topk]:
                user_recom[itemsimid] = itemsimscore
        recom_info[user] = user_recom
    return recom_info

def main_flow():
    """Run the ItemCF pipeline and print the recommendations for user '1'."""
    # Step 1: load every user's click sequence.
    user_click = get_user_click('./data/ratings.csv')
    # Step 2: compute item-to-item similarity.
    sim_info = cal_item_sim(user_click)
    # Step 3: recommend items based on the similarity.
    recom_result = cal_recom_result(sim_info, user_click)
    # .get avoids a KeyError when the ratings file is missing or user '1'
    # has no entry; an empty dict is printed in that case.
    print(recom_result.get('1', {}))

# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main_flow()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值