协同过滤 推荐系统实战

本文基于http://blog.csdn.net/gamer_gyt/article/details/51346159

首先感谢原作者乐于分享的精神。

 做了如下修改:1、将数据划分为训练集和测试集,增加计算召回率与精确率的函数

                             2、修改了 userCF 中对推荐物品感兴趣度的计算方法

#-*-coding:utf-8-*-
'''
Created on 2016年5月2日

@author: Gamer Think
'''
from math import sqrt
import random
# Split the ratings file into a training set (`users`) and a held-out set
# (`test`). Both are nested dicts: user id -> {book id -> score}.
users = {}
test = {}
random.seed()
# A rating goes to the held-out set when random.randint(0, M) == k, i.e. with
# probability 1 / (M + 1); M = 1 gives a roughly even split.
M = 1
k = 1
# The file is opened once and closed deterministically; the original opened
# it twice and never closed either handle.
with open("uid_score_bid", "r") as fp:
    for line in fp:
        # Fields are read as: [0] user id, [1] score, [2] book id.
        lines = line.strip().split(",")
        if len(lines) < 3:
            # Blank or truncated line: nothing usable to record.
            continue
        target = test if random.randint(0, M) == k else users
        target.setdefault(lines[0], {})[lines[2]] = float(lines[1])

#---------------- end of newly added code ----------------------



class recommender:
    """User-based collaborative-filtering recommender.

    data:   nested dict, user -> {item -> rating} (the training set)
    k:      number of most-similar users consulted for a recommendation
    metric: name of the similarity measure; only 'pearson' is implemented
    n:      maximum number of items returned by recommend()
    """

    def __init__(self, data, k=12, metric='pearson', n=12):
        """Store the configuration and the rating data.

        Any dict (including dict subclasses) is accepted as data; anything
        else leaves the recommender with an empty data set. Because Pearson
        is the only similarity implemented, every metric value resolves to
        it, so self.fn is always defined.
        """
        self.k = k
        self.n = n
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {}

        self.metric = metric
        self.fn = self.pearson
        self.data = data if isinstance(data, dict) else {}

    def convertProductID2name(self, id):
        """Return the display name registered for id, or id itself if none."""
        return self.productid2name.get(id, id)

    def pearson(self, rating1, rating2):
        """Return the Pearson correlation of two users over co-rated items.

        rating1 / rating2 map item -> rating. Returns 0.0 when the users
        share no item or when either user's shared ratings have no variance.
        """
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rating1:
            if key in rating2:
                n += 1
                x = rating1[key]
                y = rating2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += pow(x, 2)
                sum_y2 += pow(y, 2)
        if n == 0:
            return 0.0

        # Force true division: with integer ratings Python 2 would otherwise
        # truncate the quotients below.
        n = float(n)
        var_x = sum_x2 - pow(sum_x, 2) / n
        var_y = sum_y2 - pow(sum_y, 2) / n
        # Rounding can push a zero variance slightly below zero, which would
        # make sqrt() raise; treat both cases as "no correlation".
        if var_x <= 0 or var_y <= 0:
            return 0.0
        denominator = sqrt(var_x) * sqrt(var_y)
        if denominator == 0:
            return 0.0
        return (sum_xy - (sum_x * sum_y) / n) / denominator

    def computeNearestNeighbor(self, username):
        """Return [(other_user, similarity)] for every other user.

        The list is sorted by similarity, most similar first. Raises
        KeyError if username is not in the data set.
        """
        own_ratings = self.data[username]
        distances = [
            (other, self.fn(own_ratings, ratings))
            for other, ratings in self.data.items()
            if other != username
        ]
        distances.sort(key=lambda pair: pair[1], reverse=True)
        return distances

    def recommend(self, user):
        """Recommend items that user has not rated yet.

        Each candidate item is scored by the similarity-weighted average of
        the ratings given by the (at most) k most similar users.

        Returns (recommendations, nearest): recommendations is a list of at
        most n (item, score) pairs sorted by score descending, and nearest
        is the full similarity list from computeNearestNeighbor().
        Raises KeyError if user is not in the data set.
        """
        nearest = self.computeNearestNeighbor(user)
        userRatings = self.data[user]

        weighted_sum = {}   # item -> sum(rating * similarity)
        weight_total = {}   # item -> sum(similarity)
        # Slicing copes with fewer than k neighbours; indexing nearest[i]
        # for i in range(k) raised IndexError on small data sets.
        for name, weight in nearest[:max(self.k, 0)]:
            for item, rating in self.data[name].items():
                if item in userRatings:
                    continue
                weighted_sum[item] = weighted_sum.get(item, 0.0) + rating * weight
                weight_total[item] = weight_total.get(item, 0.0) + weight

        recommendations = []
        for item, total in weighted_sum.items():
            norm = weight_total[item]
            if norm == 0:
                # Similarities were zero or cancelled out, so the weighted
                # average is undefined; skip instead of dividing by zero.
                continue
            recommendations.append((self.convertProductID2name(item), total / norm))

        recommendations.sort(key=lambda pair: pair[1], reverse=True)
        return recommendations[:self.n], nearest

    def recall(self, testt):
        """Evaluate the recommender against held-out ratings.

        testt maps user -> {item -> rating} for ratings withheld from the
        training data. For every training user the top-n recommendations
        are compared with that user's held-out items.

        Returns ([recall], [precision]) where recall is hits divided by the
        number of held-out items and precision is hits divided by n times
        the number of training users. A zero denominator yields 0.0.
        """
        hit = 0
        al = 0
        al_N = 0
        for user_a in self.data.keys():
            # Read the held-out set passed in; users without held-out
            # ratings cannot produce hits but still count for precision.
            tu = testt.get(user_a)
            if tu:
                rank, ne = self.recommend(user_a)
                for items_a in rank:
                    if items_a[0] in tu:
                        hit += 1
                al += len(tu)
            al_N += self.n

        recall_value = hit / (al * 1.0) if al else 0.0
        precision_value = hit / (1.0 * al_N) if al_N else 0.0
        return [recall_value], [precision_value]

 
def adjustrecommend(id):
    """Recommend books for the user id and print recall/precision.

    Builds a recommender over the module-level training set `users`,
    evaluates it against the module-level held-out set `test`, and prints
    the resulting recall and precision on one line.

    Returns (bookid_list, nearuser): the recommended book ids in ranking
    order, and the first 15 (user, similarity) pairs of the similarity list.
    """
    r = recommender(users)
    # k holds (book, score) pairs; nearuser holds (user, similarity) pairs.
    k, nearuser = r.recommend("%s" % id)
    rec, pre = r.recall(test)
    # Formatted so the output is the same text under Python 2 and Python 3.
    print("%s %s" % (rec, pre))
    bookid_list = [book for book, _score in k]
    return bookid_list, nearuser[:15]


# Script entry: compute recommendations for one sample user.
bookid_list, near_list = adjustrecommend("changanamei")
# Single-argument print call: same output under Python 2 and Python 3.
print('.................................................................')
#print ("bookid_list:",bookid_list)
#print ("near_list:",near_list)


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值