推荐算法(userBased, itemBased)

#!/usr/bin/python
from math import sqrt

def genUserBasedMap(file = 'u.data'):
    """Load ratings from a tab-separated file into a user -> {item: rate} dict.

    Each non-blank line must hold at least three tab-separated integer
    fields, read as (user, item, rate); any further fields are ignored.
    Blank lines are skipped.

    Returns a dict keyed by user id whose values are dicts mapping item id
    to that user's rating.  Raises ValueError on a malformed line and
    IOError/OSError if the file cannot be opened.
    """
    ratings = {}
    # `with` guarantees the handle is closed even when a line fails to parse.
    with open(file) as f:
        for line in f:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline at EOF).
                continue
            (user, item, rate) = line.split('\t')[0:3]
            ratings.setdefault(int(user), {})[int(item)] = int(rate)
    return ratings

def genItemBasedMap(file = 'u.data'):
    """Load ratings from a tab-separated file into an item -> {user: rate} dict.

    Each non-blank line must hold at least three tab-separated integer
    fields, read as (user, item, rate); any further fields are ignored.
    Blank lines are skipped.

    Returns a dict keyed by item id whose values are dicts mapping user id
    to the rating that user gave the item.  Raises ValueError on a
    malformed line and IOError/OSError if the file cannot be opened.
    """
    ratings = {}
    # `with` guarantees the handle is closed even when a line fails to parse.
    with open(file) as f:
        for line in f:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline at EOF).
                continue
            (user, item, rate) = line.split('\t')[0:3]
            ratings.setdefault(int(item), {})[int(user)] = int(rate)
    return ratings

def userBased(map, person, n=5, similarity=None):
    """Recommend up to `n` items for `person` from similar users' ratings.

    map        -- dict of user -> {item: rate}
    person     -- key of the user to recommend for
    n          -- maximum number of recommendations returned
    similarity -- function(v1, v2) -> number scoring two rating vectors;
                  defaults to `pearson` when omitted or None

    Every other user with a strictly positive similarity to `person`
    contributes their ratings, weighted by that similarity, to each item
    `person` has not rated.  Returns a list of (predicted_rate, item)
    tuples sorted from highest to lowest prediction.
    """
    # The default is resolved at call time: `pearson` is defined later in
    # the module, so naming it in the signature fails when the def runs.
    if similarity is None:
        similarity = pearson

    weighted = {}   # item -> sum of similarity * rate
    simTotals = {}  # item -> sum of similarity (normalizing denominator)
    for other in map:
        if other == person:
            continue
        score = distance(map, other, person, similarity)
        if score <= 0:
            # Dissimilar or unrelated users carry no weight.
            continue
        for item, rate in map[other].items():
            if item != 0 and item is not None and item not in map[person]:
                weighted[item] = weighted.get(item, 0) + score * rate
                simTotals[item] = simTotals.get(item, 0) + score

    # Normalize so users with many similar neighbours are not over-counted.
    rankings = [(total / simTotals[item], item)
                for item, total in weighted.items()]
    rankings.sort(reverse=True)
    return rankings[0:n]

def itemBased(map, item, n = 5, similarity=None):
    """Return the `n` entries of `map` most similar to `item`.

    map        -- dict of item -> {user: rate}
    item       -- key of the reference item
    n          -- maximum number of results returned
    similarity -- function(v1, v2) -> number scoring two rating vectors;
                  defaults to `pearson` when omitted or None

    Returns a list of (similarity_score, other_item) tuples sorted from
    most to least similar; `item` itself is excluded.
    """
    # The default is resolved at call time: `pearson` is defined later in
    # the module, so naming it in the signature fails when the def runs.
    if similarity is None:
        similarity = pearson

    scores = [(distance(map, item, other, similarity), other)
              for other in map if other != item]
    scores.sort(reverse=True)
    return scores[0:n]
    
def distance(map, p1, p2, similarity=None):
    """Score how alike entries `p1` and `p2` of `map` are.

    map        -- dict of key -> {inner_key: rate}
    p1, p2     -- the two outer keys to compare (KeyError if missing)
    similarity -- function(v1, v2) -> number applied to the two rating
                  vectors; defaults to `cosine` when omitted or None

    Only inner keys present under both `p1` and `p2` are compared.
    Returns 0 when the two entries share no inner key, otherwise whatever
    `similarity` returns for the aligned rating vectors.
    """
    shared = [key for key in map[p1] if key in map[p2]]
    if not shared:
        return 0

    # The default is resolved at call time: `cosine` is defined later in
    # the module, so naming it in the signature fails when the def runs.
    if similarity is None:
        similarity = cosine

    # Build the two rating vectors in the same key order.
    v1 = [map[p1][key] for key in shared]
    v2 = [map[p2][key] for key in shared]
    return similarity(v1, v2)

#different similarity functions
def euclidean(v1, v2):
    """Similarity derived from the squared Euclidean distance.

    Compares the two vectors element-wise up to the length of the shorter
    one and returns 1 / (1 + sum of squared differences), so identical
    vectors score 1.0 and the score shrinks toward 0 as they diverge.
    Returns 0 when either vector is empty.
    """
    if min(len(v1), len(v2)) == 0:
        return 0

    # zip stops at the shorter vector, matching the min-length comparison.
    squared = sum((a - b) ** 2 for a, b in zip(v1, v2))
    return 1 / float(1 + squared)

def cosine(v1, v2):
    """Cosine of the angle between two rating vectors.

    Only the first min(len(v1), len(v2)) elements of each vector are used.
    Returns 0 when either vector is empty or has zero magnitude over that
    range; otherwise dot(v1, v2) / (|v1| * |v2|).
    """
    if min(len(v1), len(v2)) == 0:
        return 0

    dot = 0      # dot product
    normSq1 = 0  # squared magnitude of v1
    normSq2 = 0  # squared magnitude of v2
    for a, b in zip(v1, v2):
        dot += a * b
        normSq1 += a * a
        normSq2 += b * b

    if normSq1 == 0 or normSq2 == 0:
        return 0
    return dot / (sqrt(normSq1) * sqrt(normSq2))

def pearson(v1, v2):
    """Pearson correlation coefficient of two rating vectors.

    Only the first min(len(v1), len(v2)) elements of each vector are used.
    Returns 0 when either vector is empty or has zero variance over that
    range; otherwise covariance / sqrt(variance1 * variance2).
    """
    count = min(len(v1), len(v2))
    if count == 0:
        return 0
    size = float(count)

    # zip stops at the shorter vector, so only `count` pairs are kept.
    pairs = list(zip(v1, v2))

    # Means of v1 and v2.
    mean1 = sum(a for a, _ in pairs) / size
    mean2 = sum(b for _, b in pairs) / size

    cov = 0   # covariance of v1 and v2
    var1 = 0  # variance of v1
    var2 = 0  # variance of v2
    for a, b in pairs:
        dev1 = a - mean1
        dev2 = b - mean2
        cov += dev1 * dev2
        var1 += dev1 * dev1
        var2 += dev2 * dev2
    cov /= size
    var1 /= size
    var2 /= size

    if var1 == 0 or var2 == 0:
        return 0
    return cov / sqrt(var1 * var2)


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值