笔记:一个简单的推荐系统

https://blog.csdn.net/jiyang_1/article/details/50177281

原文写得很详尽。

这里将其修改为 Python 3 版本,代码如下:

import math
def load_matrix(path="\\train.csv"):
    """Load a CSV table into a dict keyed by ``(row_label, column_label)``.

    The first line of the file is the header: its first cell is ignored and
    the remaining cells are the column labels. Every following line holds a
    row label in its first cell followed by one value per column.

    Args:
        path: Location of the CSV file. Defaults to the location that was
            previously hard-coded, so existing zero-argument calls behave
            as before.

    Returns:
        A dict mapping ``(row_label, column_label)`` to the raw cell text.
        Values are kept as strings; an empty cell is stored as ``""``.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a data line has more cells than the header.
    """
    matrix = {}
    # The context manager guarantees the handle is closed even on error.
    with open(path) as f:
        # Strip the line terminator from the header as well; otherwise the
        # last column label keeps a trailing newline and never matches.
        columns = f.readline().rstrip("\r\n").split(',')

        for line in f:
            scores = line.rstrip("\r\n").split(',')
            row_label = scores[0]
            for i, score in enumerate(scores[1:], start=1):
                matrix[(row_label, columns[i])] = score

    return matrix
# Load the table once at module level and dump it for inspection.
# NOTE: this runs (and reads the CSV file) as soon as the module is executed.
matrix=load_matrix()
print("matrix:",matrix)

def sim_distance(matrix,row1,row2):
    """Return a Euclidean-distance-based similarity between two rows.

    Only columns for which both rows have a non-empty value are compared.

    Args:
        matrix: Dict mapping ``(row, column)`` to a value convertible with
            ``float``; ``""`` marks a missing value.
        row1: Label of the first row.
        row2: Label of the second row.

    Returns:
        ``0`` when the rows share no filled column, otherwise
        ``1 / (1 + d)`` where ``d`` is the Euclidean distance over the
        shared columns (so identical rows score ``1.0``).
    """
    def has_value(row, column):
        # A cell counts only if it exists and is not the empty string.
        key = (row, column)
        return key in matrix and matrix[key] != ""

    every_column = {key[1] for key in matrix}
    shared = [
        column
        for column in every_column
        if has_value(row1, column) and has_value(row2, column)
    ]
    if not shared:
        return 0

    squared_total = sum(
        (float(matrix[(row1, column)]) - float(matrix[(row2, column)])) ** 2
        for column in shared
    )
    return 1 / (1 + math.sqrt(squared_total))
# Demo: print the similarity between two specific rows of the loaded matrix.
print(sim_distance(matrix, "Kai Zhou", "Shuai Ge"))


def top_matches(matrix, row, similarity=sim_distance):
    """Rank every other row by its similarity to ``row``.

    Args:
        matrix: Dict mapping ``(row, column)`` to a value.
        row: Label of the row to compare against.
        similarity: Callable ``(matrix, row_a, row_b) -> number`` used to
            score each pair; defaults to ``sim_distance``.

    Returns:
        A list of ``(score, other_row)`` tuples sorted in descending order.
    """
    row_labels = {key[0] for key in matrix}

    ranked = []
    for other in row_labels:
        if other == row:
            continue
        ranked.append((similarity(matrix, row, other), other))

    # Ascending sort followed by a reversal yields the descending ranking.
    ranked.sort()
    return ranked[::-1]
# Demo: rank all other rows of the matrix by similarity to `person`.
person = "Kai Zhou"
print("top match for:", person)
print(top_matches(matrix, person))
def transform(matrix):
    """Return the transpose of ``matrix``, swapping row and column labels.

    Only cells that actually exist in ``matrix`` are copied, so a sparse
    matrix (one where some ``(row, column)`` pairs are absent) is
    transposed without raising ``KeyError``.

    Args:
        matrix: Dict mapping ``(row, column)`` to a value.

    Returns:
        A new dict mapping ``(column, row)`` to the same value. The input
        dict is not modified.
    """
    return {(column, row): value for (row, column), value in matrix.items()}
# Find the films most similar to "Friends":
# transposing the matrix turns its columns into rows, so top_matches can
# be reused to compare columns with each other.
trans_matrix = transform(matrix)
print("trans:",trans_matrix)

film = "Friends"
print("top match for:", film)
print(top_matches(trans_matrix,film))

def get_recommendations(matrix, row, similarity=sim_distance):
    """Score every column for ``row`` using similarity-weighted averages.

    For each other row with a positive similarity to ``row``, every filled
    cell contributes its value weighted by that similarity. A column's
    score is the weighted sum divided by the sum of the weights.

    Columns for which ``row`` already has a value are scored as well; they
    are not filtered out.

    Args:
        matrix: Dict mapping ``(row, column)`` to a value convertible with
            ``float``; ``""`` or an absent key marks a missing value.
        row: Label of the row to compute scores for.
        similarity: Callable ``(matrix, row_a, row_b) -> number``; defaults
            to ``sim_distance``.

    Returns:
        A list of ``(score, column)`` tuples sorted in descending order.
        Columns with no contributing value are omitted.
    """
    rows = {key[0] for key in matrix}
    columns = {key[1] for key in matrix}

    weighted_sum = {}  # column -> sum of value * similarity
    weight_sum = {}    # column -> sum of similarity

    for r in rows:
        if r == row:
            continue
        sim = similarity(matrix, row, r)
        if sim <= 0:
            continue

        for c in columns:
            # .get() tolerates sparse matrices where (r, c) was never set.
            value = matrix.get((r, c), "")
            if value == "":
                continue

            weight_sum[c] = weight_sum.get(c, 0) + sim
            weighted_sum[c] = weighted_sum.get(c, 0) + float(value) * sim

    # weight_sum[c] is strictly positive: only sim > 0 is ever added.
    scores = [(weighted_sum[c] / weight_sum[c], c) for c in weighted_sum]
    scores.sort(reverse=True)
    return scores

# Demo: similarity-weighted column scores for `person`.
print(get_recommendations(matrix, person))

# Same computation on the transposed matrix, scoring rows of the original
# matrix for the column "Friends". The transpose is rebuilt here even though
# trans_matrix was already computed earlier in the script.
trans_matrix = transform(matrix)
print(get_recommendations(trans_matrix,  "Friends"))

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

MC数据局

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值