Python做一个最简单的推荐系统

根据每位用户对电影的评分,计算用户之间的相似度(欧氏距离、皮尔逊相关系数),并据此找出与指定用户最相似的其他用户。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
@Time    : 2018/9/15 8:19
@Author  : Negen
@Site    : 
@File    : recommend.py
@Software: PyCharm
'''
from math import sqrt
from colorama import Fore
import numpy as np
# Rating table used as the sample data set.
# Six movie categories: a, b, c, d, e, f
# Seven users: Cathy, Sophie, Susie, Antonio, Marco, Jack, Leo
# Each user maps to {category: rating}; a user only lists what they rated.
critics = {
    'Cathy': {'a': 2.5, 'b': 3.5, 'c': 3, 'd': 3.5, 'e': 2.5, 'f': 3},
    'Sophie': {'a': 3, 'b': 3.5, 'c': 1.5, 'd': 5, 'e': 1.5, 'f': 3},
    'Susie': {'a': 2.5, 'b': 3, 'd': 3.5, 'f': 4},
    'Antonio': {'b': 3.5, 'c': 3, 'd': 4, 'e': 2.5, 'f': 4.5},
    'Marco': {'a': 3, 'b': 4, 'c': 2, 'd': 3, 'e': 2, 'f': 3},
    'Jack': {'a': 3, 'b': 4, 'd': 5, 'e': 3.5, 'f': 3},
    'Leo': {'b': 4.5, 'd': 4, 'e': 1.0},
}


def sim_distance(prefs, person1, person2):
    """
    Distance-based similarity score between two people (Euclidean distance).

    :param prefs: mapping of person -> {item: rating}
    :param person1: key of the first person in prefs
    :param person2: key of the second person in prefs
    :return: 0 when the two people share no rated item, otherwise
             1 / (1 + Euclidean distance over the shared items)
    """
    ratings_a = prefs[person1]
    # Items rated by both people, in person1's rating order.
    shared = [item for item in ratings_a if item in prefs[person2]]
    if not shared:
        return 0

    ratings_b = prefs[person2]
    squared_gap = 0
    for item in shared:
        squared_gap += (ratings_a[item] - ratings_b[item]) ** 2

    # Adding 1 keeps the score finite (and equal to 1) for identical ratings.
    return 1 / (1 + sqrt(squared_gap))


# Demo: Euclidean-distance similarity between Cathy and Antonio, printed in red.
sim_dis = sim_distance(critics, 'Cathy', 'Antonio')            # Euclidean-distance-based similarity
print(Fore.RED, "欧氏距离:", sim_dis, Fore.RESET)


def sim_pearson(prefs, person1, person2):
    """
    Pearson correlation coefficient between two people's ratings.

    Only items rated by both people are taken into account.

    :param prefs: mapping of person -> {item: rating}
    :param person1: key of the first person in prefs
    :param person2: key of the second person in prefs
    :return: the Pearson correlation of the shared ratings; 0 when there are
             no shared items or when either person's shared ratings have no
             variance (the coefficient is undefined in that case)
    """
    ratings_1 = prefs[person1]
    ratings_2 = prefs[person2]
    shared = [item for item in ratings_1 if item in ratings_2]
    n = len(shared)
    # No overlap means no evidence of similarity: report 0, not a perfect
    # correlation, so unrelated people are not ranked first by callers.
    if n == 0:
        return 0

    xs = [ratings_1[item] for item in shared]
    ys = [ratings_2[item] for item in shared]

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_x_sq = sum(x * x for x in xs)
    sum_y_sq = sum(y * y for y in ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))

    # Covariance term (scaled by n).
    num = sum_xy - sum_x * sum_y / n
    # Variance terms (scaled by n) for each person.
    var_x = sum_x_sq - sum_x ** 2 / n
    var_y = sum_y_sq - sum_y ** 2 / n

    # Floating-point rounding can turn a zero variance into a tiny negative
    # number; treat that like zero instead of letting sqrt() raise.
    if var_x <= 0 or var_y <= 0:
        return 0

    den = sqrt(var_x * var_y)
    if den == 0:
        return 0
    return num / den


# Demo: Pearson correlation between Cathy and Sophie, printed in red.
sim_pear = sim_pearson(critics, 'Cathy', 'Sophie')
print(Fore.RED, "皮尔逊相关度:", sim_pear, Fore.RESET)

def test():
    """
    Cross-check the Pearson correlation with the covariance / std formula.

    For two fixed rating lists, computes the sample covariance and the
    product of the sample standard deviations (ddof=1), then prints the
    covariance, the standard-deviation product and their ratio (the Pearson
    coefficient), one per line.

    :return: None
    """
    person_1 = [2.5, 3.5, 3, 3.5, 2.5, 3]
    person_2 = [3, 3.5, 1.5, 5, 1.5, 3]
    mean_1 = sum(person_1) / len(person_1)
    mean_2 = sum(person_2) / len(person_2)
    # Sample covariance: each series must be centred on its OWN mean.
    cov = sum(
        (x - mean_1) * (y - mean_2) for x, y in zip(person_1, person_2)
    ) / (len(person_1) - 1)
    print("cov:", cov)
    # Product of the sample standard deviations (ddof=1 matches the n-1
    # divisor used for the covariance above).
    std = np.std(person_1, ddof=1)*np.std(person_2, ddof=1)
    print(std)
    pearson = cov/std
    print(pearson)
# test()


def topmatches(prefs, person, n, similarity = sim_pearson) -> list:
    """
    Rank every other person in prefs by similarity to person, best first.

    :param prefs: mapping of person -> {item: rating}
    :param person: key of the person to find matches for
    :param n: maximum number of matches to return
    :param similarity: callable (prefs, person, other) -> score
    :return: list of up to n (score, other_person) tuples in descending order
    """
    candidates = []
    for other in prefs:
        if person != other:
            candidates.append((similarity(prefs, person, other), other))
    # Ascending sort followed by a reversal yields the highest scores first.
    ranked = list(reversed(sorted(candidates)))
    return ranked[:n]


# Demo: up to 5 users most similar to Susie, using the default similarity function.
topList_pear = topmatches(critics, 'Susie', 5)

print(topList_pear)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值