一、代码实例:
import math
class UserPerference:
    """A user's preference vector, keyed by SKU number.

    Attributes are plain instance fields:
      * ``username``        -- the value passed to the constructor.
      * ``sku_perf_vector`` -- dict mapping ``sku_no`` to a preference value.
    """

    def __init__(self, username):
        """Create an empty preference vector for ``username``."""
        self.username = username
        # sku_no -> preference value; starts empty and is filled by addSKUPerf.
        self.sku_perf_vector = {}

    def addSKUPerf(self, sku_no, perf_val):
        """Record ``perf_val`` for ``sku_no`` if that SKU has no value yet.

        Uses ``dict.setdefault``, so the first value stored for a SKU wins;
        adding the same ``sku_no`` again leaves the stored value unchanged.
        """
        self.sku_perf_vector.setdefault(sku_no, perf_val)

    def print_info(self):
        """Print the username and the raw preference dict to stdout."""
        # Wrap in a 1-tuple so a tuple-valued username is formatted as a
        # single value instead of being unpacked as multiple % arguments.
        print("username:%s" % (self.username,))
        print(self.sku_perf_vector)

    def get_all_sku_nos(self):
        """Return a live, set-like keys view of the SKU numbers.

        The view supports set operations such as ``-`` against another
        keys view, and reflects later changes to ``sku_perf_vector``.
        """
        return self.sku_perf_vector.keys()
def calcUserBasedSimilarity(up1: "UserPerference", up2: "UserPerference"):
    """Return the cosine similarity of two users' SKU preference vectors.

    Each argument must expose a ``sku_perf_vector`` dict mapping a SKU
    number to a numeric preference value. A SKU missing from one user's
    vector is treated as a preference of 0 for that user.

    Formula (A and B are the two preference vectors)::

        similarity = (A . B) / (|A| * |B|)

    Example user/SKU preference matrix::

        | User/SKU | SKU1 | SKU2 | SKU3 | SKU4 |
        | B        |   3  |  32  |  32  |  64  |
        | A        |   2  |   3  |  32  |  43  |

    Notes:
      * When the two users share few SKUs, the dot product only gets
        contributions from the shared ones and the result is small; it can
        help to compare only a small number of top SKUs per user.
      * Suggested recommendation flow: step 1, compute user similarity (by
        SKU, category, or tag); step 2, recommend across similar users.

    Neither argument is modified. The numerator and denominator are printed
    to stdout before returning.

    Returns:
        A float clamped to [-1.0, 1.0]. Returns 0.0 when either vector has
        zero magnitude (empty or all zeros), since the cosine is undefined.
    """
    vec1 = up1.sku_perf_vector
    vec2 = up2.sku_perf_vector

    # A SKU absent from either side counts as 0 and adds nothing to the dot
    # product, so only shared SKUs are summed -- no zero-padding of the
    # callers' dicts is needed.
    molecules = sum(
        perf_val * vec2[sku_no]
        for sku_no, perf_val in vec1.items()
        if sku_no in vec2
    )

    # Padded zeros would add nothing to a norm either, so each norm is taken
    # over that user's own values.
    norm_sq_up1 = sum(perf_val * perf_val for perf_val in vec1.values())
    norm_sq_up2 = sum(perf_val * perf_val for perf_val in vec2.values())
    denominators = math.sqrt(norm_sq_up1) * math.sqrt(norm_sq_up2)

    print("分之为:{},分母为:{}".format(molecules, denominators))

    if denominators == 0:
        # Zero-magnitude vector: avoid ZeroDivisionError, report "no similarity".
        return 0.0

    # Floating-point rounding can push the raw quotient marginally outside
    # the mathematical range of a cosine, so clamp it.
    return max(-1.0, min(1.0, molecules / denominators))
def calcItemBasedSimilarity():
    """Placeholder for item-based similarity; not implemented yet.

    Intended recommendation flow: step 1, compute item-to-item similarity;
    step 2, derive recommendations from each user's preferences.

    Intended formula::

        similarity = N(A and B) / sqrt(N(A) * N(B))

    NOTE(review): N(X) is not defined in this file -- presumably the number
    of users who prefer item X; confirm before implementing. The data
    matrix for this method was left blank by the author.

    Returns:
        None. The function currently takes no input and computes nothing.
    """
    return None
# Demo: build two users' preference vectors and print their similarity.
user_p1 = UserPerference('liuyi')
user_p1.addSKUPerf(2, 22)
user_p1.addSKUPerf(3, 33)
user_p1.addSKUPerf(4, 30)
# Extra SKUs for the first user, currently disabled:
# user_p1.addSKUPerf(5, 30)
# user_p1.addSKUPerf(6, 30)
# user_p1.addSKUPerf(7, 30)
# user_p1.addSKUPerf(8, 30)
# user_p1.addSKUPerf(9, 30)
# user_p1.addSKUPerf(11, 30)

user_p2 = UserPerference('liuchen')
user_p2.addSKUPerf(2, 20)
user_p2.addSKUPerf(3, 30)
user_p2.addSKUPerf(4, 3)
# Extra SKUs for the second user, currently disabled:
# user_p2.addSKUPerf(5, 30)
# user_p2.addSKUPerf(6, 30)
# user_p2.addSKUPerf(7, 30)
# user_p2.addSKUPerf(8, 30)
# user_p2.addSKUPerf(9, 30)
# user_p2.addSKUPerf(10, 30)

# Show which SKU numbers each user has a preference for.
print(set(user_p1.sku_perf_vector))
print(set(user_p2.sku_perf_vector))

# NOTE: one combination of inputs was observed to produce a value slightly
# above 1 (1.00000000002) -- presumably floating-point rounding.
print("similarity is:{}".format(calcUserBasedSimilarity(user_p1, user_p2)))
二、运行结果:
分之为:1520,分母为:1799.2101044625108
similarity is:0.8448151754094773