# Item-based collaborative filtering (基于物品的协同过滤)
import math
import operator
class ItemBasedCF:
    """Item-based collaborative filtering built from a ratings file.

    Attributes:
        train_file: path of the comma-separated ratings file.
        train: ``{user: {item: score}}`` table filled by ``readData``.
        W: ``{item: {other_item: similarity}}`` filled by ``ItemSimilarity``.
    """

    def __init__(self, train_file):
        """Store the ratings file path and load it immediately.

        Args:
            train_file: path of a text file with one ``item,user,score``
                record per line.
        """
        self.train_file = train_file
        self.readData()

    def readData(self):
        """Read ``self.train_file`` into ``self.train``.

        Every non-blank line is split on commas into ``item, user, score``
        (in that column order). The score is parsed as a float and then
        truncated to an int. Blank lines are skipped.

        Raises:
            ValueError: if a non-blank line does not have exactly three
                comma-separated fields, or its score is not numeric.
        """
        self.train = dict()  # user -> {item: score}
        # The context manager closes the file even when a line fails to parse.
        with open(self.train_file) as source:
            for line in source:
                record = line.strip()
                if not record:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    # instead of failing on the tuple unpacking below.
                    continue
                item, user, score = record.split(",")
                self.train.setdefault(user, {})[item] = int(float(score))

    @staticmethod
    def _best_matches(scores):
        """Return the entries of ``scores`` that tie for the highest value."""
        if not scores:
            return {}
        best = max(scores.values())
        return {item: weight for item, weight in scores.items() if weight == best}

    def ItemSimilarity(self, out_file='resultData/recomm.txt'):
        """Compute item-item similarities and write each item's best matches.

        Two items co-occur when the same user has a score for both. The
        similarity stored in ``self.W[i][j]`` is
        ``cooccurrences(i, j) / sqrt(N[i] * N[j])``, where ``N[x]`` is the
        number of users that have a score for ``x``.

        For every item one line ``<item>:<dict>`` is written to ``out_file``,
        where the dict holds the neighbours that share the maximum
        similarity (an empty dict when the item never co-occurs with
        another item).

        Args:
            out_file: path of the text file to (over)write. Defaults to the
                historical location ``resultData/recomm.txt``; the parent
                directory must already exist.
        """
        # Build the item-item co-occurrence counts and per-item user counts.
        C = dict()  # item -> {other item: number of users having both}
        N = dict()  # item -> number of users having it
        for items in self.train.values():
            for i in items:
                N[i] = N.get(i, 0) + 1
                row = C.setdefault(i, {})
                for j in items:
                    if i != j:
                        row[j] = row.get(j, 0) + 1

        # Turn the co-occurrence counts into the similarity matrix.
        self.W = dict()
        for i, related_items in C.items():
            self.W[i] = {
                j: cij / math.sqrt(N[i] * N[j])
                for j, cij in related_items.items()
            }

        # Open the output only after the computation succeeded, so a failure
        # above cannot leave a truncated file or a leaked handle behind.
        with open(out_file, 'w') as fw:
            for i, scores in self.W.items():
                fw.write(i + ':' + str(self._best_matches(scores)) + '\n')
# Script driver: load the ratings from the data file, then compute the item
# similarities and write the per-item recommendations file.
Item = ItemBasedCF(train_file="data/tvresult.txt")
Item.ItemSimilarity()
# Output format (格式为):