Collaborative Filtering(协同过滤)推荐算法


from math import sqrt

# Sample ratings table: maps a person's name to a dict of
# {movie title: numeric rating} for the movies that person has rated.
dataset = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
					
def similarity_score(person1, person2, data=None):
    """Return a Euclidean-distance-based similarity of two people.

    Only items rated by both people contribute. The result is
    ``1 / (1 + d)`` where ``d`` is the Euclidean distance between the two
    rating vectors over the shared items, so identical ratings give 1.0 and
    larger distances give values closer to 0.

    Args:
        person1: Key of the first person in the ratings table.
        person2: Key of the second person in the ratings table.
        data: Optional ratings table ``{person: {item: rating}}``. When
            omitted, the module-level ``dataset`` is used.

    Returns:
        0 when the two people share no rated items, otherwise a float
        in the range (0, 1].

    Raises:
        KeyError: If either person is missing from the ratings table.
    """
    ratings = dataset if data is None else data
    first = ratings[person1]
    second = ratings[person2]

    # Collect the complete overlap before deciding anything: bailing out
    # while still scanning would wrongly report "nothing in common" whenever
    # the first item examined happens not to be shared.
    shared_items = [item for item in first if item in second]
    if not shared_items:
        return 0

    squared_distance = sum(
        (first[item] - second[item]) ** 2 for item in shared_items
    )
    return 1 / (1 + sqrt(squared_distance))



def pearson_correlation(person1, person2, data=None):
    """Return the Pearson correlation of two people's shared ratings.

    Only items rated by both people are considered.

    Args:
        person1: Key of the first person in the ratings table.
        person2: Key of the second person in the ratings table.
        data: Optional ratings table ``{person: {item: rating}}``. When
            omitted, the module-level ``dataset`` is used.

    Returns:
        0 when the people share no rated items or when the correlation is
        undefined (either person's shared ratings have no spread);
        otherwise the correlation coefficient as a float.

    Raises:
        KeyError: If either person is missing from the ratings table.
    """
    ratings = dataset if data is None else data
    first = ratings[person1]
    second = ratings[person2]

    shared_items = [item for item in first if item in second]
    if not shared_items:
        return 0

    # Use a float count so the divisions below never truncate, even when
    # every rating is an int and "/" is integer division (Python 2).
    count = float(len(shared_items))

    xs = [first[item] for item in shared_items]
    ys = [second[item] for item in shared_items]

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_xx = sum(x * x for x in xs)
    sum_yy = sum(y * y for y in ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))

    covariance = sum_xy - sum_x * sum_y / count
    spread_x = sum_xx - sum_x ** 2 / count
    spread_y = sum_yy - sum_y ** 2 / count

    # Mathematically both spreads are >= 0, but floating-point rounding can
    # leave a tiny negative value. Treat that as "no spread" rather than
    # letting sqrt() raise ValueError or produce a meaningless coefficient.
    if spread_x <= 0 or spread_y <= 0:
        return 0

    denominator = sqrt(spread_x * spread_y)
    if denominator == 0:
        # The product of two tiny positive spreads can underflow to zero.
        return 0
    return covariance / denominator

def most_similar_users(person, number_of_users):
    """Return the people whose ratings correlate best with ``person``.

    Every other person in ``dataset`` is scored against ``person`` with
    ``pearson_correlation``.

    Returns:
        A list of at most ``number_of_users`` ``(score, name)`` tuples,
        ordered from the highest score to the lowest.
    """
    ranked = sorted(
        (pearson_correlation(person, candidate), candidate)
        for candidate in dataset
        if candidate != person
    )
    # Ascending sort followed by a flip puts the best matches first.
    ranked.reverse()
    return ranked[:number_of_users]

def user_reommendations(person):
    """Recommend items ``person`` has not rated yet.

    Each candidate item is scored with a similarity-weighted average of the
    ratings given by the other people in ``dataset``, where the weight is
    the Pearson correlation between ``person`` and the rater.

    Returns:
        A list of item names, ordered from the highest predicted score to
        the lowest.
    """
    weighted_totals = {}     # item -> sum of (rating * similarity)
    similarity_totals = {}   # item -> sum of similarities that contributed

    for other, other_ratings in dataset.items():
        # Never compare a person with themselves.
        if other == person:
            continue

        sim = pearson_correlation(person, other)
        # People with zero or negative correlation do not contribute.
        if sim <= 0:
            continue

        for item, rating in other_ratings.items():
            # Skip anything the person already has a non-zero rating for.
            if item in dataset[person] and dataset[person][item] != 0:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + rating * sim
            similarity_totals[item] = similarity_totals.get(item, 0) + sim

    # Normalise each total by the similarity mass behind it, then rank.
    ranked = sorted(
        (total / similarity_totals[item], item)
        for item, total in weighted_totals.items()
    )
    ranked.reverse()
    return [item for _, item in ranked]
		

# Demo: print the recommendations computed for 'Toby'. The parenthesised form
# is valid on Python 3 and prints the same output on Python 2.
print(user_reommendations('Toby'))


探索推荐引擎内部的秘密,第 2 部分: 深入推荐引擎相关算法 - 协同过滤


探索推荐引擎内部的秘密,第 3 部分: 深入推荐引擎相关算法 - 聚类

memory-based 协同过滤(CF)方法


推荐方法总结



机器学习相关——协同过滤


Collaborative filtering 学习总结


如何利用用户标签数据


基于标签的推荐系统


基于用户的最近邻推荐







  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值