#!/usr/bin/env python
# coding=GBK
"""User-based collaborative filtering on a small movie-rating sample.

`critics` maps each person to the movies they rated.  The functions below
score how similar two people are, list the people most similar to a given
person, and predict ratings for movies that person has not rated yet.
"""
from math import sqrt

try:
    # The only former use of chardet was a dead call on the literal string
    # "item2" whose result was discarded.  The name is kept importable but
    # optional, so the script also runs where chardet is not installed.
    import chardet  # noqa: F401
except ImportError:
    chardet = None

# Movie ratings: person -> {movie: score}; e.g. 'ltl' gave movie 'a' 4.3.
critics = {
    'ltl': {'a': 4.3, 'b': 3.5, 'c': 4.7},
    'oyl': {'a': 4.3, 'b': 3.5, 'd': 3.0, 'e': 4.2},
    'ygq': {'b': 3.2, 'c': 2.7, 'd': 4.1, 'f': 2.1},
    'mmm': {'a': 2.3, 'd': 4.3, 'e': 1.9},
}


def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity between person1 and person2.

    The score is 1 / (1 + d), where d is the Euclidean distance between
    the two people's ratings over the items both of them rated.  Identical
    ratings on every shared item therefore give 1.0.

    Args:
        prefs: mapping of person -> {item: rating}.
        person1: key of the first person in prefs.
        person2: key of the second person in prefs.

    Returns:
        A float in (0, 1], or 0.0 when the two people share no rated item.

    Raises:
        KeyError: if prefs is a dict and either person is not one of its keys.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]
    shared = [item for item in ratings1 if item in ratings2]
    if not shared:
        # Without this guard the distance would stay 0 and two strangers
        # would be scored as perfectly similar (1.0).
        return 0.0
    squared = sum((ratings1[item] - ratings2[item]) ** 2 for item in shared)
    return 1 / (1 + sqrt(squared))


def topMatchs(prefs, person, n=5, similarity=sim_distance):
    """Return the n people most similar to person, best match first.

    Args:
        prefs: mapping of person -> {item: rating}.
        person: key of the person to find matches for.
        n: maximum number of matches to return.
        similarity: callable (prefs, person, other) -> score.

    Returns:
        A list of (score, other_person) tuples in descending order.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]
    return sorted(scores, reverse=True)[:n]


def getRecommendations(prefs, person, similarity=sim_distance):
    """Predict ratings for the items person has not rated yet.

    Each prediction is the average of the other people's ratings for the
    item, weighted by their similarity to person.  People whose similarity
    is not positive are ignored.

    Args:
        prefs: mapping of person -> {item: rating}.
        person: key of the person to recommend for.
        similarity: callable (prefs, person, other) -> score.

    Returns:
        A list of (predicted_rating, item) tuples, highest rating first.
    """
    totals = {}    # item -> sum of similarity * rating
    sim_sums = {}  # item -> sum of the similarities that contributed
    for other in prefs:
        if other == person:
            continue
        sim = similarity(prefs, person, other)
        if sim <= 0:
            continue
        for item, rating in prefs[other].items():
            if item in prefs[person]:
                continue
            totals[item] = totals.get(item, 0) + sim * rating
            sim_sums[item] = sim_sums.get(item, 0) + sim
    # Every entry in sim_sums is a sum of strictly positive similarities,
    # so the division below cannot hit zero.
    ranks = [(total / sim_sums[item], item) for item, total in totals.items()]
    return sorted(ranks, reverse=True)


def main():
    """Print the recommendations for 'ltl' as '<item> : <score>' lines."""
    for score, item in getRecommendations(critics, 'ltl'):
        # %-formatting inside print(...) yields the same text on Python 2
        # and Python 3, unlike the Python 2-only print statement.
        print("%s : %s" % (item, score))


if __name__ == "__main__":
    main()
参考:《集体智慧编程》
由于我手头没有现成的数据, 可以使用这本书推荐的影片评分数据集 http://grouplens.org/node/73 (共10万条评分记录); 请自行下载并导入后运行
强烈推荐这本书, 入门真的很好