'''
Created on 2014-03-05
@Author:Dior
'''
import random
import math
import operator
class SimpleTagBased(object):
    """Tag-based top-N recommender evaluated on a random train/test split.

    A user's interest in an item is the sum, over every tag the user has
    used, of (user-tag count / log(1 + users of the tag)) multiplied by
    (tag-item count / log(1 + users of the item)).

    Constructing an instance runs the whole pipeline: load the file, split
    it, build the co-occurrence tables and print the evaluation table.
    """

    def __init__(self, filename):
        """Run the full pipeline on the tab-separated file ``filename``."""
        self.filename = filename
        self.loadData()
        self.randomlySplitData(0.2)
        self.initStat()
        self.testRecommend()

    def loadData(self):
        """Read ``self.filename`` into ``self.records``.

        The first line is skipped as a header. Every other non-blank line
        must hold four tab-separated fields (user, item, tag, timestamp);
        the three ids are parsed as integers and shifted down by one.
        ``self.records`` maps user -> item -> list of tags.

        Raises:
            IOError/OSError: if the file cannot be opened.
            ValueError: if a non-blank line is malformed.
        """
        print("##################load data begin#######################")
        self.records = {}
        record_count = 0
        # ``with`` guarantees the handle is closed even if a line is malformed.
        with open(self.filename) as fi:
            for line_no, line in enumerate(fi):
                # Line 0 is the header; blank lines (e.g. a trailing newline)
                # carry no record.
                if line_no == 0 or not line.strip():
                    continue
                uid, iid, tag, _timestamp = line.split('\t')
                uid = int(uid) - 1
                iid = int(iid) - 1
                tag = int(tag) - 1
                self.records.setdefault(uid, {}).setdefault(iid, []).append(tag)
                record_count += 1
        # Report data rows only; the header is not a record.
        print("Load data success.The total records is %d." % (record_count))
        # NOTE: this figure is the number of distinct users (top-level keys).
        print("The total records number is %d." % (len(self.records)))
        print("##################load data end#######################\n")

    def randomlySplitData(self, ratio, seed=100):
        """Split ``self.records`` into ``self.train`` and ``self.test``.

        Each (user, item) pair goes to the test set when a random draw is
        below ``ratio`` and to the training set otherwise. Tag lists are
        copied, so the two sets do not share list objects with the records.

        Args:
            ratio: threshold compared against ``random.random()``.
            seed: value passed to ``random.seed`` before drawing.
        """
        print("################beginning to split data#####################")
        random.seed(seed)
        self.train = dict()
        self.test = dict()
        for u, items in self.records.items():
            for i, tags in items.items():
                target = self.test if random.random() < ratio else self.train
                target.setdefault(u, {})[i] = list(tags)
        print("Split data complete.")
        print("The length of train set is %d,the length of test set is %d." % (len(self.train), len(self.test)))
        print("##################split data end#######################\n")

    def initStat(self):
        """Build the count tables from ``self.train``.

        Populates ``user_tags``, ``tag_items``, ``user_items``, ``tag_users``
        and ``item_users``; each is a dict of dicts incremented once per
        (user, item, tag) triple in the training set.
        """
        print("##################initstat begin#######################")
        self.user_tags = dict()
        self.tag_items = dict()
        self.user_items = dict()
        self.tag_users = dict()
        self.item_users = dict()
        for u, items in self.train.items():
            for i, tags in items.items():
                for tag in tags:
                    self._addValueToMat(self.user_tags, u, tag, 1)
                    self._addValueToMat(self.tag_items, tag, i, 1)
                    self._addValueToMat(self.user_items, u, i, 1)
                    self._addValueToMat(self.tag_users, tag, u, 1)
                    self._addValueToMat(self.item_users, i, u, 1)
        print("Initialize state complete.")
        print("The length of the user_tags is %d,the length of the tag_items is %d,the length of the user_items is %d" % (len(self.user_tags), len(self.tag_items), len(self.user_items)))
        print("##################initstat end#######################\n")

    def _addValueToMat(self, mat, index, item, value=1):
        """Add ``value`` to ``mat[index][item]``, creating entries as needed."""
        row = mat.setdefault(index, {})
        row[item] = row.get(item, 0) + value

    def precisionAndRecall(self, N):
        """Return ``(precision, recall)`` of top-``N`` lists over the test set.

        Test users that do not appear in the training set are skipped.
        Precision divides the hits by ``N`` per evaluated user; recall
        divides them by the number of test items of those users. A zero
        denominator (nothing to evaluate) yields 0.0 rather than an error.
        """
        hit = 0
        h_recall = 0
        h_precision = 0
        for user, items in self.test.items():
            if user not in self.train:
                continue
            rank = self.recommend(user, N)
            for item, _score in rank:
                if item in items:
                    hit += 1
            h_recall += len(items)
            h_precision += N
        precision = hit / (h_precision * 1.0) if h_precision else 0.0
        recall = hit / (h_recall * 1.0) if h_recall else 0.0
        return precision, recall

    def recommend(self, user, N):
        """Return up to ``N`` ``(item, score)`` pairs for ``user``, best first.

        Items the user already tagged in the training set are excluded.
        A user with no training data gets an empty list.
        """
        recommend_items = dict()
        tagged_items = self.user_items.get(user, {})
        for tag, count in self.user_tags.get(user, {}).items():
            # Damp the user's tag count by how many users use the tag.
            wut = count * 1.0 / math.log(1 + len(self.tag_users[tag]))
            for item, wti in self.tag_items[tag].items():
                # Skip known items before doing any weight arithmetic.
                if item in tagged_items:
                    continue
                # Damp the tag-item count by how many users tagged the item.
                wti = wti * 1.0 / math.log(1 + len(self.item_users[item]))
                recommend_items[item] = recommend_items.get(item, 0) + wut * wti
        return sorted(recommend_items.items(), key=operator.itemgetter(1), reverse=True)[0:N]

    def testRecommend(self):
        """Print recall and precision (as percentages) for several list sizes."""
        print("##################testRecommend begin#######################")
        print("%3s%20s%20s" % ('K', "recall", 'precision'))
        for n in [5, 10, 20, 40, 60, 80, 160]:
            precision, recall = self.precisionAndRecall(n)
            print("%3d%19.3f%%%19.3f%%" % (n, recall * 100, precision * 100))
        print("##################testRecommend end#######################\n")
if __name__ == '__main__':
    # Local import: only the script entry point needs the argument list.
    import sys

    # The dataset path may be given as the first command-line argument;
    # without one, the original hard-coded location is used.
    default_path = "E:\\RecommenderSystem\\datasets\\hetrec2011-delicious-2k\\user_taggedbookmarks-timestamps.dat"
    stb = SimpleTagBased(sys.argv[1] if len(sys.argv) > 1 else default_path)
    # Debugging aids (disabled): dump stb.user_items / stb.item_users, or
    # list every item whose item_users entry contains user 7 together with
    # that user's count, i.e. stb.item_users[item][7].
# SimpleTag_TFIDF++
# (Web-page footer: latest recommended article published 2021-10-02 01:34:24)