import collections
# Minimum total count an item needs to be kept as "frequent"; the filter
# below compares each item's count against it with `>=`.
min_support = 3
def sp(s):
    """Split *s* into a new list of its elements.

    For a string this yields one single-character string per character,
    e.g. ``sp("abc") == ['a', 'b', 'c']``. Any other iterable is copied
    element by element into a fresh list.
    """
    return list(s)
#=====================================================================
# main function |
#=====================================================================
#---------------------------------------------------------------------
# generate ordered frequent items |
#---------------------------------------------------------------------
# Transaction database: a list of (tid, items) pairs, one item per character.
db = [
    (100, sp("facdgimp")),
    (200, sp("abcflmo")),
    (300, sp("bfhjo")),
    (400, sp("bcksp")),
    # Passed through sp() like the other rows so every transaction is a list.
    (500, sp("afcelpmn")),
]

# item -> number of occurrences over all transactions.
tn = {}
for tid, items in db:
    for item in items:
        tn[item] = tn.get(item, 0) + 1

# frequent_item -> count, keeping only items whose count reaches min_support.
ftn = {item: count for item, count in tn.items() if count >= min_support}

# Frequent items ordered by descending count. sorted() is stable (also with
# reverse=True), so items with equal counts keep the order ftn yields them in.
f_list = collections.OrderedDict(
    sorted(ftn.items(), key=lambda pair: pair[1], reverse=True)
)
# A single-argument print(...) emits the same text on Python 2 and Python 3.
print(f_list)

# (tid, frequent items of that transaction, arranged in f_list order).
dbo = []
for tid, items in db:
    present = set(items)  # constant-time membership instead of key-list scans
    dbo.append((tid, [item for item in f_list if item in present]))
# Formatted into one string so Python 2 does not print a tuple.
print('dbo %s' % (dbo,))
#---------------------------------------------------------------------
# construct fp-tree |
#---------------------------------------------------------------------
'''
class HeaderTableItem():
def __init__(self,item,frequency,head=None):
self.item = item
self.frequency = frequency
self.head = head
def __repr__(self):
return str(self.__dict__)
'''
# Header table: frequent item -> [count, head]. The second slot starts as
# None; presumably it will point at the first tree node for that item
# (cf. Tree.next_same) -- confirm against the code that fills it in.
header_table = {item: [count, None] for item, count in f_list.items()}
# A single-argument print(...) emits the same text on Python 2 and Python 3.
print(header_table)
class Tree():
    """A tree node with a value, a `next_same` reference and child nodes.

    Attributes (set in ``__init__``):
        value: payload of this node; also what ``repr()`` shows.
        next_same: caller-supplied reference, ``None`` by default
            (presumably the next node carrying the same item -- confirm).
        children: dict mapping a key to a child ``Tree``; starts empty.
    """

    def __init__(self, value, next_same=None):
        self.value = value
        self.next_same = next_same
        self.children = {}

    def show(self, level=0):
        """Print this node and all descendants, one node per line.

        Each line is ``4 * level`` spaces, one separating space, then the
        node's value; children are printed one level deeper.

        Args:
            level: depth of this node; 0 (the default) for the root.
        """
        # Built as one string so the output is identical on Python 2 and 3.
        print('%s %s' % (' ' * level * 4, self.value))
        for child in self.children.values():
            child.show(level + 1)

    def __repr__(self):
        return str(self.value)
'''
root = Tree("root")
root.children.append(Tree("a"))
root.children.append(Tree("b"))
root.children[0].children.append(Tree("c"))
root.show(0)
'''
def insert_tree(l,t):
if l[0] in [c.value for c in t.children]:
root = Tree("")
# fp_growth
# (Scraped blog-page footer) Latest recommended article published on 2021-10-23 12:47:16