Basic Apriori method

#=====================================================================
#                    data and parameters                             |
#=====================================================================
# Transaction database: transaction id -> items of that transaction.
# NOTE: every item must be a single character, because an itemset is
# encoded below as the string of its items in sorted order (e.g. 'bce').
db = {10:('a','c','d'),
      20:('b','c','e'),
      30:('a','b','c','e'),
      40:('b','e')}
# Same transactions as sets, for fast subset tests.
db1 = [set(t) for t in db.values()]
# Minimum number of transactions an itemset must occur in to be frequent.
min_sup = 2
#=====================================================================
#                    helper functions                                |
#=====================================================================
def setToStr(s):
    """Return the items of set ``s`` concatenated in sorted order."""
    return ''.join(sorted(s))


def frequent_only(counts, threshold):
    """Return the entries of ``counts`` whose count is >= ``threshold``."""
    return {key: n for key, n in counts.items() if n >= threshold}


def self_join(itemsets):
    """Build length-(k+1) candidates from a list of length-k itemsets.

    ``itemsets`` is a list of equally long strings, each with its items in
    sorted order.  Two itemsets are joined when they agree on everything
    but their last item; the two last items are appended in sorted order.
    """
    joined = []
    for i, a in enumerate(itemsets):
        for b in itemsets[i + 1:]:
            if a[:-1] == b[:-1]:
                joined.append(a[:-1] + min(a[-1], b[-1]) + max(a[-1], b[-1]))
    return joined


def prune(candidates, frequent):
    """Drop candidates that have an infrequent sub-itemset.

    A candidate string is kept only if every string obtained by removing
    one of its items is a key of ``frequent``.  The survivors are returned
    as a list of sets, ready for subset tests against the transactions.
    """
    kept = []
    for c in candidates:
        if all(c[:k] + c[k + 1:] in frequent for k in range(len(c))):
            kept.append(set(c))
    return kept


def count_support(candidate_sets, transactions):
    """Count, per candidate, the transactions that contain it.

    Returns a dict keyed by ``setToStr(candidate)``; candidates contained
    in no transaction are absent from the result.
    """
    counts = {}
    for c in candidate_sets:
        key = setToStr(c)
        for t in transactions:
            if c.issubset(t):
                counts[key] = counts.get(key, 0) + 1
    return counts
#=====================================================================
#                    main function                                   |
#=====================================================================
#---------------------------------------------------------------------
#                    scan DB once to get frequent 1-itemset         |
#---------------------------------------------------------------------
# Count over the set form of each transaction so that an item repeated
# inside one transaction is still counted once for that transaction.
table = {}
for t in db1:
    for k in t:
        table[k] = table.get(k, 0) + 1
#find frequent ones
ntable = frequent_only(table, min_sup)
# Every frequent itemset found so far, of any length, with its support.
frequent_patterns = dict(ntable)
#---------------------------------------------------------------------
#                    Generate length (k+1) candidate itemsets       |
#---------------------------------------------------------------------
# Sorted list: indexable on Python 2 and 3, and gives a stable output order.
nlist = sorted(ntable)
q = 1
while nlist:
    #---------------------------------------------------------------------
    #                    #Step 1: self-joining                           |
    #---------------------------------------------------------------------
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("-"*50)
    print("this is the  %s th iteration." % q)
    q += 1
    print("item list:  %s" % (nlist,))
    candidates = self_join(nlist)
    #---------------------------------------------------------------------
    #                    #Step 2: pruning                                |
    #---------------------------------------------------------------------
    print('candidates(after self-joining): %s' % (candidates,))
    cp = prune(candidates, ntable)#candidates pruned
    print('candidates(after pruning): %s' % (cp,))
    #---------------------------------------------------------------------
    #                    Test the candidates against DB                 |
    #---------------------------------------------------------------------
    table = count_support(cp, db1)
    print('candidates with frequency: %s' % (table,))

    ntable = frequent_only(table, min_sup)
    print('current frequent pattern: %s' % (ntable,))
    frequent_patterns.update(ntable)
    # An empty result here ends the loop: no longer itemset can be frequent.
    nlist = sorted(ntable)

注意:这个版本的 Apriori 算法只是初级版本,只能处理单个字符的项(如 'a'、'b');对于多字符的项(如 '123'、'234'),不能使用。


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值