Apriori算法

from numpy import *
def loadDataSet():
    """Return a small hard-coded list of four transactions for demos.

    Each transaction is a list of integer item ids.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions
def createCl(dataSet):
    """Build the candidate 1-itemsets found in ``dataSet``.

    Every distinct item that occurs in any transaction becomes a
    single-element ``frozenset``. The sorted list of one-item lists is
    printed before the result is returned.

    Args:
        dataSet: iterable of transactions, each an iterable of items.

    Returns:
        A ``set`` of one-element ``frozenset`` objects.
    """
    singletons = []
    for transaction in dataSet:
        for item in transaction:
            candidate = [item]
            if candidate in singletons:
                continue  # already recorded this item
            singletons.append(candidate)
    singletons.sort()
    print(singletons)
    # frozenset is hashable, so the candidates can later be used as dict keys.
    return {frozenset(single) for single in singletons}
def scanD(D,Ck,minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (``len(D)`` is used as the
            denominator); each transaction is an iterable of hashable items.
        Ck: iterable of candidate itemsets. Each must provide ``issubset``
            and be hashable (e.g. ``frozenset``) because it is used as a
            dict key.
        minSupport: minimum fraction of transactions a candidate must be
            contained in to be reported as frequent.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` holds the candidates
        whose support is at least ``minSupport``, in reverse order of first
        occurrence. ``supportData`` maps every candidate that occurred at
        least once to its support, including those below the threshold;
        candidates that never occur are absent.
    """
    print(Ck)  # trace of the candidates being counted
    ssCnt = {}
    for tid in D:
        # Build the transaction's item set once per transaction rather than
        # once per candidate; this also keeps one-shot iterables usable.
        items = set(tid)
        for can in Ck:
            if can.issubset(items):
                ssCnt[can] = ssCnt.get(can, 0) + 1
    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems  # fraction of transactions containing key
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    # Appending then reversing once gives the same order as inserting each
    # key at the front, without the O(n) cost per insertion.
    retList.reverse()
    return retList, supportData
def aprioriGen(LK,k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are merged only when their first ``k-2`` items, taken in
    sorted order, are identical. Requiring a shared prefix means each
    k-itemset is produced from exactly one pair, so the result has no
    duplicate candidates (dropping the test would emit the same union
    several times).

    Args:
        LK: indexable sequence of itemsets (e.g. ``frozenset``), expected to
            each contain ``k-1`` mutually orderable items.
        k: size of the candidates to generate.

    Returns:
        A list with the union of every matching pair ``(LK[i], LK[j])``,
        ``i < j``, in pair order.
    """
    # Sort BEFORE slicing: set iteration order is arbitrary, so slicing the
    # raw iteration and sorting afterwards can pick the wrong "first k-2"
    # items and silently miss valid joins.
    prefixes = [sorted(itemset)[:k-2] for itemset in LK]
    retList = []
    lenLk = len(prefixes)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):  # each unordered pair once
            if prefixes[i] == prefixes[j]:
                retList.append(LK[i] | LK[j])
    return retList
def apriori(dataSet,minSupport=0.5):
    """Run the level-wise Apriori search over ``dataSet``.

    Starting from the frequent 1-itemsets, each round generates larger
    candidates from the previous level with ``aprioriGen`` and filters them
    with ``scanD`` until a level comes back empty. Progress is printed on
    every round.

    Args:
        dataSet: sized collection of transactions passed to ``createCl``
            and ``scanD``.
        minSupport: minimum support fraction forwarded to ``scanD``.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets found at level ``i + 1``; the final entry is the empty
        level that stopped the loop. ``supportData`` accumulates the
        support mappings returned by every ``scanD`` call.
    """
    candidates = createCl(dataSet)
    frequent, supportData = scanD(dataSet, candidates, minSupport)
    L = [frequent]
    print(L)
    k = 2
    # L always holds k-1 levels here, so L[-1] is the same entry as L[k-2].
    while L[-1]:
        Ck = aprioriGen(L[-1], k)
        print("CK", Ck)
        Lk, supK = scanD(dataSet, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
        newest = L[-1]
        print(newest)
        print("len(L[k-2])", len(newest), k - 2)
    return L, supportData
if __name__ == "__main__":
    # Demo run: mine the built-in sample transactions with the default
    # minimum support and print the resulting (levels, supports) tuple.
    x = loadDataSet()
    result = apriori(x)
    print(result)

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值