# Apriori算法的Python实现

def loadDataSet():
    """Return a small hard-coded transaction database for demonstration.

    Returns:
        A fresh list of four transactions, each a list of integer item ids.
    """
    transactions = (
        (1, 3, 4),
        (2, 3, 5),
        (1, 2, 3, 5),
        (2, 5),
    )
    return [list(transaction) for transaction in transactions]

def creatC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions; each transaction is an iterable
            of hashable, mutually orderable items.

    Returns:
        A list of single-item ``frozenset`` objects, one per distinct item
        found in ``dataSet``, ordered by ascending item value.
    """
    # Collect distinct items with a set union instead of repeated list
    # concatenation, which copied the growing list for every transaction.
    items = set()
    for transaction in dataSet:
        items.update(transaction)
    # Return a real list, not a bare map(): on Python 3 map() is a one-shot
    # iterator, and callers such as scanD iterate the candidates once per
    # transaction, so a lazy result would be empty after the first pass.
    return [frozenset([item]) for item in sorted(items)]

def scanD(D,Ck,minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (each must work as the argument
            of ``issubset``).
        Ck: re-iterable collection of candidate itemsets (hashable sets).
        minSupport: minimum fraction of transactions a candidate must occur in.

    Returns:
        A ``(retList, supportData)`` tuple. ``retList`` holds the candidates
        whose support is at least ``minSupport``, in the reverse of the order
        in which they were first counted. ``supportData`` maps each of those
        candidates to its support.
    """
    occurrences = {}
    for transaction in D:
        for candidate in Ck:
            if not candidate.issubset(transaction):
                continue
            if candidate in occurrences:
                occurrences[candidate] += 1
            else:
                occurrences[candidate] = 1

    transactionCount = float(len(D))
    supportData = {}
    frequent = []
    for candidate, hits in occurrences.items():
        ratio = hits / transactionCount
        if ratio >= minSupport:
            frequent.append(candidate)
            supportData[candidate] = ratio
    # The result lists the most recently visited candidate first.
    frequent.reverse()
    return frequent, supportData

def apprioriGen(Lk,k):
    """Join (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k-2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: indexable sequence of itemsets (``frozenset``), each of size
            ``k-1``, whose items are mutually orderable.
        k: size of the candidate itemsets to produce.

    Returns:
        A list with the union of every pair of itemsets in ``Lk`` that share
        the same sorted ``k-2`` prefix, in pair order ``(i, j)`` with
        ``i < j``.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing first (then sorting the slice) compares arbitrary items and can
    # miss valid joins. Each prefix is also computed once per itemset instead
    # of once per pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    size = len(Lk)
    retList = []
    for i in range(size):
        for j in range(i + 1, size):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

def appriori(dataSet,minSupport=0.5):
    """Mine all frequent itemsets of ``dataSet`` with the Apriori algorithm.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable items.
        minSupport: minimum fraction of transactions an itemset must occur in
            to be considered frequent.

    Returns:
        A ``(L, supportData)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; levels are generated until one of them
        contains fewer than two itemsets. ``supportData`` maps every frequent
        itemset found to its support.
    """
    # Snapshot the input so a one-shot iterable is not consumed by creatC1
    # before the transactions are converted to sets.
    dataSet = list(dataSet)
    # Both collections are scanned repeatedly and D is passed to len() inside
    # scanD, so they must be real lists: a bare map() is a lazy iterator on
    # Python 3 and would fail or be exhausted after the first pass.
    C1 = list(creatC1(dataSet))
    D = [set(transaction) for transaction in dataSet]
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # At least two itemsets are needed at the previous level to join any
    # candidate for the next one.
    while len(L[k - 2]) > 1:
        Ck = apprioriGen(L[k - 2], k)
        Lk, supportk = scanD(D, Ck, minSupport)
        L.append(Lk)
        supportData.update(supportk)
        k += 1
    return L, supportData

def generateRule(item,supportData,minConf=0.7):
    """Print the association rules derived from a single frequent itemset.

    For each consequent ``Y`` that is a proper, non-empty subset of ``item``
    the rule ``item - Y ---> Y`` is printed when its confidence
    ``support(item) / support(item - Y)`` is at least ``minConf``.
    Single-item consequents are tried first; only those that pass are used
    as seeds for larger consequents, which are built level by level with
    ``apprioriGen``. Larger consequents are not pruned by confidence before
    the next join.

    Args:
        item: frequent itemset (``frozenset``) to derive rules from.
        supportData: mapping from itemset to support; must contain ``item``
            and every antecedent ``item - Y`` that gets evaluated.
        minConf: minimum confidence a rule needs in order to be printed.

    Returns:
        None. Rules are written to standard output, one per line.
    """
    def reportIfConfident(consequent):
        # Compute the confidence once and reuse it for the test and the output.
        antecedent = item - consequent
        conf = supportData[item] / supportData[antecedent]
        if conf >= minConf:
            # Single-argument print(...) with %-formatting emits the same text
            # as the Python 2 print statement and is valid on Python 3 too.
            print("%s ---> %s Conf %s" % (antecedent, consequent, conf))
            return True
        return False

    # Level 1: single-item consequents; keep the ones that pass as seeds.
    rule = []
    for element in list(item):
        doc = frozenset([element])
        if reportIfConfident(doc):
            rule.append(doc)

    # Grow consequents one item at a time. A consequent must stay strictly
    # smaller than the itemset, otherwise the antecedent would be empty and
    # have no entry in supportData, hence the explicit k < len(item) bound.
    k = 2
    while len(rule) > 0 and k < len(item):
        rule = apprioriGen(rule, k)
        for j in rule:
            reportIfConfident(j)
        k += 1
                
def generateRules(dataSet,supportData,L,minConf=0.7):
    """Walk every frequent itemset and generate all of its rules.

    Args:
        dataSet: accepted for interface compatibility; not used here.
        supportData: mapping from itemset to support, passed to
            ``generateRule``.
        L: list of levels of frequent itemsets; the first level is skipped
            and every itemset in the remaining levels is processed.
        minConf: minimum confidence forwarded to ``generateRule``.

    Returns:
        None. ``generateRule`` prints the rules it finds.
    """
    multiItemSets = (itemset for level in L[1:] for itemset in level)
    for itemset in multiItemSets:
        generateRule(itemset, supportData, minConf)
            

    
# Demo run: mine the frequent itemsets of the sample data with the default
# minimum support, show the results, then print every rule whose confidence
# is at least 0.6.
dataSet=loadDataSet()
L,supportData=appriori(dataSet)
# Single-argument print(...) produces the same output as the Python 2 print
# statement and is also valid syntax on Python 3.
print(supportData)
print(L)
generateRules(dataSet, supportData, L, 0.6)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值