#coding=utf-8
"""Apriori frequent-itemset mining and association-rule generation (demo script).

Written to run unchanged on Python 2 and Python 3.
"""
from numpy import *


def loadDataSet():
    """Return the small hard-coded transaction list used by the demo."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def createC1(dataSet):
    """Build the list of candidate 1-itemsets.

    Every distinct item found in ``dataSet`` becomes a one-element
    ``frozenset``; the result is ordered by item value.
    """
    C1 = []
    for transaction in dataSet:
        for item in transaction:
            if [item] not in C1:
                C1.append([item])
    C1.sort()
    # list(...) keeps the Python 2 return type (a list) on Python 3 too, where
    # a bare map() is a one-shot iterator.
    return list(map(frozenset, C1))


def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and filter by support.

    D: the data set (iterable of transactions, each usable with ``issubset``).
    Ck: iterable of candidate itemsets (frozensets).
    minSupport: minimum support for an itemset to be kept.

    Returns ``(retList, supportData)``: the candidates whose support is at
    least ``minSupport``, and a dict mapping every candidate that occurred in
    at least one transaction to its support.
    """
    # Both inputs are walked repeatedly / measured with len(); materialise them
    # so one-shot iterators (e.g. Python 3 map objects) work as well.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:  # count the occurrences of each candidate
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(transactions))  # total number of transactions
    retList = []
    supportData = {}
    for key in ssCnt:
        support = ssCnt[key] / numItems  # compute the support
        if support >= minSupport:  # keep it when it meets the minimum support
            retList.insert(0, key)
        supportData[key] = support
    return retList, supportData


def aprioriGen(Lk, k):
    """Create candidate k-itemsets from the (k-1)-itemsets in ``Lk``.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical.
    """
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            # Sort BEFORE slicing: slicing the raw set iteration order would
            # make the compared prefix depend on hashing, not on the items.
            L1 = sorted(Lk[i])[:k - 2]
            L2 = sorted(Lk[j])[:k - 2]
            if L1 == L2:
                retList.append(Lk[i] | Lk[j])
    return retList


def apriori(dataSet, minSupport=0.5):
    """Run Apriori on ``dataSet``.

    Returns ``(L, supportData)`` where ``L[n]`` is the list of frequent
    itemsets of size ``n + 1`` (the last entry is an empty list, which is what
    ends the loop) and ``supportData`` maps itemsets to their support.
    """
    C1 = createC1(dataSet)
    D = list(map(set, dataSet))  # reused on every pass, so it must be a list
    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    while L[k - 2]:
        Ck = aprioriGen(L[k - 2], k)
        Lk, supK = scanD(D, Ck, minSupport)
        supportData.update(supK)
        L.append(Lk)
        k += 1
    return L, supportData


def generateRules(L, supportData, minConf=0.7):
    """Derive association rules from the frequent itemsets in ``L``.

    Only itemsets with at least two items (``L[1:]``) are considered.
    Returns a list of ``(freqSet, conseq, conf)`` tuples.
    """
    bigRuleList = []
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            if i > 1:
                # NOTE(review): rulesFromConseq merges the consequents before
                # testing them, so for itemsets of 3+ items the single-item
                # consequents in H1 are never evaluated. Left as-is to keep
                # the existing results.
                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
            else:
                calcConf(freqSet, H1, supportData, bigRuleList, minConf)
    return bigRuleList


def calcConf(freqSet, H, supportData, br1, minConf=0.7):
    """Evaluate the rules ``freqSet - conseq --> conseq`` for each conseq in H.

    Rules whose confidence is at least ``minConf`` are printed and appended to
    ``br1``. Returns the list of consequents that passed.
    """
    prunedH = []
    for conseq in H:
        conf = supportData[freqSet] / supportData[freqSet - conseq]
        if conf >= minConf:
            # Single-argument print(...) produces the same text on Python 2
            # and Python 3.
            print('%s --> %s conf: %s' % (freqSet - conseq, conseq, conf))
            # NOTE(review): the stored tuple carries the whole itemset, not
            # the antecedent that is printed above; kept for compatibility.
            br1.append((freqSet, conseq, conf))
            prunedH.append(conseq)
    return prunedH


def rulesFromConseq(freqSet, H, supported, br1, minConf=0.7):
    """Recursively grow the consequents in ``H`` and test the resulting rules.

    ``supported`` is the itemset -> support mapping. It is now actually used:
    the body previously read the module-level ``supportData`` instead of this
    argument.
    """
    if not H:  # nothing to extend; also avoids indexing an empty list
        return
    m = len(H[0])
    if len(freqSet) > (m + 1):
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supported, br1, minConf)
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supported, br1, minConf)


dataSet = loadDataSet()
# C1=createC1(dataSet)
# D=map(set,dataSet)
# print D
# L1,supportData0=scanD(D,C1,0.5)
L, supportData = apriori(dataSet, 0.5)
rules = generateRules(L, supportData, 0.7)
print(rules)

# Output recorded by the original author (Python 2 repr; the frozenset repr
# and the rule order may differ on other interpreter versions):
#
# frozenset([1]) --> frozenset([3]) conf: 1.0
# frozenset([5]) --> frozenset([2]) conf: 1.0
# frozenset([2]) --> frozenset([5]) conf: 1.0
# [(frozenset([1, 3]), frozenset([3]), 1.0), (frozenset([2, 5]), frozenset([2]), 1.0), (frozenset([2, 5]), frozenset([5]), 1.0)]
Apriori算法学习笔记
最新推荐文章于 2024-06-20 10:15:16 发布