def loadDataSet():
    """Return the built-in sample transaction database.

    Each inner list is one transaction made of integer item ids.  A new
    list object is built on every call.
    """
    transactions = [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
    return transactions
def creatC1(dataSet):
    """Build the candidate 1-itemsets (C1) for a transaction database.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-item ``frozenset`` objects, one per distinct item,
        ordered by item value.  ``frozenset`` is hashable, so the candidates
        can be used as dictionary keys when their support is counted.
    """
    items = set()
    for transaction in dataSet:
        # Set union de-duplicates as we go instead of concatenating every
        # transaction into one ever-growing list first.
        items.update(transaction)
    # Return a real list rather than a lazy ``map`` object: the candidates
    # are iterated once per transaction, which would exhaust a one-shot
    # iterator after the first pass on Python 3.
    return [frozenset([item]) for item in sorted(items)]
def scanD(D, Ck, minSupport):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: Iterable of transactions; each one is tested with
            ``candidate.issubset(transaction)``.
        Ck: Iterable of candidate itemsets (``frozenset`` objects, since they
            are used as dictionary keys).
        minSupport: Minimum fraction of transactions that must contain a
            candidate for it to be reported as frequent.

    Returns:
        A ``(retList, supportData)`` tuple.  ``retList`` holds the candidates
        whose support is at least ``minSupport``.  ``supportData`` maps every
        candidate that occurred in at least one transaction to its support,
        including candidates that fall below the threshold.
    """
    # Materialise both inputs: Ck is walked once per transaction and ``len``
    # is taken of D, which breaks when a one-shot iterator such as a
    # Python 3 ``map`` object is passed in.
    transactions = list(D)
    candidates = list(Ck)

    ssCnt = {}
    for tid in transactions:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    # float() keeps the division below a true division on Python 2 as well.
    numItems = float(len(transactions))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        supportData[key] = support
    # Frequent itemsets are returned in reverse counting order; appending and
    # reversing once avoids an O(n) ``insert(0, ...)`` for every hit.
    retList.reverse()
    return retList, supportData
def apprioriGen(Lk, k):
    """Join itemsets into larger candidate itemsets.

    Two itemsets are merged (set union) when their first ``k - 2`` items,
    taken in sorted order, are identical.  For inputs of size ``k - 1`` the
    union has ``k`` items and each candidate is produced exactly once.

    Args:
        Lk: Iterable of ``frozenset`` itemsets whose items are mutually
            orderable.
        k: Target candidate size; ``k - 2`` is the length of the shared
            prefix required for a join.

    Returns:
        A list with the union of every joinable pair, in pair order.
    """
    itemsets = list(Lk)
    # Sort BEFORE slicing.  Set iteration order is arbitrary, so slicing the
    # unsorted item list can pick different "prefix" elements for two sets
    # that do share a sorted prefix, and that candidate would be missed.
    # Computing each prefix once also keeps the work out of the inner loop.
    prefixes = [sorted(itemset)[:k - 2] for itemset in itemsets]

    retList = []
    lenLk = len(itemsets)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(itemsets[i] | itemsets[j])
    return retList
def appriori(dataSet, minSupport=0.5):
    """Mine the frequent itemsets of a transaction database level by level.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable items.
        minSupport: Minimum support (fraction of transactions) an itemset
            needs to be kept; passed through to ``scanD``.

    Returns:
        A ``(L, supportData)`` tuple.  ``L[0]`` holds the frequent itemsets
        found among the single-item candidates and each following entry holds
        the result of one more join-and-scan round (the last entry may be
        empty).  ``supportData`` merges the support dictionaries returned by
        every ``scanD`` call.
    """
    # Both collections are re-scanned on every level, so they must be real
    # lists; a lazy ``map`` object (Python 3) would be empty after one pass
    # and has no ``len``.
    C1 = list(creatC1(dataSet))
    D = [set(transaction) for transaction in dataSet]

    L1, supportData = scanD(D, C1, minSupport)
    L = [L1]
    k = 2
    # A join needs at least two itemsets from the previous level.
    while len(L[k - 2]) > 1:
        Ck = apprioriGen(L[k - 2], k)
        Lk, supportk = scanD(D, Ck, minSupport)
        L.append(Lk)
        supportData.update(supportk)
        k += 1
    return L, supportData
def generateRule(item, supportData, minConf=0.7):
    """Print the association rules derived from a single frequent itemset.

    For a consequent ``c`` taken from ``item`` the rule is
    ``(item - c) ---> c`` and its confidence is
    ``supportData[item] / supportData[item - c]``.  Single-item consequents
    are tried first; those that reach ``minConf`` are printed and then
    combined with ``apprioriGen`` into larger consequents, which are printed
    when they reach ``minConf`` too.  The search stops when no consequents
    remain or the consequent has grown to the whole itemset.

    Args:
        item: The frequent itemset as a ``frozenset``.
        supportData: Mapping from ``frozenset`` itemsets to their support;
            must contain ``item`` and every antecedent that is evaluated.
        minConf: Minimum confidence a rule needs in order to be printed.

    Returns:
        None.  Rules are written to standard output, one per line.
    """
    # An itemset with fewer than two items cannot be split into a non-empty
    # antecedent and consequent, so there is nothing to report.
    if len(item) < 2:
        return

    itemSupport = supportData[item]

    consequents = []
    for element in item:
        consequent = frozenset([element])
        antecedent = item - consequent
        conf = itemSupport / supportData[antecedent]
        if conf >= minConf:
            # Single-argument print() emits the same text on Python 2 and 3.
            print('%s ---> %s Conf %s' % (antecedent, consequent, conf))
            consequents.append(consequent)

    k = 2
    while consequents:
        consequents = apprioriGen(consequents, k)
        # Stop once nothing can be joined or the consequent would be the
        # entire itemset (which leaves an empty antecedent).
        if not consequents or consequents[0] == item:
            break
        for consequent in consequents:
            antecedent = item - consequent
            conf = itemSupport / supportData[antecedent]
            if conf >= minConf:
                print('%s ---> %s Conf %s' % (antecedent, consequent, conf))
        k += 1
def generateRules(dataSet, supportData, L, minConf=0.7):
    """Walk all frequent itemsets and print the rules for each of them.

    Every itemset in every level of ``L`` after the first one (``L[0]`` is
    skipped) is handed to ``generateRule`` together with ``supportData`` and
    ``minConf``.  ``dataSet`` is accepted but not used.
    """
    later_levels = L[1:]
    itemsets = (
        itemset
        for level in later_levels
        for itemset in level
    )
    for itemset in itemsets:
        generateRule(itemset, supportData, minConf)
# Demo run: mine the sample data with the default minimum support, show the
# support table and the frequent itemsets, then print every rule whose
# confidence is at least 0.6.
dataSet = loadDataSet()
L, supportData = appriori(dataSet)
# Parenthesised single-argument print gives the same output on Python 2 and
# Python 3; the bare print statement is a syntax error on Python 3.
print(supportData)
print(L)
generateRules(dataSet, supportData, L, 0.6)
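# Python implementation of the Apriori algorithm.
# (Footer text from the source web page: "latest recommended article published on 2024-09-30 11:58:37".)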