# Apriori algorithm implemented in Python

# coding: utf-8

2

# Implement the Apriori algorithm in Python

4

5 #In[1]:

6

7

# Import the required libraries

from numpy import *

10

11

12 #In[2]:

13

14

def loadDataSet():
    """Return the small hard-coded transaction database used by the demos.

    Each inner list is one transaction; a fresh list of lists is built on
    every call so callers may mutate the result freely.
    """
    transactions = (
        (1, 3, 4),
        (2, 3, 5),
        (1, 2, 3, 5),
        (2, 5),
    )
    return [list(row) for row in transactions]

18

19 #In[3]:

20

21

def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-element frozensets, one per distinct item, in
        ascending item order.
    """
    unique_items = set()
    for transaction in dataSet:
        unique_items.update(transaction)
    # Return a real list: under Python 3 a bare map() is a one-shot iterator,
    # and the support scan walks the candidates once per transaction.
    return [frozenset([item]) for item in sorted(unique_items)]

31

32 #In[4]:

33

34

# Compute the support of each candidate in Ck over dataset D and return the itemsets whose support is at least minSupport

def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and keep the frequent ones.

    Args:
        D: sized collection of transactions (each a set of items).
        Ck: iterable of candidate itemsets (frozensets). It may be a one-shot
            iterator; it is materialized once before scanning.
        minSupport: minimum fraction of transactions an itemset must appear
            in to be kept.

    Returns:
        (retList, supportData): retList holds the itemsets whose support is
        at least minSupport; supportData maps every candidate that occurred
        in at least one transaction to its support.
    """
    # Materialize the candidates so they can be re-scanned for every
    # transaction even when the caller passes an iterator such as map().
    candidates = list(Ck)
    ssCnt = {}
    for tid in D:
        for can in candidates:
            if can.issubset(tid):
                ssCnt[can] = ssCnt.get(can, 0) + 1

    numItems = float(len(D))
    retList = []
    supportData = {}
    for key, count in ssCnt.items():
        support = count / numItems
        if support >= minSupport:
            retList.append(key)
        # NOTE(review): support is recorded for every counted candidate,
        # following the usual form of this algorithm.
        supportData[key] = support
    # Same ordering as repeatedly inserting at the front, without the
    # O(n) cost of each front insertion.
    retList.reverse()
    return retList, supportData

55

56 #In[15]:

57

58

def aprioriGen(Lk, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are joined when their first k-2 items, taken in sorted
    order, are identical.

    Args:
        Lk: sequence of frozensets, each of size k-1.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every joinable pair, in pair order.
    """
    Lk = list(Lk)
    # Sort BEFORE slicing: set iteration order is arbitrary, so slicing the
    # raw iteration order can miss pairs that share the same sorted prefix.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    lenLk = len(Lk)
    for i in range(lenLk):
        for j in range(i + 1, lenLk):
            if prefixes[i] == prefixes[j]:
                retList.append(Lk[i] | Lk[j])
    return retList

72

73

74 #In[14]:

75

76

def apriori(dataSet, minSupport=0.5):
    """Run the level-wise Apriori search for frequent itemsets.

    Args:
        dataSet: list of transactions (each an iterable of items).
        minSupport: minimum support an itemset needs to be kept.

    Returns:
        (L, supportData): L[i] is the list of frequent itemsets found at
        level i (size i+1); supportData maps itemsets to their support as
        accumulated from every scan.
    """
    candidates = createC1(dataSet)
    D = [set(transaction) for transaction in dataSet]
    print('D:', D)
    frequent, supportData = scanD(D, candidates, minSupport)
    L = [frequent]
    size = 2
    # L[-1] is always the most recently accepted level.
    while L[-1]:
        candidates = aprioriGen(L[-1], size)
        frequent, levelSupport = scanD(D, candidates, minSupport)
        supportData.update(levelSupport)
        if not frequent:
            # Nothing frequent at this size, so larger sizes cannot be either.
            break
        L.append(frequent)
        size += 1
    return L, supportData

94

95 #In[19]:

96

97

def calConf(freqSet, H, supportData, brl, minConf=0.7):
    """Evaluate candidate consequents of one frequent itemset.

    For each consequent in H the rule (freqSet - consequent) --> consequent
    is scored with confidence support(freqSet) / support(antecedent).
    Rules reaching minConf are printed and appended to brl as
    (antecedent, consequent, confidence) tuples.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: iterable of candidate consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        The consequents whose rule met minConf, in input order.
    """
    accepted = []
    for consequent in H:
        antecedent = freqSet - consequent
        confidence = supportData[freqSet] / supportData[antecedent]
        if confidence >= minConf:
            print(antecedent, '-->', consequent, 'conf', confidence)
            brl.append((antecedent, consequent, confidence))
            accepted.append(consequent)
    return accepted

108

109 #In[21]:

110

111

def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively derive rules from freqSet with ever larger consequents.

    The consequents in H are scored first; the ones that meet minConf are
    then merged into consequents one item larger and the process repeats.
    Scoring H itself (rather than only its merges) is what lets itemsets of
    three or more items yield rules with a single-item consequent.

    Args:
        freqSet: the frequent itemset (frozenset) the rules are drawn from.
        H: list of equally sized candidate consequents (frozensets).
        supportData: mapping from itemset to support.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        None; results are delivered through brl.
    """
    if not H:
        # Nothing to score; also avoids indexing into an empty list below.
        return
    m = len(H[0])
    # A consequent must leave at least one item for the antecedent.
    if len(freqSet) > m:
        Hmpl = calConf(freqSet, H, supportData, brl, minConf)
        print('Hmpl=', Hmpl)
        print('len(Hmpl)=', len(Hmpl), 'len(freqSet)=', len(freqSet))
        if len(Hmpl) > 1:
            # Only surviving consequents can be merged into larger ones.
            rulesFromConseq(freqSet, aprioriGen(Hmpl, m + 1),
                            supportData, brl, minConf)

122

123 #In[9]:

124

125

def generateRules(L, supportData, minConf=0.7):
    """Produce association rules from the frequent itemsets in L.

    Level 0 (single items) is skipped. Two-item sets (level 1) are scored
    directly with calConf; larger sets go through rulesFromConseq.

    Args:
        L: list of levels, each a list of frequent itemsets (frozensets).
        supportData: mapping from itemset to support.
        minConf: minimum confidence for a rule to be accepted.

    Returns:
        A list of (antecedent, consequent, confidence) tuples.
    """
    bigRuleList = []
    for level, itemsets in enumerate(L[1:], start=1):
        for freqSet in itemsets:
            singletons = [frozenset((item,)) for item in freqSet]
            handler = rulesFromConseq if level > 1 else calConf
            handler(freqSet, singletons, supportData, bigRuleList, minConf)
    return bigRuleList

137

138 #In[10]:

139

140

def testApriori():
    """Demo: mine the sample data at two support thresholds and print results."""
    dataSet = loadDataSet()
    print('dataSet:', dataSet)
    L1, supportData1 = apriori(dataSet, minSupport=0.7)
    print('L(0.7):', L1)
    print('supportData(0.7):', supportData1)
    print('------------------------------------------')
    L2, supportData2 = apriori(dataSet, minSupport=0.5)
    print('L(0.5):', L2)
    # Print the actual support table; previously the variable name was
    # trapped inside the string literal and never evaluated.
    print('supportData(0.5):', supportData2)
    print('------------------------------------------')

153

154 #In[11]:

155

156

def testGenerateRules():
    """Demo: mine the sample data at support 0.2 and print the derived rules."""
    transactions = loadDataSet()
    itemsets, supports = apriori(transactions, minSupport=0.2)
    print('L(0.2):', itemsets)
    print('minSupport(0.2):', supports)
    # NOTE(review): minConf=1.1 exceeds the maximum possible confidence of
    # 1.0, so no rule can be accepted here; confirm the intended threshold.
    print('Rules:', generateRules(itemsets, supports, minConf=1.1))

165

166 #In[12]:

167

168

def main():
    """Run both demos in order: frequent itemsets first, then rules."""
    for demo in (testApriori, testGenerateRules):
        demo()

173

174 #In[22]:

175

176

# Run the demos only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值