把机器学习实战上的代码敲了一遍
对因 Python 2 写法而在 Python 3 下报错的地方做了修改
仅供参考
def loadDataSet():
    """Return the small sample transaction database used by the demo below."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def createC1(dataSet):
    """Build the size-1 candidate itemsets.

    Args:
        dataSet: iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-item frozensets, one per distinct item, ordered by
        item value. Frozensets are used so that itemsets can serve as
        dictionary keys.
    """
    distinct = {item for transaction in dataSet for item in transaction}
    return [frozenset([item]) for item in sorted(distinct)]


def scanD(D, Ck, minSupport):
    """Count candidate itemsets over the transactions and filter by support.

    Args:
        D: sized collection of transactions (sets / frozensets).
        Ck: iterable of candidate itemsets (frozensets).
        minSupport: minimum fraction of transactions that must contain a
            candidate for it to be reported as frequent.

    Returns:
        A tuple ``(retList, supportData)``. ``retList`` lists the candidates
        whose support is at least ``minSupport`` (most recently first-seen
        candidate first). ``supportData`` maps every candidate that occurred
        in at least one transaction to its support.
    """
    counts = {}
    for transaction in D:
        for candidate in Ck:
            if candidate.issubset(transaction):
                # dict.has_key() no longer exists in Python 3.
                counts[candidate] = counts.get(candidate, 0) + 1

    numItems = float(len(D))
    supportData = {cand: hits / numItems for cand, hits in counts.items()}
    retList = [cand for cand, support in supportData.items()
               if support >= minSupport]
    # The original prepended each hit; reversing once gives the same order.
    retList.reverse()
    return retList, supportData


# Quick check of the first pass:
#   C1 = createC1(dataSet)
#   D = list(map(frozenset, dataSet))
#   L1, supportData0 = scanD(D, C1, 0.5)
#   print(L1, supportData0)


def aprioriGen(Lk, k):
    """Create the size-``k`` candidates Ck from the itemsets in ``Lk``.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: list of frozensets, all of size ``k - 1``.
        k: size of the itemsets to generate.

    Returns:
        A list with the union of every matching pair.
    """
    # Sort BEFORE slicing: frozenset iteration order is arbitrary, so slicing
    # the unsorted items could compare the wrong "prefix" and drop candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    retList = []
    for i, left in enumerate(Lk):
        for j in range(i + 1, len(Lk)):
            if prefixes[i] == prefixes[j]:
                retList.append(left | Lk[j])
    return retList


def apriori(dataSet, minSupport=0.5):
    """Find all frequent itemsets of ``dataSet`` level by level.

    Args:
        dataSet: list of transactions (each an iterable of items).
        minSupport: minimum support for an itemset to count as frequent.

    Returns:
        A tuple ``(L, supportData)``. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last entry of ``L`` is the empty list
        that ended the search. ``supportData`` maps itemsets to support.
    """
    C1 = createC1(dataSet)
    D = [frozenset(transaction) for transaction in dataSet]
    frequent, supportData = scanD(D, C1, minSupport)
    L = [frequent]
    k = 2
    while L[-1]:
        candidates = aprioriGen(L[-1], k)
        frequent, levelSupport = scanD(D, candidates, minSupport)
        supportData.update(levelSupport)
        L.append(frequent)
        k += 1
    return L, supportData


# Association-rule functions
def generateRules(L, supportData, minConf=0.7):
    """Derive association rules from the frequent itemsets.

    Args:
        L: list of frequent-itemset levels as returned by ``apriori``.
        supportData: mapping of itemset to support as returned by ``apriori``.
        minConf: minimum confidence for a rule to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples.
    """
    bigRuleList = []
    # L[0] holds single items, which cannot be split into a rule.
    for i in range(1, len(L)):
        for freqSet in L[i]:
            H1 = [frozenset([item]) for item in freqSet]
            # Always score the single-item consequents first; previously
            # itemsets with 3+ items skipped them and lost rules such as
            # {2, 3} --> {5}.
            kept = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
            if i > 1:
                rulesFromConseq(freqSet, kept, supportData, bigRuleList,
                                minConf)
    return bigRuleList


def calcConf(freqSet, H, supportData, brl, minConf=0.7):
    """Score the rules ``freqSet - conseq --> conseq`` for each conseq in H.

    Rules whose confidence reaches ``minConf`` are printed and appended to
    ``brl`` as ``(antecedent, consequent, confidence)``.

    Args:
        freqSet: the frequent itemset being split.
        H: iterable of candidate consequents (frozensets).
        supportData: mapping of itemset to support; must contain ``freqSet``
            and every antecedent.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence threshold.

    Returns:
        The consequents whose rule met the threshold; used when an itemset
        with more than two items is split further.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqSet - conseq
        conf = supportData[freqSet] / supportData[antecedent]
        if conf >= minConf:
            print(antecedent, '-->', conseq, ',可信度为:', conf)
            brl.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH


def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
    """Recursively grow the consequents in ``H`` by one item and score them.

    Args:
        freqSet: the frequent itemset being split.
        H: list of equally sized consequents (frozensets) to merge.
        supportData: mapping of itemset to support.
        brl: list that accepted rules are appended to (mutated in place).
        minConf: minimum confidence threshold.
    """
    if not H:
        # Nothing to merge; also avoids indexing H[0] on an empty list.
        return
    m = len(H[0])
    # A larger consequent must still leave a non-empty antecedent.
    if len(freqSet) > m + 1:
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)
        if len(Hmp1) > 1:
            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)


# Demo
dataSet = loadDataSet()
L, supportData = apriori(dataSet)
rules = generateRules(L, supportData, 0.5)