"""Apriori frequent-itemset mining.

Author's note (translated): I am a Python beginner currently studying data
mining; this is my Python implementation of the Apriori algorithm.

Conventions used throughout:
* a transaction is an iterable of items that ``int()`` accepts;
* a 1-itemset is represented by a bare ``int``;
* a k-itemset (k >= 2) is represented by a sorted ``list`` of ``int``.
"""

from itertools import combinations


def _as_itemset(item):
    """Return *item* as a frozenset, treating a bare int as a 1-itemset."""
    if isinstance(item, int):
        return frozenset((item,))
    return frozenset(item)


def _all_subsets_frequent(item, frequent):
    """Return True if every (len-1)-subset of *item* is in *frequent*.

    *frequent* is a set of frozensets. Itemsets with fewer than two items
    have no non-empty proper subsets to check and are accepted.
    """
    size = len(item) - 1
    if size < 1:
        return True
    return all(
        frozenset(subset) in frequent for subset in combinations(item, size)
    )


# Build C1
def createC1(data):
    """Build the candidate 1-itemsets (C1).

    Args:
        data: iterable of transactions; every item is converted with ``int``.

    Returns:
        A ``set`` of the distinct items, as ints.
    """
    return {int(element) for transaction in data for element in transaction}


# Compute support
def countSupport(item, data):
    """Return the fraction of transactions in *data* that contain *item*.

    Args:
        item: a bare int (1-itemset) or an iterable of items.
        data: iterable of transactions.

    Returns:
        ``matches / number_of_transactions`` as a float, or ``0.0`` when
        *data* is empty (instead of raising ``ZeroDivisionError``).
    """
    target = _as_itemset(item)
    hits = 0
    total = 0
    for transaction in data:
        total += 1
        if target.issubset(transaction):
            hits += 1
    return hits / total if total else 0.0


# Derive Lk from Ck
def createLkByCk(Ck, data, minsupport):
    """Filter the candidates *Ck* down to the frequent itemsets Lk.

    A candidate is kept only when its support is strictly greater than
    *minsupport*.

    Returns:
        ``(LK, support)``: two parallel lists, the kept candidates and
        their supports, in the order they appear in *Ck*.
    """
    LK = []
    support = []
    for candidate in Ck:
        value = countSupport(candidate, data)
        if value > minsupport:
            # Appended together so both lists stay index-aligned.
            LK.append(candidate)
            support.append(value)
    return LK, support


# Apriori (downward-closure) check
def judge(item, Lk):
    """Return True if every (k)-subset of the (k+1)-candidate *item* is in *Lk*.

    Neither *item* nor the members of *Lk* are modified.
    """
    frequent = {_as_itemset(itemset) for itemset in Lk}
    return _all_subsets_frequent(item, frequent)


# Derive Ck+1 from Lk
def createCkByLk(k, Lk):
    """Join step: build the (k+1)-candidates from the frequent k-itemsets.

    Args:
        k: size of the itemsets in *Lk*.
        Lk: for ``k == 1`` a sequence of bare items; otherwise a sequence
            of k-item lists.

    Returns:
        A list of candidates, each a sorted list. For ``k >= 2`` candidates
        with an infrequent k-subset are pruned. *Lk* is not modified.
    """
    if k == 1:
        # Joining L1 with itself: every pair is a candidate, no pruning needed
        # because both 1-subsets are frequent by construction.
        return [sorted(pair) for pair in combinations(Lk, 2)]

    # Work on sorted copies so the prefix comparison is meaningful and the
    # caller's lists are left untouched.
    itemsets = [sorted(itemset) for itemset in Lk]
    frequent = {frozenset(itemset) for itemset in itemsets}
    candidates = []
    for first, second in combinations(itemsets, 2):
        # Two k-itemsets join when they share the first k-1 items and
        # differ in the last one.
        if first[:k - 1] == second[:k - 1] and first[-1] != second[-1]:
            candidate = sorted(first + [second[-1]])
            if _all_subsets_frequent(candidate, frequent):
                candidates.append(candidate)
    return candidates


def createL(data, minsupport):
    """Run Apriori and return every level of frequent itemsets.

    Args:
        data: iterable of transactions (items must be ``int``-convertible).
        minsupport: support threshold; itemsets need support strictly
            greater than this value.

    Returns:
        ``(L, support)`` where ``L[i]`` is the list of frequent
        (i+1)-itemsets and ``support[i]`` the parallel list of their
        supports. Both are empty when nothing is frequent.
    """
    # Normalise once so the items match the ints produced by createC1 and
    # the per-transaction sets are not rebuilt for every support count.
    transactions = [
        frozenset(int(element) for element in transaction)
        for transaction in data
    ]

    L = []
    support = []
    # Sorted for a deterministic level-1 order.
    ck = sorted(createC1(transactions))
    k = 1
    while ck:
        lk, lk_support = createLkByCk(ck, transactions, minsupport)
        if not lk:
            break
        # Record the level before generating the next candidates so the
        # last frequent level is never dropped.
        L.append(lk)
        support.append(lk_support)
        ck = createCkByLk(k, lk)
        k += 1
    return L, support
数据挖掘之apriori
最新推荐文章于 2022-12-18 21:08:26 发布