# coding: utf-8
# 利用python实现apriori算法
# In[1]:
#导入需要的库
from numpy import *
# In[2]:
def loadDataSet():
    """Return a small sample transaction dataset for trying out Apriori.

    Returns:
        A freshly built list of four transactions, each a list of integer
        item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
# In[3]:
def createC1(dataSet):
    """Build the candidate 1-itemsets (C1) from a transaction dataset.

    Args:
        dataSet: Iterable of transactions, each an iterable of hashable,
            mutually orderable items.

    Returns:
        A list of single-element frozensets, one per distinct item found in
        ``dataSet``, ordered by item value. Frozensets are used so the
        candidates are hashable and can serve as dictionary keys.
    """
    # A set gives O(1) de-duplication instead of scanning a list per item.
    distinct_items = set()
    for transaction in dataSet:
        distinct_items.update(transaction)
    # Return a concrete list rather than a bare map(): on Python 3 map() is
    # a one-shot iterator, which would be exhausted after the first pass by
    # any caller that iterates the candidates more than once.
    return [frozenset([item]) for item in sorted(distinct_items)]
# In[4]:
# Compute the support of each candidate in Ck over dataset D and keep the candidates whose support is at least minSupport
def scanD(D,Ck,minSupport):
ssCnt={}
for tid in D:
for can in Ck:
if can.issubset(tid):
if can not in ssCnt.keys():
ssCnt[can]=1
else :
ssCnt[can]+=1
numItems=float(len(D))
retList=[]
supportData={}
for key in ssCnt:
support=ssCnt[key]/numItems
if support>= min