def loadDataSet():
    """Return a small hard-coded list of transactions used by the demo.

    Each transaction is a list of integer item ids. A new list object is
    built on every call.
    """
    return [
        [1, 3, 4],
        [2, 3, 5],
        [1, 2, 3, 5],
        [2, 5],
    ]
# Load the sample transactions into the module-level name D.
D = loadDataSet()
# Bare expression: a no-op when run as a script; an interactive session
# would echo the value here.
D
# Output
# NOTE(review): the literal below looks like the echoed value pasted from an
# interactive session; as a statement it is evaluated and discarded.
[[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
#辅助函数
def createC1(D):
    """Build the candidate 1-itemsets from the transactions in D.

    Args:
        D: iterable of transactions, each an iterable of hashable items.

    Returns:
        A list with one single-element frozenset per distinct item, in the
        order in which the items are first encountered while scanning D.
    """
    # dict keys keep first-insertion order, so this de-duplicates the items
    # while preserving the order of first appearance.
    distinct_items = dict.fromkeys(
        item for transaction in D for item in transaction
    )
    # frozenset (immutable set) so the itemsets can later be used as dict keys.
    return [frozenset([item]) for item in distinct_items]
# Build the candidate 1-itemsets from the sample transactions.
C1 = createC1(D)
# Bare expression: a no-op when run as a script; an interactive session
# would echo the value here.
C1
# NOTE(review): the literal below looks like the echoed value pasted from an
# interactive session; as a statement it is evaluated and discarded.
[frozenset({1}),
frozenset({3}),
frozenset({4}),
frozenset({2}),
frozenset({5})]
#辅助函数
def scanD(D, Ck, minSupport):
    """Count candidate support over D and split candidates by minSupport.

    Args:
        D: the original transactions (must support len()).
        Ck: candidate itemsets; each must provide issubset() and be hashable.
        minSupport: minimum support threshold, as a fraction of len(D).

    Returns:
        A tuple (L, N, supportData):
        L is the list of candidates whose support is >= minSupport,
        N is the list of the remaining counted candidates, and
        supportData maps each counted candidate to its support.
        Candidates contained in no transaction are never counted, so they
        appear in none of the three results.
    """
    hit_counts = {}
    for transaction in D:
        for candidate in Ck:
            # issubset checks whether the candidate is contained in the transaction.
            if candidate.issubset(transaction):
                hit_counts[candidate] = hit_counts.get(candidate, 0) + 1

    total_transactions = float(len(D))  # total number of transactions
    supportData = {
        candidate: hits / total_transactions
        for candidate, hits in hit_counts.items()
    }
    L = [c for c, support in supportData.items() if support >= minSupport]
    N = [c for c, support in supportData.items() if not support >= minSupport]
    return L, N, supportData
# Count support for the 1-item candidates with a minimum support of 0.5.
L,N,supportData = scanD(D,C1,0.5)
# The bare names below are no-ops when run as a script. NOTE(review): the
# literals after each name look like echoed values pasted from an interactive
# session; as statements they are evaluated and discarded.
L
[frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})]
N
[frozenset({4})]
supportData
{frozenset({1}): 0.5,
frozenset({3}): 0.75,
frozenset({4}): 0.25,
frozenset({2}): 0.75,
frozenset({5}): 0.75}
#辅助函数
def aprioriGen(Lk, k, N):
    """Generate candidate k-itemsets by joining the itemsets in Lk.

    Two itemsets are joined (unioned) when their first k-2 items, taken in
    sorted order, are equal. Candidates that contain any itemset listed in N
    are then pruned.

    Args:
        Lk: list of itemsets to join (sets or frozensets of mutually
            comparable items); the caller passes the frequent itemsets of
            the previous level.
        k: size of the candidates to build; only the first k-2 sorted items
            of each itemset are compared.
        N: iterable of itemsets known to be infrequent.

    Returns:
        A new list of candidate itemsets, in join order, with every
        candidate that is a superset of an itemset in N removed.
    """
    # Sort BEFORE slicing: set iteration order is arbitrary, so the prefix
    # must be taken from the sorted items to be meaningful.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]

    # Join step: union every pair that shares the same sorted prefix.
    joined = []
    for i, left in enumerate(Lk):
        for j in range(i + 1, len(Lk)):
            if prefixes[i] == prefixes[j]:
                joined.append(left | Lk[j])

    # Prune step: build a new list instead of removing from the list being
    # iterated, which would skip the element following each removal.
    infrequent = [set(itemset) for itemset in N]
    return [
        candidate
        for candidate in joined
        if not any(bad.issubset(candidate) for bad in infrequent)
    ]
def apriori(D, minSupport=0.5):
    """Run the level-wise Apriori search over the transactions in D.

    Args:
        D: the transactions, passed unchanged to createC1 and scanD.
        minSupport: minimum support threshold forwarded to scanD.

    Returns:
        A tuple (L, N, supportData):
        L is a list of lists, where L[i] holds the itemsets scanD reported as
        frequent at level i + 1 (the final entry is the empty list that
        stopped the loop),
        N accumulates the itemsets scanD reported as infrequent at every level,
        and supportData merges the support mappings from every level.
    """
    first_level, N, supportData = scanD(D, createC1(D), minSupport)
    L = [first_level]
    k = 2
    # L[-1] is always the most recently found level; stop once it is empty.
    while L[-1]:
        candidates = aprioriGen(L[-1], k, N)
        frequent_k, infrequent_k, support_k = scanD(D, candidates, minSupport)
        supportData.update(support_k)
        N.extend(infrequent_k)
        L.append(frequent_k)
        k += 1
    return L, N, supportData
# Run the full Apriori search on the sample transactions with a minimum
# support of 0.5.
L,N,supportData = apriori(D,minSupport = 0.5)
# The bare names below are no-ops when run as a script. NOTE(review): the
# literals after each name look like echoed values pasted from an interactive
# session; as statements they are evaluated and discarded.
L
[[frozenset({1}), frozenset({3}), frozenset({2}), frozenset({5})],
[frozenset({1, 3}), frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5})],
[frozenset({2, 3, 5})],
[]]
N
[frozenset({4}), frozenset({1, 2}), frozenset({1, 5})]
supportData
{frozenset({1}): 0.5,
frozenset({3}): 0.75,
frozenset({4}): 0.25,
frozenset({2}): 0.75,
frozenset({5}): 0.75,
frozenset({1, 3}): 0.5,
frozenset({2, 3}): 0.5,
frozenset({3, 5}): 0.5,
frozenset({2, 5}): 0.75,
frozenset({1, 2}): 0.25,
frozenset({1, 5}): 0.25,
frozenset({2, 3, 5}): 0.5}
这是我第一次发文章。Python 还没有学完,但数据挖掘课程需要敲代码,于是在 B 站上看了几位 UP 主的讲解,按照视频简单地敲了一遍代码。下面的链接是我看过的讲解视频,有需要的小伙伴可以去看一下(https://www.bilibili.com/video/BV15Z4y1p7ev?p=2)