# Apriori algorithm (Apriori算法):
def loadData():
    """Return the built-in sample database: nine transactions of integer items."""
    transactions = [
        [1, 2, 5],
        [2, 4],
        [2, 3],
        [1, 2, 4],
        [1, 3],
        [2, 3],
        [1, 3],
        [1, 2, 3, 5],
        [1, 2, 3],
    ]
    return transactions
def find_frequent_1_itemsets(D, minsupport):
    """Return the frequent 1-itemsets of D as a sorted list of one-element lists.

    Args:
        D: Iterable of transactions; each transaction is an iterable of
            hashable, mutually comparable items.
        minsupport: Minimum number of transactions an item must appear in
            to be considered frequent.

    Returns:
        ``[[item], ...]`` in ascending item order, containing every item
        whose support count is at least ``minsupport``.
    """
    support = {}
    for transaction in D:
        # Support counts transactions, not occurrences, so an item that is
        # repeated inside one transaction is only counted once.
        for item in set(transaction):
            support[item] = support.get(item, 0) + 1
    return [[item] for item in sorted(support) if support[item] >= minsupport]
def aproiri_gen(L, k):
    """Generate candidate (k+1)-itemsets from the frequent k-itemsets in L.

    Join step: two itemsets that share every item except the last are merged
    into one candidate. Prune step: a candidate is discarded when any subset
    obtained by dropping a single item is not itself in L.

    Args:
        L: List of frequent itemsets, each a sequence of hashable items in
            ascending order (all of the same length).
        k: Size of the itemsets in L. Unused; kept so existing callers that
            pass it keep working.

    Returns:
        List of candidate itemsets (lists), each in ascending item order.
    """
    # Tuples are hashable, so the prune step can use O(1) set lookups
    # instead of scanning L for every subset.
    frequent = {tuple(itemset) for itemset in L}
    res = []
    for i, first in enumerate(L):
        for second in L[i + 1:]:
            if first[:-1] == second[:-1] and first[-1] < second[-1]:
                # Appending the larger last item keeps the candidate sorted.
                # Building it via set union would yield an arbitrary order
                # and break the prefix comparison at the next level.
                candidate = list(first) + [second[-1]]
                all_subsets_frequent = all(
                    tuple(candidate[:m] + candidate[m + 1:]) in frequent
                    for m in range(len(candidate))
                )
                if all_subsets_frequent:
                    res.append(candidate)
    return res
def has_infrequent_subset(candidate, L):
    """Tell whether some one-item-shorter subset of candidate is missing from L.

    Args:
        candidate: Candidate itemset as a list.
        L: List of itemsets (lists) to look the subsets up in.

    Returns:
        True if dropping any single item from ``candidate`` gives a list
        that is not an element of ``L``; False otherwise.
    """
    def without(item):
        # Work on a copy so the caller's candidate is left untouched.
        remainder = candidate[:]
        remainder.remove(item)
        return remainder

    return any(without(item) not in L for item in candidate)
def compareList(l1, l2):
    """Return True when every element of l1 also occurs in l2, else False."""
    return all(element in l2 for element in l1)
def Aproiri(D, minsupport):
    """Mine the frequent itemsets of D level by level (Apriori).

    Progress is printed for every level: the candidate list after join and
    prune, and each candidate that meets the support threshold.

    Args:
        D: Collection of transactions. It is traversed once per candidate,
            so it must be re-iterable (e.g. a list), not a one-shot iterator.
        minsupport: Minimum number of transactions that must contain an
            itemset for it to be frequent.

    Returns:
        A list ``L`` where ``L[0]`` is an empty placeholder and ``L[k]``
        holds the frequent k-itemsets. The last entry is always an empty
        list: the level at which no frequent itemsets remained.
    """
    L = [[], find_frequent_1_itemsets(D, minsupport)]
    k = 2
    # Keep extending until a level comes back empty. A fixed upper bound on k
    # would silently drop frequent itemsets larger than that bound.
    while L[k - 1]:
        Ck = aproiri_gen(L[k - 1], k - 1)
        # Message: "candidates Ck after self-join and pruning".
        print("自链接加剪枝后得到的候选Ck:" , Ck)
        # Message: "scan D and count every candidate".
        print("遍历D对每个候选计数")
        Lk = []
        for candi in Ck:
            # Support count = number of transactions containing the candidate.
            cnt = sum(1 for transaction in D if compareList(candi, transaction))
            if cnt >= minsupport:
                # Message: "qualifying itemset: ... occurrences: ...".
                print ("符合要求的项集: ", candi, "出现次数: ",cnt)
                Lk.append(candi)
        L.append(Lk)
        k += 1
    return L
# Demo: mine the sample transactions with a minimum support count of 2 and
# print the resulting list of frequent itemsets per level.
D = loadData()
L = Aproiri(D, 2)
print (L)