代码实现:
# 输入:
# D:事务数据库
# min_sup:最小支持度阈值
# 输出:L,D中的频繁项集
import pandas as pd
import numpy as np
# Load the transaction database; header=None makes pandas treat the first
# sheet row as data rather than as column labels.
D = pd.read_excel('menu_orders.xlsx', header=None)
# p holds one list of cell values per sheet row (one transaction each).
# Every row is converted instead of a fixed first ten, so a shorter sheet no
# longer raises IndexError and a longer one is no longer truncated.
p = [list(row) for row in np.array(D)]
def Apriori_one(D, min_sep):
    """Count the single items 'a'..'e' and return the frequent 1-itemsets.

    Args:
        D: Transaction database. Kept for interface compatibility but not
            read by this function; the transactions are taken from the
            module-level list ``p``.
        min_sep: Minimum support count. An item is kept only when its
            count is at least this value.

    Returns:
        A dict mapping each surviving item to the number of times it
        occurs across all transactions in ``p``, in the order
        'a', 'b', 'c', 'd', 'e'.
    """
    # Only these five items are candidates; any other value found in a
    # transaction is skipped.
    counts = {'a': 0, 'b': 0, 'c': 0, 'd': 0, 'e': 0}
    # Iterate over p itself rather than a fixed range(10), so the function
    # works for any number of transactions (including fewer than ten).
    for transaction in p:
        for item in transaction:
            if item in counts:
                counts[item] += 1
    # Build the result in one pass instead of popping infrequent items.
    return {item: count for item, count in counts.items() if count >= min_sep}
def apriori_gen(Lk):#Lk频繁项集字典
Lm = {} # L1:放连接后的候选项集
for l1 in list(Lk.keys()):
for l2 in list(Lk.keys()):
if len(l1) ==1 and l1 < l2:
c=l1+l2 #c:表示连接候选项集
if not has_infrequent_subset(c,Lk):
Lm[c] = 0
elif len