# FP-tree node
class Node:
    """A single node of an FP-tree.

    Attributes:
        name: The item this node represents (``"root"`` for the tree root).
        count: Number of inserted transactions whose path passes through
            this node.
        parent: The parent node, or ``None`` for the root.
        children: Mapping from item name to the child node for that item.
        next: The next node carrying the same item name (header-table
            chain), or ``None`` when this node is the last one.
    """

    def __init__(self, name, parent=None, count=1):
        self.name = name
        self.count = count
        self.parent = parent
        self.children = {}
        # Tree-building and mining code walks same-item nodes through
        # ``next``; it must exist on every node so the walk can terminate.
        self.next = None

    # Human-readable node summary
    def __str__(self):
        return 'name: {}, count: {}'.format(self.name, self.count)
# Build the FP-tree
def create_fp_tree(transactions, min_sup):
    """Build an FP-tree and its header table from ``transactions``.

    Args:
        transactions: A re-iterable collection of transactions, each an
            iterable of hashable items. It is traversed twice: once to
            count item supports and once to insert the transactions.
        min_sup: Minimum absolute support count an item needs to be kept.

    Returns:
        ``(root, header)`` where ``root`` is the tree's root ``Node`` and
        ``header`` maps every frequent item to the first node carrying it;
        further nodes for the same item are reachable through ``.next``.
        ``(None, None)`` when no item reaches ``min_sup``.
    """
    # Count the support of every item.
    support = {}
    for trans in transactions:
        for item in trans:
            support[item] = support.get(item, 0) + 1
    # Drop the items that do not reach the minimum support.
    support = {item: n for item, n in support.items() if n >= min_sup}
    # Nothing frequent: there is no tree to grow.
    if not support:
        return None, None

    # One global ranking (support descending, ties broken by first
    # appearance). Every transaction must be ordered by the same ranking,
    # otherwise equal-support items could be inserted in different orders
    # and identical item sets would end up on different branches.
    ranked = sorted(support, key=support.get, reverse=True)
    rank = {item: position for position, item in enumerate(ranked)}

    # Header table: first node of each item's chain, for fast access to
    # all nodes with the same name.
    header = dict.fromkeys(support)
    # Last node of each chain, so appending does not re-walk the chain.
    tails = {}

    root = Node("root")
    for trans in transactions:
        ordered = sorted((item for item in trans if item in rank),
                         key=rank.__getitem__)
        parent = root
        for item in ordered:
            child = parent.children.get(item)
            if child is None:
                # A new node has been reached by exactly one transaction.
                child = Node(item, parent, 1)
                # Terminate the chain explicitly so traversal code can
                # always rely on ``next`` being present.
                child.next = None
                parent.children[item] = child
                if header[item] is None:
                    header[item] = child
                else:
                    tails[item].next = child
                tails[item] = child
            else:
                child.count += 1
            parent = child
    return root, header
# Get the prefix path of a node
def get_prefix_path(node):
    """Return the item names from the top of the tree down to ``node``.

    The walk follows ``parent`` links upwards and stops at the root
    sentinel, which is not included in the result.

    Args:
        node: A tree node exposing ``name`` and ``parent``, or ``None``.

    Returns:
        A list of names ordered from the node nearest the root to ``node``
        itself; empty when ``node`` is ``None`` or is the root sentinel.
    """
    path = []
    while node is not None:
        # The sentinel is the parentless node named 'root'. Checking the
        # name alone would cut the path short at an item called 'root'.
        if node.parent is None and node.name == 'root':
            break
        path.append(node.name)
        node = node.parent
    path.reverse()
    return path
# Check whether backtracking is needed
def needs_backtracking(path, node):
    """Return ``True`` if any item of ``path`` is a direct child of ``node``.

    Args:
        path: Iterable of item names to look for.
        node: A tree node exposing a ``children`` mapping keyed by item name.

    Returns:
        ``True`` as soon as one item of ``path`` is found among
        ``node.children``; ``False`` otherwise, including for an empty path.
    """
    return any(item in node.children for item in path)
# Mine the frequent itemsets stored in an FP-tree
def fp_growth(root, header, min_sup, prefix=None):
    """Recursively mine frequent itemsets from an FP-tree.

    Args:
        root: Root node of the tree. Kept for interface compatibility;
            the mining itself only reads ``header``.
        header: Header table mapping each frequent item to the first node
            of its chain (nodes linked through ``.next``). May be ``None``
            or empty, in which case nothing is mined.
        min_sup: Minimum absolute support count.
        prefix: Items already fixed by the enclosing recursion levels.
            Defaults to an empty prefix.

    Returns:
        A list of itemsets, each a list formed by ``prefix`` followed by
        one or more items taken from this tree. ``prefix`` on its own is
        not included.
    """
    # No frequent items means no tree to mine.
    if not header:
        return []
    base = [] if prefix is None else list(prefix)

    # Total support of an item is the sum of the counts along its chain;
    # the first node alone only covers one branch of the tree.
    support = {}
    for item, node in header.items():
        total = 0
        while node is not None:
            total += node.count
            node = node.next
        support[item] = total

    freq_itemsets = []
    # Visit the header table from the least to the most supported item.
    for item in sorted(header, key=support.__getitem__):
        new_prefix = base + [item]
        # Every header item is frequent in this (conditional) tree, so the
        # extended prefix is a frequent itemset whether or not it can be
        # extended further.
        freq_itemsets.append(new_prefix)

        # Build the conditional pattern base for the next, smaller tree.
        conditional_base = []
        node = header[item]
        while node is not None:
            path = get_prefix_path(node.parent)
            if path:
                # The path occurred ``node.count`` times together with
                # ``item``; repeat it so the conditional tree sees the
                # right supports.
                conditional_base.extend([path] * node.count)
            node = node.next

        # Build the conditional FP-tree and mine it recursively.
        cond_root, cond_header = create_fp_tree(conditional_base, min_sup)
        if cond_root is not None:
            freq_itemsets += fp_growth(cond_root, cond_header, min_sup,
                                       new_prefix)
    return freq_itemsets
# Example: mine the frequent itemsets of a small transaction database
transactions = [
    ['a', 'c', 'd'], ['a', 'b', 'd'], ['a', 'c', 'd'], ['a', 'f'],
    ['a', 'b'], ['a', 'b'], ['a', 'b'], ['a', 'b', 'c', 'd'],
    ['a', 'd'], ['a', 'c'], ['c', 'd', 'f'], ['a', 'e'],
    ['a', 'b'], ['a', 'b'], ['a', 'b'], ['a', 'c', 'd', 'f'],
    ['a', 'c'], ['a', 'c', 'e'], ['a', 'b'], ['a', 'b', 'd'],
    ['a', 'b', 'c', 'e'], ['a', 'b'], ['a', 'c', 'e'], ['a', 'b', 'd'],
    ['a', 'e'], ['a', 'c', 'd'], ['a', 'b', 'c', 'd'], ['a', 'b', 'c'],
    ['a', 'b', 'c'], ['a', 'b', 'c', 'e'], ['a', 'd'], ['a', 'b', 'c', 'e'],
]
# Minimum absolute support count an itemset needs to be reported
min_sup = 3
root, header = create_fp_tree(transactions, min_sup)
freq_itemsets = fp_growth(root, header, min_sup)
print(freq_itemsets)