# 基于Python语言使用FP-Growth算法构建FP树

# FP-tree node
class Node:
    """A single node of an FP-tree.

    Attributes:
        name: The item this node represents (the root uses the name "root").
        count: Number of transactions whose path passes through this node.
        parent: The parent ``Node``, or ``None`` for a node without a parent.
        children: Mapping from item name to the child ``Node`` for that item.
        next: The next node carrying the same item name (header-table chain),
            or ``None`` when this node is the last one in its chain.
    """

    def __init__(self, name, parent=None, count=1):
        self.name = name
        self.count = count
        self.parent = parent
        self.children = {}
        # Tree construction and mining both follow `node.next`; it must exist
        # on every node, otherwise the first chain traversal raises
        # AttributeError.
        self.next = None

    # Human-readable node summary
    def __str__(self):
        return 'name: {}, count: {}'.format(self.name, self.count)

# Build the FP-tree
def create_fp_tree(transactions, min_sup):
    """Build an FP-tree and its header table from a list of transactions.

    Args:
        transactions: Iterable-of-iterables of hashable items. It is traversed
            twice (once to count support, once to insert), so it must be
            re-iterable, e.g. a list.
        min_sup: Minimum absolute support (number of transactions) an item
            needs in order to be kept.

    Returns:
        A ``(root, header)`` tuple. ``root`` is the tree's root ``Node`` and
        ``header`` maps each frequent item to the first node of that item's
        ``next``-linked chain. Returns ``(None, None)`` when no item reaches
        ``min_sup``.
    """
    # Support of an item = number of transactions containing it, so an item
    # repeated inside one transaction is counted once. dict.fromkeys keeps the
    # first-seen order, which makes the tie-break below deterministic.
    support = {}
    for trans in transactions:
        for item in dict.fromkeys(trans):
            support[item] = support.get(item, 0) + 1

    # Drop items below the minimum support
    support = {item: n for item, n in support.items() if n >= min_sup}

    # Nothing frequent: there is no tree to grow
    if not support:
        return None, None

    # One global ordering (descending support, ties by first appearance) that
    # every transaction is sorted with. Sorting each transaction by support
    # alone would leave equal-support items in per-transaction order and put
    # the same itemset on different paths.
    ordered_items = sorted(support, key=support.get, reverse=True)
    rank = {item: pos for pos, item in enumerate(ordered_items)}

    # Header table: item -> first node of the chain; `tails` remembers the
    # last node of each chain so appending does not re-walk the chain.
    header = dict.fromkeys(support)
    tails = {}

    root = Node("root")
    for trans in transactions:
        # Keep only frequent items, de-duplicated, in the global order
        kept = sorted({item for item in trans if item in rank},
                      key=rank.__getitem__)

        parent = root
        for item in kept:
            child = parent.children.get(item)
            if child is None:
                # A freshly created node has been reached by exactly one
                # transaction so far.
                child = Node(item, parent, 1)
                # Initialise the chain link explicitly so traversals that
                # read `.next` always find the attribute.
                child.next = None
                parent.children[item] = child
                if header[item] is None:
                    header[item] = child
                else:
                    tails[item].next = child
                tails[item] = child
            else:
                child.count += 1
            parent = child

    return root, header

# Collect the path of item names leading to a node
def get_prefix_path(node):
    """Return the item names from just below the root down to ``node``.

    The walk follows ``parent`` links upward and stops at ``None`` or at a
    node whose name is ``'root'``; the result is ordered top-down and
    includes ``node`` itself.
    """
    trail = []
    current = node
    while not (current is None or current.name == 'root'):
        trail.append(current.name)
        current = current.parent
    # Names were gathered bottom-up; flip them into top-down order.
    trail.reverse()
    return trail

# Check whether backtracking is needed
def needs_backtracking(path, node):
    """Return True if any element of ``path`` is a key of ``node.children``."""
    return any(step in node.children for step in path)

# Mine frequent itemsets from the FP-tree
def fp_growth(root, header, min_sup, prefix=None):
    """Recursively mine frequent itemsets from an FP-tree.

    Args:
        root: Root node of the tree. It is not read here (all access goes
            through ``header``); it is kept for interface compatibility.
        header: Header table mapping each item to the first node of its
            ``next``-linked chain, or ``None`` when there is no tree.
        min_sup: Minimum absolute support, passed on to ``create_fp_tree``
            when building conditional trees.
        prefix: Items already fixed by the enclosing recursion levels.
            Defaults to an empty prefix.

    Returns:
        A list of itemsets, each a list of items. Every itemset consists of
        ``prefix`` followed by the items added at this level and below.
        Returns an empty list when ``header`` is ``None``.
    """
    # create_fp_tree hands back (None, None) when nothing is frequent
    if header is None:
        return []
    prefix = [] if prefix is None else prefix

    # Total support of an item is the sum of counts along its whole node
    # chain, not just the count of the first node.
    support = {}
    for item, node in header.items():
        total = 0
        while node is not None:
            total += node.count
            node = node.next
        support[item] = total

    freq_itemsets = []
    # Visit items from least to most frequent
    for item in sorted(header, key=support.get):
        new_prefix = prefix + [item]

        # Conditional pattern base: the prefix path of every node of `item`,
        # repeated node.count times so each path carries its real weight.
        conditional_base = []
        node = header[item]
        while node is not None:
            path = get_prefix_path(node.parent)
            if path:
                conditional_base.extend([path] * node.count)
            node = node.next

        # Build the conditional FP-tree
        cond_root, cond_header = create_fp_tree(conditional_base, min_sup)

        # Mine the conditional tree for longer itemsets
        if cond_root is not None:
            freq_itemsets += fp_growth(cond_root, cond_header, min_sup, new_prefix)

        # Every item in the header table is frequent, so new_prefix is a
        # frequent itemset whether or not its conditional tree is empty.
        freq_itemsets.append(new_prefix)

    return freq_itemsets

# Example
# Sample transaction database; each inner list is one transaction.
transactions = [
    ['a', 'c', 'd'], ['a', 'b', 'd'], ['a', 'c', 'd'], ['a', 'f'],
    ['a', 'b'], ['a', 'b'], ['a', 'b'], ['a', 'b', 'c', 'd'],
    ['a', 'd'], ['a', 'c'], ['c', 'd', 'f'], ['a', 'e'],
    ['a', 'b'], ['a', 'b'], ['a', 'b'], ['a', 'c', 'd', 'f'],
    ['a', 'c'], ['a', 'c', 'e'], ['a', 'b'], ['a', 'b', 'd'],
    ['a', 'b', 'c', 'e'], ['a', 'b'], ['a', 'c', 'e'], ['a', 'b', 'd'],
    ['a', 'e'], ['a', 'c', 'd'], ['a', 'b', 'c', 'd'], ['a', 'b', 'c'],
    ['a', 'b', 'c'], ['a', 'b', 'c', 'e'], ['a', 'd'], ['a', 'b', 'c', 'e'],
]
# Minimum absolute support (number of transactions)
min_sup = 3

root, header = create_fp_tree(transactions, min_sup)
freq_itemsets = fp_growth(root, header, min_sup)

print(freq_itemsets)

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值