K-D树的Python实现

"""
K-D Tree
实现了构建和检索
author:小威
"""
from __future__ import print_function
import heapq
import math


class KDNode(object):
    """A node of a k-d tree, which also acts as the handle of its subtree.

    A node whose ``data`` is ``None`` is an empty placeholder; it is falsy
    and is skipped by the search.
    """

    def __init__(self, data=None, left=None, right=None, axis=None,
                 sel_axis=None, dimensions=None):
        """Store the point, the two children and the splitting metadata.

        :param data: the point held by this node (indexable by axis)
        :param left: subtree searched when the query lies below the split
        :param right: subtree searched when the query lies at/above the split
        :param axis: index of the coordinate this node splits on
        :param sel_axis: callable mapping a parent's axis to its children's axis
        :param dimensions: number of coordinates used by :meth:`dist`
        """
        self.data = data
        self.left = left
        self.right = right
        self.axis = axis
        self.sel_axis = sel_axis
        self.dimensions = dimensions

    def __nonzero__(self):
        """Return True when the node actually holds a point."""
        return self.data is not None

    __bool__ = __nonzero__

    def __lt__(self, other):
        """Arbitrary but consistent ordering, used only to break ties.

        The search keeps ``(-distance, node)`` tuples in a heap.  When two
        distances are equal the tuple comparison falls through to the nodes,
        which raises ``TypeError`` on Python 3 unless nodes are orderable.
        """
        if not isinstance(other, KDNode):
            return NotImplemented
        return id(self) < id(other)

    def dist(self, point):
        """Return the squared Euclidean distance from this node to ``point``."""
        return sum(math.pow(self.data[i] - point[i], 2)
                   for i in range(self.dimensions))

    def search_knn(self, point, k, dist=None):
        """Find the ``k`` nodes nearest to ``point``.

        :param point: query point
        :param k: number of neighbours wanted; values below 1 give ``[]``
        :param dist: optional ``dist(node_data, point)`` callable; when
            omitted, :meth:`dist` (squared Euclidean) is used.  Note that
            the pruning step in :meth:`search_node` always compares the
            squared per-axis gap against whatever this callable returns.
        :return: list of ``(node, distance)`` tuples, nearest first
        """
        if k < 1:
            return []

        if dist is None:
            def get_dist(node):
                return node.dist(point)
        else:
            def get_dist(node):
                return dist(node.data, point)

        results = []
        self.search_node(point, k, results, get_dist)

        # Sort on the distance only so equidistant nodes are never compared.
        ordered = sorted(results, key=lambda item: item[0], reverse=True)
        return [(node, -neg_dist) for neg_dist, node in ordered]

    def search_node(self, point, k, results, get_dist):
        """Recursive worker for :meth:`search_knn`.

        :param point: query point
        :param k: number of neighbours wanted
        :param results: heap (a list) of ``(-distance, node)`` tuples, updated
            in place; its root is the farthest candidate kept so far
        :param get_dist: callable returning the distance of a node to ``point``
        """
        if not self or k < 1:
            return

        node_dist = get_dist(self)
        item = (-node_dist, self)
        if len(results) < k:
            # Heap not full yet: every visited node is a candidate.
            heapq.heappush(results, item)
        elif -node_dist > results[0][0]:
            # Heap full: replace the farthest candidate if this one is closer.
            heapq.heapreplace(results, item)

        split_plane = self.data[self.axis]
        # Squared distance from the query to the splitting hyperplane.
        plane_dist = pow(point[self.axis] - split_plane, 2)

        # Descend first into the side of the split that contains the query.
        if point[self.axis] < split_plane:
            near, far = self.left, self.right
        else:
            near, far = self.right, self.left

        if near is not None:
            near.search_node(point, k, results, get_dist)

        # The other side can only matter if the heap still has room or the
        # hyperplane is closer than the farthest candidate kept so far.
        if len(results) < k or -plane_dist > results[0][0]:
            if far is not None:
                far.search_node(point, k, results, get_dist)


def create(point_list=None, dimensions=None, axis=0):
    """Build a k-d tree from an iterable of points and return its root node.

    :param point_list: iterable of equally sized, indexable points; ``None``
        or an empty iterable yields a single empty node
    :param dimensions: number of coordinates per point; when omitted it is
        taken from the length of the first point
    :param axis: coordinate index the root splits on
    :return: the root :class:`KDNode`
    """
    # Materialize first so generators and other one-shot iterables work too.
    points = list(point_list) if point_list is not None else []

    # Without this, the default dimensions=None fails on the modulo below.
    if dimensions is None and points:
        dimensions = len(points[0])

    # Maps a node's axis to the axis used by its children (round robin).
    sel_axis = (lambda prev_axis: (prev_axis + 1) % dimensions)

    if not points:
        return KDNode(sel_axis=sel_axis, axis=axis, dimensions=dimensions)

    # Split at the median along the current axis.
    points.sort(key=lambda point: point[axis])
    median = len(points) // 2
    next_axis = sel_axis(axis)

    left = create(points[:median], dimensions, next_axis)
    right = create(points[median + 1:], dimensions, next_axis)
    return KDNode(points[median], left, right,
                  axis=axis, sel_axis=sel_axis, dimensions=dimensions)

通过create(point_list, dimensions)可以创建K-D树(dimensions为每个点的维数),返回值是树的根节点kdtree;通过kdtree.search_knn(point, k)可以搜索距离point最近的k个点,结果为(节点, 平方距离)元组的列表,按距离从近到远排列。

对原理有兴趣的话,推荐B站up主https://space.bilibili.com/2136075的有关视频,非常清楚明白。
有任何问题欢迎评论区交流。

# Example implementation of the FP-growth algorithm in Python.
#
# The code covers the main steps of FP-growth: building the FP-tree, finding
# conditional pattern bases and mining the frequent itemsets.  To use it,
# pass the dataset (a dict mapping each transaction to its count) and the
# minimum support to create_tree / mine_tree, as the main guard below does.


class TreeNode:
    """One node of an FP-tree."""

    def __init__(self, name, count, parent):
        self.name = name        # item stored in this node
        self.count = count      # accumulated count of transactions through it
        self.parent = parent    # parent node, None for the root
        self.children = {}      # item -> child TreeNode
        self.next = None        # next node holding the same item

    def increment(self, count):
        """Add ``count`` to this node's counter."""
        self.count += count

    def display(self, ind=1):
        """Print the subtree rooted here, one node per line, indented by depth."""
        print(' ' * ind, self.name, ' ', self.count)
        for child in self.children.values():
            child.display(ind + 1)


def create_tree(dataset, min_support):
    """Build an FP-tree from ``dataset``.

    :param dataset: dict mapping each transaction (an iterable of items) to
        the number of times it occurs
    :param min_support: minimum total count for an item to be kept
    :return: ``(root, header_table)`` where ``header_table`` maps each
        frequent item to ``[support, first_node]``; ``(None, None)`` when no
        item reaches ``min_support``
    """
    support = {}
    for trans, count in dataset.items():
        for item in trans:
            support[item] = support.get(item, 0) + count

    header_table = {item: [total, None]
                    for item, total in support.items()
                    if total >= min_support}
    if not header_table:
        return None, None

    # One global order for the whole tree.  Sorting each transaction by
    # support alone leaves equally frequent items in transaction order, so
    # the same itemset can end up on different paths and be under-counted.
    ranked = sorted(header_table,
                    key=lambda item: header_table[item][0], reverse=True)
    rank = {item: position for position, item in enumerate(ranked)}

    root = TreeNode('Null Set', 1, None)
    for trans, count in dataset.items():
        frequent = {item for item in trans if item in rank}
        if frequent:
            ordered_items = sorted(frequent, key=rank.__getitem__)
            update_tree(ordered_items, root, header_table, count)
    return root, header_table


def update_tree(items, in_tree, header_table, count):
    """Insert the ordered ``items`` as a path below ``in_tree``."""
    node = in_tree
    for item in items:
        child = node.children.get(item)
        if child is None:
            child = TreeNode(item, count, node)
            node.children[item] = child
            # Thread the new node onto the item's linked list.
            if header_table[item][1] is None:
                header_table[item][1] = child
            else:
                update_header(header_table[item][1], child)
        else:
            child.increment(count)
        node = child


def update_header(node_to_test, target_node):
    """Append ``target_node`` to the end of the linked list at ``node_to_test``."""
    while node_to_test.next is not None:
        node_to_test = node_to_test.next
    node_to_test.next = target_node


def ascend_tree(leaf_node, prefix_path):
    """Append the item names from ``leaf_node`` up to (excluding) the root."""
    node = leaf_node
    while node.parent is not None:
        prefix_path.append(node.name)
        node = node.parent


def find_prefix_path(base_pat, header_table):
    """Return the conditional pattern base of ``base_pat``.

    :return: dict mapping each prefix path (a tuple of items, nearest
        ancestor first) to the count of the node it leads to
    """
    # First node in the linked list for this item.
    tree_node = header_table[base_pat][1]
    cond_pats = {}
    while tree_node is not None:
        prefix_path = []
        ascend_tree(tree_node, prefix_path)
        # prefix_path[0] is base_pat itself; keep only real ancestors.
        if len(prefix_path) > 1:
            cond_pats[tuple(prefix_path[1:])] = tree_node.count
        tree_node = tree_node.next
    return cond_pats


def mine_tree(in_tree, header_table, min_support, pre_fix, freq_item_list):
    """Recursively mine frequent itemsets, appending them to ``freq_item_list``.

    :param in_tree: root of the tree being mined (not read by this function)
    :param header_table: header table of that tree
    :param min_support: minimum support for conditional trees
    :param pre_fix: set of items already fixed on this recursion path
    :param freq_item_list: output list receiving one set per frequent itemset
    """
    # Visit items from least to most frequent.
    big_l = [v[0] for v in sorted(header_table.items(), key=lambda p: p[1][0])]
    for base_pat in big_l:
        new_freq_set = pre_fix.copy()
        new_freq_set.add(base_pat)
        freq_item_list.append(new_freq_set)
        cond_patt_bases = find_prefix_path(base_pat, header_table)
        my_cond_tree, my_head = create_tree(cond_patt_bases, min_support)
        if my_head is not None:
            mine_tree(my_cond_tree, my_head, min_support,
                      new_freq_set, freq_item_list)


def load_dataset():
    """Return a small demo list of transactions."""
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]


def create_init_set(dataset):
    """Turn a list of transactions into the ``{frozenset: 1}`` dict form."""
    ret_dict = {}
    for trans in dataset:
        ret_dict[frozenset(trans)] = 1
    return ret_dict


if __name__ == '__main__':
    min_support = 2
    dataset = load_dataset()
    init_set = create_init_set(dataset)
    fp_tree, header_table = create_tree(init_set, min_support)
    freq_items = []
    mine_tree(fp_tree, header_table, min_support, set(), freq_items)
    print(freq_items)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Wei *

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值