B+树的Python实现:百万级数据测试无报错

本文介绍了使用Python实现的B+树数据结构,包括Bptree_Leaf和Bptree_InterNode类的设计,以及插入和删除操作的详细步骤。B+树是一种自平衡的树,常用于数据库和文件系统中提高查找效率。
摘要由CSDN通过智能技术生成

在这里插入图片描述

![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/5604094c0c584ce3a4b9605b44baf7bd.png)

import random

from collections import deque
# 二分,右边插入
def bisect_right(arr: list, val: int, l=0, r=None):
    """Return the insertion point for val in sorted arr, after equal items.

    Only arr[l:r] is searched; r defaults to len(arr).  Every element of the
    searched range left of the returned index compares <= val, and every
    element from the returned index onward compares > val.

    Raises:
        ValueError: if l is negative.
    """
    if l < 0:
        raise ValueError('l must be non-negative')
    # Test against None explicitly: an upper bound of 0 is a legitimate
    # (empty) range and must not be silently replaced by len(arr).
    if r is None:
        r = len(arr)

    while l < r:
        mid = (l + r) // 2
        if arr[mid] <= val:
            l = mid + 1
        else:
            r = mid
    return l
# 左边插入
def bisect_left(arr: list, val: int, l=0, r=None):
    """Return the insertion point for val in sorted arr, before equal items.

    Only arr[l:r] is searched; r defaults to len(arr).  Every element of the
    searched range left of the returned index compares < val, and every
    element from the returned index onward compares >= val.

    Raises:
        ValueError: if l is negative.
    """
    if l < 0:
        raise ValueError('l must be non-negative')
    # Test against None explicitly: an upper bound of 0 is a legitimate
    # (empty) range and must not be silently replaced by len(arr).
    if r is None:
        r = len(arr)

    while l < r:
        mid = (l + r) // 2
        if arr[mid] >= val:
            r = mid
        else:
            l = mid + 1
    return l

class LengthError(Exception):
    """Raised by Bptree.test when a node holds more entries than allowed."""

class HeightError(Exception):
    """Raised by Bptree.test when leaves are found at different depths."""
class InitError(Exception):
    """Raised when a tree or node is constructed with invalid M / L values."""

class ParaError(Exception):
    """Raised by Bptree.search when the requested range is missing or invalid."""

# 定义键值对
class KeyValue(object):
    """A key/value record that is compared and ordered by its key only.

    The other operand of a comparison may be another KeyValue (its key is
    used) or a bare key.  This is what lets the binary-search helpers compare
    stored records directly against a plain key.

    __slots__ replaces the per-instance attribute dictionary with fixed
    storage for the listed attributes, which saves memory when many records
    are kept.
    """
    __slots__ = ('key', 'value')

    # Defining __eq__ already makes instances unhashable; state it explicitly
    # so nobody expects records to work as dict keys or set members.
    __hash__ = None

    def __init__(self, key, value):
        self.key = key
        self.value = value

    def __str__(self):
        return str((self.key, self.value))

    def __repr__(self):
        # Keeps lists of records (e.g. range-scan results) readable.
        return 'KeyValue({!r}, {!r})'.format(self.key, self.value)

    @staticmethod
    def _key_of(other):
        """Return the key to compare against: other.key for a KeyValue, else other."""
        return other.key if isinstance(other, KeyValue) else other

    # Rich comparisons: each one delegates to the key and always yields a bool.
    def __eq__(self, other):
        return bool(self.key == self._key_of(other))

    def __ne__(self, other):
        return bool(self.key != self._key_of(other))

    def __lt__(self, other):
        return bool(self.key < self._key_of(other))

    def __le__(self, other):
        return bool(self.key <= self._key_of(other))

    def __gt__(self, other):
        return bool(self.key > self._key_of(other))

    def __ge__(self, other):
        return bool(self.key >= self._key_of(other))

# 非叶节点
class Bptree_InterNode(object):
    """Internal (non-leaf) node: separator keys plus child pointers.

    Attributes:
        ilist: separator keys.
        clist: child nodes.
        par:   parent node, or None while the node has no parent.
    """

    def __init__(self, M):
        """Create an empty internal node of order M.

        Raises:
            InitError: if M is not an int, or if M <= 3 (orders that small
                are not supported by this implementation).
        """
        if not isinstance(M, int):
            raise InitError('M must be int')
        if M <= 3:
            raise InitError('M must be greater then 3')
        self.par = None
        self.clist = []
        self.ilist = []
        self.__M = M

    @property
    def M(self):
        """Order this node was created with (read-only)."""
        return self.__M

    def isleaf(self):
        """Internal nodes are never leaves."""
        return False

    def isfull(self):
        """True once the node holds M-1 keys or more.

        A node counts as full already at M-1 keys so that it is split before
        a key pushed up from a child could overflow it.
        """
        return not len(self.ilist) < self.__M - 1

    def isempty(self):
        """True when the node holds fewer than M//2 keys.

        Such a node cannot lend a key to a sibling; the threshold also keeps
        a merged node from exceeding its capacity.
        """
        return len(self.ilist) < self.__M // 2


# 叶节点
class Bptree_Leaf(object):
    """Leaf node: holds the stored records and a link to the next leaf.

    Attributes:
        vlist: the records kept in this leaf.
        par:   parent node, or None while the leaf has no parent.
        bro:   next leaf in the leaf chain, or None.
    """

    def __init__(self, L):
        """Create an empty leaf with capacity L.

        Raises:
            InitError: if L is not an int.
        """
        if not isinstance(L, int):
            raise InitError('L must be int')
        self.bro = None
        self.par = None
        self.vlist = []
        self.__L = L

    @property
    def L(self):
        """Capacity this leaf was created with (read-only)."""
        return self.__L

    def isleaf(self):
        """Leaves always report True."""
        return True

    def isfull(self):
        """True only when the leaf holds more than L records."""
        return self.L < len(self.vlist)

    def isempty(self):
        """True when the leaf holds L//2 records or fewer."""
        return not len(self.vlist) > self.__L // 2

# bplusTree
class Bptree(object):
    def __init__(self, M, L):
        """Create an empty tree whose root is a single leaf.

        M is handed to every internal node the tree creates and L to every
        leaf.

        Raises:
            InitError: if L is greater than M.
        """
        if L > M:
            raise InitError('M must greater than L')
        self.__L, self.__M = L, M
        self.__size = 0
        # The tree starts as one leaf, which is both the root and the head
        # of the leaf chain that range scans walk.
        first_leaf = Bptree_Leaf(L)
        self.__root = first_leaf
        self.__leaf = first_leaf
    @property
    def M(self):
        return self.__M

    @property
    def L(self):
        return self.__L

    @property
    def size(self):
        return self.__size

    # --------- insert
    def insert(self, key_value: KeyValue):
        """Insert key_value into the tree, starting the descent at the root."""
        self._insert(self.__root, key_value)

    def _insert(self, node, key_value: KeyValue):
        """Insert key_value into the subtree rooted at node.

        Full internal nodes met on the way down are split before descending,
        so a separator pushed up later always finds room in its parent.
        """
        key = key_value.key
        while not node.isleaf():
            if node.isfull():
                # Split first, then continue from whichever half covers key.
                node = self._split_node(node, key_value)
            else:
                node = node.clist[bisect_right(node.ilist, key)]

        # Reached a leaf: place the record after any entries with equal keys.
        node.vlist.insert(bisect_right(node.vlist, key), key_value)
        self.__size += 1
        if node.isfull():
            self._split_leaf(node)

    def _split_node(self, node: Bptree_InterNode, key_value):
        """Split internal node in two and push the middle key up.

        node keeps the keys and children left of the middle key; a new right
        sibling receives those to its right.  The middle key goes to the
        parent, or to a freshly created root when node has no parent.

        Returns the half whose key range covers key_value.key, so the caller
        can continue its descent from there.
        """
        cut = self.M // 2
        separator = node.ilist[cut - 1]

        right = Bptree_InterNode(self.M)
        right.ilist, right.clist = node.ilist[cut:], node.clist[cut:]
        # The children that moved must point at their new parent.
        for child in right.clist:
            child.par = right

        parent = node.par
        if parent:
            pos = parent.clist.index(node)
            parent.clist.insert(pos + 1, right)
            parent.ilist.insert(pos, separator)
            right.par = parent
        else:
            # Splitting the root grows the tree by one level.
            new_root = Bptree_InterNode(self.M)
            new_root.ilist.append(separator)
            new_root.clist = [node, right]
            self.__root = new_root
            node.par = right.par = new_root

        # The separator itself is dropped from the left half.
        node.ilist = node.ilist[:cut - 1]
        node.clist = node.clist[:cut]
        return right if separator <= key_value.key else node

    def _split_leaf(self, node: Bptree_Leaf):
        """Split leaf node in two and register the new leaf with the parent.

        node keeps the first (L+1)//2 records and a new right leaf takes the
        rest.  The first key of the right leaf becomes the separator in the
        parent, or in a freshly created root when node has no parent.  The
        new leaf is linked into the leaf chain directly after node.
        """
        cut = (self.L + 1) // 2
        right = Bptree_Leaf(self.L)
        right.vlist = node.vlist[cut:]

        parent = node.par
        if parent:
            pos = parent.clist.index(node)
            parent.clist.insert(pos + 1, right)
            parent.ilist.insert(pos, node.vlist[cut].key)
            right.par = parent
        else:
            # Splitting a root leaf creates the first internal node.
            new_root = Bptree_InterNode(self.M)
            new_root.ilist.append(node.vlist[cut].key)
            new_root.clist = [node, right]
            right.par = node.par = new_root
            self.__root = new_root

        node.vlist = node.vlist[:cut]
        # Splice the new leaf into the chain: node -> right -> old successor.
        right.bro, node.bro = node.bro, right

    # --------------search----------------------
    def search(self, left=None, right=None):
        """Return the records whose keys lie in the inclusive range [left, right].

        Either bound may be omitted (None) to leave that side open, but not
        both.  Records are collected by walking the leaf chain.

        Raises:
            ParaError: if both bounds are None, or if left > right.
        """
        if left is None and right is None:
            raise ParaError('you need to setup searching range')
        # Compare against None rather than relying on truthiness: a bound of
        # 0 is falsy, and skipping this check for e.g. search(5, 0) would let
        # the scan below run past the end of the leaf chain.
        if left is not None and right is not None and left > right:
            raise ParaError('the left bound should litter than right')

        result = []

        if left is None:
            # Open on the left: walk from the head of the leaf chain up to
            # the leaf that contains the right bound.
            stop_idx, stop_leaf = self._search_key_right(self.__root, right)
            leaf = self.__leaf
            while leaf != stop_leaf:
                result.extend(leaf.vlist)
                leaf = leaf.bro
            result.extend(leaf.vlist[:stop_idx])
            return result

        start_idx, leaf = self._search_key_left(self.__root, left)

        if right is None:
            # Open on the right: take the tail of the starting leaf, then
            # every remaining leaf in the chain.
            result.extend(leaf.vlist[start_idx:])
            leaf = leaf.bro
            while leaf:
                result.extend(leaf.vlist)
                leaf = leaf.bro
            return result

        stop_idx, stop_leaf = self._search_key_right(self.__root, right)
        if leaf == stop_leaf:
            # Both bounds fall inside the same leaf.
            result.extend(leaf.vlist[start_idx:stop_idx])
        else:
            result.extend(leaf.vlist[start_idx:])
            leaf = leaf.bro
            while leaf != stop_leaf:
                result.extend(leaf.vlist)
                leaf = leaf.bro
            result.extend(leaf.vlist[:stop_idx])
        return result


    # 找到节点值为key的最左边元素
    def _search_key_left(self, node, key):
        """Descend from node to a leaf and locate key with bisect_left.

        Returns (idx, leaf), where idx is the position in leaf.vlist of the
        first record whose key is >= key (len(leaf.vlist) if there is none).
        """
        while not node.isleaf():
            node = node.clist[bisect_right(node.ilist, key)]
        return bisect_left(node.vlist, key), node
    # 找到节点值大于key的最左元素
    def _search_key_right(self, node, key):
        """Descend from node to a leaf and locate key with bisect_right.

        Returns (idx, leaf), where idx is the position in leaf.vlist of the
        first record whose key is > key (len(leaf.vlist) if there is none).
        """
        while not node.isleaf():
            node = node.clist[bisect_right(node.ilist, key)]
        return bisect_right(node.vlist, key), node


    # --------------test----------
    def test(self):
        que = deque()
        h = 0
        que.append([self.__root, h])
        leaf_h = None
        while que:
            for _ in range(len(que)):
                node, heigh = que.popleft()
                if not node.isleaf():
                    if len(node.ilist) >= self.__M:
                        raise LengthError('the internode length is long')
                    if len(node.ilist)+1 != len(node.clist):
                        print('the node length is worth')
                        break
                    for i in node.clist:
                        if i.par != node:
                            print(i.par, node)
                            print('the parent node is worth')
                            break
                        que.append([i, heigh+1])
                else:
                    if leaf_h == None:
                        leaf_h = heigh
                    else:
                        if leaf_h != heigh:
                            raise HeightError('the leaf height is not at the same height ')
                    if len(node.vlist) > self.__M:
                        raise LengthError('the leaf length is long')

    # --------------------------delete----------------------------
    def delete(self, key_value: KeyValue):
        """Remove one record whose key equals key_value.key.

        Prints a message and returns if no such record exists.  When the
        removal leaves a non-root leaf under-filled, the leaf first tries to
        borrow a record from its left sibling, then from its right sibling,
        and otherwise merges with a sibling; a merge may in turn trigger
        rebalancing of the ancestors.  Separator keys are refreshed whenever
        the smallest key of a leaf changes.
        """
        idx, node = self._search_key_left(self.__root, key_value.key)
        if idx == len(node.vlist) or node.vlist[idx] != key_value:
            print('deleted key cannot find')
            return
        node.vlist.pop(idx)
        self.__size -= 1

        # A root leaf has no separators to maintain and no minimum fill.
        if self.__root == node:
            return

        if not node.isempty():
            # Still filled enough; only a removed minimum needs propagating,
            # because that key may be stored as a separator above.
            if idx == 0:
                self._update_parent_keys(node, node.vlist[0].key)
            return

        # Under-filled: look at the siblings under the same parent.
        idx_node = node.par.clist.index(node)
        left_node = node.par.clist[idx_node - 1] if idx_node >= 1 else None
        right_node = (node.par.clist[idx_node + 1]
                      if idx_node < len(node.par.clist) - 1 else None)

        if left_node and not left_node.isempty():
            # Borrow the largest record of the left sibling; it becomes this
            # leaf's new minimum.
            node.vlist.insert(0, left_node.vlist.pop())
            self._update_parent_keys(node, node.vlist[0].key)
        elif right_node and not right_node.isempty():
            # Borrow the smallest record of the right sibling.
            node.vlist.append(right_node.vlist.pop(0))
            # The deleted record was this leaf's minimum.
            if key_value.key <= node.vlist[0].key:
                self._update_parent_keys(node, node.vlist[0].key)
            # The right sibling's minimum changed as well.
            self._update_parent_keys(right_node, right_node.vlist[0].key)
        elif left_node:
            # Merge into the left sibling and keep the leaf chain connected.
            left_node.vlist += node.vlist
            left_node.bro = node.bro
            left_node.par.clist.pop(idx_node)
            # The separator for the absorbed leaf disappears with it, so no
            # key refresh is needed here.
            left_node.par.ilist.pop(idx_node - 1)
            self._adjust_node(left_node.par)
        else:
            # No left sibling, so a right sibling must exist: absorb it.
            node.vlist += right_node.vlist
            node.bro = right_node.bro
            node.par.clist.pop(idx_node + 1)
            node.par.ilist.pop(idx_node)

            if not idx:
                # Separators hold bare keys, so pass the record's key rather
                # than the KeyValue object itself.
                self._update_parent_keys(node, node.vlist[0].key)
            self._adjust_node(node.par)

    def _update_parent_keys(self, node, update_key):
        if not node.par:
            return
        idx = node.par.clist.index(node)
        if idx == 0:
            self._update_parent_keys(node.par, update_key)
        else:
            node.par.ilist[idx - 1] = update_key

    def _adjust_node(self, node: Bptree_InterNode):
        if node == self.__root:
            # 下移
            if not node.ilist:
                self.__root = node.clist[0]
                self.__root.par = None
            return
        if not node.isempty():
            return
        else:
            idx = node.par.clist.index(node)
            left_node = node.par.clist[idx-1] if idx >= 1 else None
            right_node = node.par.clist[idx+1] if idx < len(node.par.clist)-1 else None
            if left_node and not left_node.isempty():
                self._l2r_node(left_node, node)
            elif right_node and not right_node.isempty():
                self._r2l_node(right_node, node)
            elif left_node:
                self._merge(left_node, node)
                self._adjust_node(left_node.par)
            else:
                self._merge(node, right_node)
                self._adjust_node(node.par)


        pass

    def _r2l_node(self, right_node: Bptree_InterNode, node: Bptree_InterNode):
        parent = right_node.par
        idx = parent.clist.index(node)
        borrow_key = parent.ilist[idx]
        parent.ilist[idx] = right_node.ilist.pop(0)

        node.ilist.append(borrow_key)
        pop_ = right_node.clist.pop(0)
        # 记得继承
        pop_.par = node
        node.clist.append(pop_)


    def _l2r_node(self, left_node: Bptree_InterNode, node: Bptree_InterNode):
        parent = left_node.par
        # 定位要替换的父节点的关键字
        idx = parent.clist.index(left_node)
        # 下移的关键字
        borrow_key = parent.ilist[idx]
        # 更新
        parent.ilist[idx] = left_node.ilist.pop()
        node.ilist.insert(0, borrow_key)
        pop_ = left_node.clist.pop()
        pop_.par = node
        node.clist.insert(0, pop_)



    def _merge(self, left_node: Bptree_InterNode, node: Bptree_InterNode):
        parent = left_node.par
        idx = parent.clist.index(left_node)
        # 父节点下移
        parent.clist[idx].ilist.append(parent.ilist[idx])
        parent.ilist.pop(idx)
        # merge
        pop_ = parent.clist.pop(idx + 1)
        parent.clist[idx].ilist += pop_.ilist
        for i in pop_.clist:
            i.par = left_node
        parent.clist[idx].clist += pop_.clist

if __name__ == '__main__':
    # Smoke test: insert 10000 distinct keys in random order, validate the
    # tree, then delete them all again, validating before every deletion.
    tree = Bptree(4, 4)
    vals = random.sample(list(range(10000)), 10000)

    for key in vals:
        tree.insert(KeyValue(key, key**2))
    print(tree.size)
    tree.test()
    for kv in tree.search(0):
        print(kv)

    for key in vals[:]:
        tree.test()
        tree.delete(KeyValue(key, key ** 2))
    print(tree.size)
    for kv in tree.search(0):
        print(kv)
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值