import random![在这里插入图片描述](https://img-blog.csdnimg.cn/direct/5604094c0c584ce3a4b9605b44baf7bd.png)
from collections import deque
# 二分,右边插入
def bisect_right(arr: list, val: int, l=0, r=None):
if l < 0:
raise ValueError('l must be non-negative')
if not r:
r = len(arr)
while l < r:
mid = (l+r)//2
if arr[mid] <= val:
l = mid+1
else:
r = mid
return l
# 左边插入
def bisect_left(arr: list, val: int, l=0, r=None):
if l < 0:
raise ValueError('l must be non-negative')
if not r:
r = len(arr)
while l < r:
mid = (l+r)//2
if arr[mid] >= val:
r = mid
else:
l = mid+1
return l
class LengthError(Exception):
pass
class HeightError(Exception):
pass
class InitError(Exception):
pass
class ParaError(Exception):
pass
# 定义键值对
class KeyValue(object):
"""
Python类中属性是用字典储存的,用__slots__可以将不再为实例创建字典,而是
为每个实例分配固定数量的空间,用于存储 __slots__ 中定义的属性,这样就可以节省内存。
"""
__slots__ = ('key', 'value')
def __init__(self, key, value):
self.key = key
self.value = value
def __str__(self):
return str((self.key, self.value))
# 二分查找要用
def __eq__(self, other):
if isinstance(other, KeyValue):
if self.key == other.key:
return True
else:
return False
else:
if self.key == other:
return True
else:
return False
def __ne__(self, other):
if isinstance(other, KeyValue):
if self.key != other.key:
return True
else:
return False
else:
if self.key != other:
return True
else:
return False
def __lt__(self, other):
if isinstance(other, KeyValue):
if self.key < other.key:
return True
else:
return False
else:
if self.key < other:
return True
else:
return False
def __le__(self, other):
if isinstance(other, KeyValue):
if self.key <= other.key:
return True
else:
return False
else:
if self.key <= other:
return True
else:
return False
def __gt__(self, other):
if isinstance(other, KeyValue):
if self.key > other.key:
return True
else:
return False
else:
if self.key > other:
return True
else:
return False
def __ge__(self, other):
if isinstance(other, KeyValue):
if self.key >= other.key:
return True
else:
return False
else:
if self.key >= other:
return True
else:
return False
# 非叶节点
class Bptree_InterNode(object):
def __init__(self, M):
if not isinstance(M, int):
raise InitError('M must be int')
# 目前不支持M为3
if M <= 3:
raise InitError('M must be greater then 3')
self.__M = M
# 关键字
self.ilist: [int] = []
# 指针:孩子节点
self.clist = []
# 父节点
self.par = None
def isleaf(self):
return False
# 非叶节点为了避免插入新节点会溢出,就是节点数等于M-1也算满
def isfull(self):
return len(self.ilist) >= self.__M - 1
# 避免合并后节点数超出
def isempty(self):
return len(self.ilist) <= self.__M//2 -1
@property
def M(self):
return self.__M
# 叶节点
class Bptree_Leaf(object):
def __init__(self, L):
if not isinstance(L, int):
raise InitError('L must be int')
self.__L = L
self.vlist: [KeyValue] = []
self.par = None
# 兄弟节点
self.bro = None
def isleaf(self):
return True
# 叶节点要超过L才算满
def isfull(self):
return len(self.vlist) > self.L
def isempty(self):
return len(self.vlist) <= self.__L//2
@property
def L(self):
return self.__L
# bplusTree
class Bptree(object):
def __init__(self, M, L):
if L > M:
raise InitError('M must greater than L')
self.__L = L
self.__M = M
self.__size = 0
self.__root = Bptree_Leaf(L)
# 叶节点遍历的开头
self.__leaf = self.__root
@property
def M(self):
return self.__M
@property
def L(self):
return self.__L
@property
def size(self):
return self.__size
# --------- insert
def insert(self, key_value: KeyValue):
node = self.__root
self._insert(node, key_value)
pass
def _insert(self, node, key_value: KeyValue):
# 叶节点直接插入
if node.isleaf():
idx = bisect_right(node.vlist, key_value.key)
node.vlist.insert(idx, key_value)
self.__size += 1
# 为满则叶分裂
if node.isfull():
self._split_leaf(node)
else:
# 非叶节点为满也直接分裂,避免后面溢出
if node.isfull():
node = self._split_node(node, key_value)
self._insert(node, key_value)
# node.clist[idx]的最大值<= key_value.key
else:
idx = bisect_right(node.ilist, key_value.key)
self._insert(node.clist[idx], key_value)
pass
def _split_node(self, node: Bptree_InterNode, key_value):
# 节点数为M-1,向上取整可以确保左右节点平衡,因为左节点要牺牲一个当父节点
mid = self.M//2
newnode = Bptree_InterNode(self.M)
newnode.ilist = node.ilist[mid:]
newnode.clist = node.clist[mid:]
# 关键字
ret = node.ilist[mid-1]
# 继承孩子节点
for i in newnode.clist:
i.par = newnode
if not node.par:
newroot = Bptree_InterNode(self.M)
newroot.ilist.append(node.ilist[mid-1])
newroot.clist = [node, newnode]
self.__root = newroot
node.par = newnode.par = newroot
else:
idx = node.par.clist.index(node)
node.par.clist.insert(idx+1, newnode)
node.par.ilist.insert(idx, node.ilist[mid-1])
newnode.par = node.par
node.ilist = node.ilist[:mid-1]
node.clist = node.clist[:mid]
# 二分搜索
return newnode if ret <= key_value.key else node
def _split_leaf(self, node: Bptree_Leaf):
# 节点数为L+1,直接取整
mid = (self.L+1)//2
newnode = Bptree_Leaf(self.L)
newnode.vlist = node.vlist[mid:]
if not node.par:
# 非叶节点
newroot = Bptree_InterNode(self.M)
newroot.ilist.append(node.vlist[mid].key)
newroot.clist = [node, newnode]
newnode.par = node.par = newroot
self.__root = newroot
else:
idx = node.par.clist.index(node)
node.par.clist.insert(idx+1, newnode)
node.par.ilist.insert(idx, node.vlist[mid].key)
newnode.par = node.par
node.vlist = node.vlist[:mid]
# 叶节点指针
if node.bro:
newnode.bro = node.bro
node.bro = newnode
pass
# --------------search----------------------
def search(self, left=None, right=None):
if left is None and right is None:
raise ParaError('you need to setup searching range')
elif left and right and left > right:
raise ParaError('the left bound should litter than right')
result = []
# 叶节点头节点
leaf = self.__leaf
# 从left开始遍历完所以叶节点
if right is None:
idx, leaf_ = self._search_key_left(self.__root, left)
result.extend(leaf_.vlist[idx:])
leaf_ = leaf_.bro
while leaf_:
result.extend(leaf_.vlist)
leaf_ = leaf_.bro
return result
# 从起点遍历到right
elif left is None:
idx, leaf_ = self._search_key_right(self.__root, right)
while leaf != leaf_:
result.extend(leaf.vlist)
leaf = leaf.bro
# 终点
result.extend(leaf.vlist[:idx])
return result
else:
idx_left, leaf_left = self._search_key_left(self.__root, left)
idx_right, leaf_right = self._search_key_right(self.__root, right)
# 同一个叶节点下
if leaf_left == leaf_right:
result.extend(leaf_left.vlist[idx_left:idx_right])
else:
result.extend(leaf_left.vlist[idx_left:])
leaf_left = leaf_left.bro
while leaf_left != leaf_right:
result.extend(leaf_left.vlist)
leaf_left = leaf_left.bro
result.extend(leaf_left.vlist[:idx_right])
return result
# 找到节点值为key的最左边元素
def _search_key_left(self, node, key):
if node.isleaf():
idx = bisect_left(node.vlist, key)
return idx, node
else:
idx = bisect_right(node.ilist, key)
return self._search_key_left(node.clist[idx], key)
pass
# 找到节点值大于key的最左元素
def _search_key_right(self, node, key):
if node.isleaf():
idx = bisect_right(node.vlist, key)
return idx, node
else:
idx = bisect_right(node.ilist, key)
return self._search_key_right(node.clist[idx], key)
# --------------test----------
def test(self):
que = deque()
h = 0
que.append([self.__root, h])
leaf_h = None
while que:
for _ in range(len(que)):
node, heigh = que.popleft()
if not node.isleaf():
if len(node.ilist) >= self.__M:
raise LengthError('the internode length is long')
if len(node.ilist)+1 != len(node.clist):
print('the node length is worth')
break
for i in node.clist:
if i.par != node:
print(i.par, node)
print('the parent node is worth')
break
que.append([i, heigh+1])
else:
if leaf_h == None:
leaf_h = heigh
else:
if leaf_h != heigh:
raise HeightError('the leaf height is not at the same height ')
if len(node.vlist) > self.__M:
raise LengthError('the leaf length is long')
# --------------------------delete----------------------------
def delete(self, key_value: KeyValue):
idx, node = self._search_key_left(self.__root, key_value.key)
if idx == len(node.vlist) or node.vlist[idx] != key_value:
print('deleted key cannot find')
return
node.vlist.pop(idx)
self.__size -= 1
# 如果是根直接删除
if self.__root == node:
return
# 有余
if not node.isempty():
if idx:
# 不是最小值
return
else:
# 是最小值则非叶节点一定有该键值对的键的关键字
self._update_parent_keys(node, node.vlist[0].key)
# 不有余
else:
idx_node = node.par.clist.index(node)
left_node = node.par.clist[idx_node-1] if idx_node >= 1 else None
right_node = node.par.clist[idx_node+1] if idx_node < len(node.par.clist)-1 else None
# 向左节点借
if left_node and not left_node.isempty():
node.vlist.insert(0, left_node.vlist.pop())
# 原来节点的最小值更新了
self._update_parent_keys(node, node.vlist[0].key)
elif right_node and not right_node.isempty():
node.vlist.append(right_node.vlist.pop(0))
# node删除的是最小值
if key_value.key <= node.vlist[0].key:
self._update_parent_keys(node, node.vlist[0].key)
# 右节点的最小值也更新了
self._update_parent_keys(right_node, right_node.vlist[0].key)
# 合并
elif left_node:
# 确保叶节点不断开
left_node.vlist += node.vlist
left_node.bro = node.bro
# node.vlist = left_node.vlist + node.vlist
left_node.par.clist.pop(idx_node)
left_node.par.ilist.pop(idx_node-1)
# 不用更新父节点,因为父节点被删除了
self._adjust_node(left_node.par)
# 一定有右节点
else:
node.vlist += right_node.vlist
node.bro = right_node.bro
node.par.clist.pop(idx_node+1)
node.par.ilist.pop(idx_node)
if not idx:
self._update_parent_keys(node, node.vlist[0])
self._adjust_node(node.par)
pass
def _update_parent_keys(self, node, update_key):
if not node.par:
return
idx = node.par.clist.index(node)
if idx == 0:
self._update_parent_keys(node.par, update_key)
else:
node.par.ilist[idx - 1] = update_key
def _adjust_node(self, node: Bptree_InterNode):
if node == self.__root:
# 下移
if not node.ilist:
self.__root = node.clist[0]
self.__root.par = None
return
if not node.isempty():
return
else:
idx = node.par.clist.index(node)
left_node = node.par.clist[idx-1] if idx >= 1 else None
right_node = node.par.clist[idx+1] if idx < len(node.par.clist)-1 else None
if left_node and not left_node.isempty():
self._l2r_node(left_node, node)
elif right_node and not right_node.isempty():
self._r2l_node(right_node, node)
elif left_node:
self._merge(left_node, node)
self._adjust_node(left_node.par)
else:
self._merge(node, right_node)
self._adjust_node(node.par)
pass
def _r2l_node(self, right_node: Bptree_InterNode, node: Bptree_InterNode):
parent = right_node.par
idx = parent.clist.index(node)
borrow_key = parent.ilist[idx]
parent.ilist[idx] = right_node.ilist.pop(0)
node.ilist.append(borrow_key)
pop_ = right_node.clist.pop(0)
# 记得继承
pop_.par = node
node.clist.append(pop_)
def _l2r_node(self, left_node: Bptree_InterNode, node: Bptree_InterNode):
parent = left_node.par
# 定位要替换的父节点的关键字
idx = parent.clist.index(left_node)
# 下移的关键字
borrow_key = parent.ilist[idx]
# 更新
parent.ilist[idx] = left_node.ilist.pop()
node.ilist.insert(0, borrow_key)
pop_ = left_node.clist.pop()
pop_.par = node
node.clist.insert(0, pop_)
def _merge(self, left_node: Bptree_InterNode, node: Bptree_InterNode):
parent = left_node.par
idx = parent.clist.index(left_node)
# 父节点下移
parent.clist[idx].ilist.append(parent.ilist[idx])
parent.ilist.pop(idx)
# merge
pop_ = parent.clist.pop(idx + 1)
parent.clist[idx].ilist += pop_.ilist
for i in pop_.clist:
i.par = left_node
parent.clist[idx].clist += pop_.clist
if __name__ == '__main__':
tree = Bptree(4, 4)
vals = random.sample(list(range(10000)), 10000)
for i in vals:
tree.insert(KeyValue(i, i**2))
print(tree.size)
tree.test()
for kv in tree.search(0):
print(kv)
# tree.test()
for i in vals[:]:
tree.test()
tree.delete(KeyValue(i, i ** 2))
print(tree.size)
for kv in tree.search(0):
print(kv)
b+树实现,Python代码,百万数据测试无报错!!!
于 2024-03-09 08:56:44 首次发布
本文介绍了使用Python实现的B+树数据结构,包括Bptree_Leaf和Bptree_InterNode类的设计,以及插入和删除操作的详细步骤。B+树是一种自平衡的树,常用于数据库和文件系统中提高查找效率。
摘要由CSDN通过智能技术生成