B树特征
B-树是一种专门用于磁盘快速检索的数据结构。为了简化,这里只用内存先实现以方便大家了解B-树的原理。B-树有以下几个特征:
- B树每个节点最多有m个子节点,最多可以存储m-1个数据;在本文的实现中,m只能为偶数,不能为奇数,最小为4。
- B树除root节点外,其余节点至少存储t-1个数据,t=m/2。
- 除root节点外,其余节点至少要有t个子节点。
- B树是排序树,第n个子节点中key的取值范围在keys[n-1]和keys[n]之间。
难以理解的话,我给张m=4,插入了20个节点的B树的图:
B树内部操作
B树存在六种内部操作:根分裂、非根分裂、根合并、非根合并、左移、右移。
根分裂
根分裂算法是,从中间将数据拆成左右两份,然后根指向左右两个节点,如下图所示:
非根分裂
非根分裂算法也从中间将数据拆成左右两份,但是要将中间的key插入到父节点中,并且要让父节点指向新分裂出来的兄弟节点。
根合并
插入时为了防止溢出,需要分裂。反过来,删除时为了防止下溢,所以需要合并。合并是分裂的逆过程。根合并会降低B-树的高度,条件比较严苛,需要第二层能够合并并且root只有一个key。
非根合并
这种合并就是为了删除数据后还能保持B-树的特征。合并的关键在于需要从父节点拉一个key下来。
左移
右移
插入操作
有了前面六种基本操作之后,才可以写插入操作。插入比较简单,在搜索过程中,只要遇到满了的节点就分裂,直到遇到叶子就插入。分裂会导致上层节点元素增加,上层元素的增加会传递到根,最终导致根的分裂。根的分裂使得B-树层数增加。为什么遇到了就需要分裂呢?因为下层的分裂将一个元素移入上层,所以搜索途中遇到的节点只要是满的,都要分裂。以下是分裂并插入新值的图示:
根分裂然后插入
非根分裂然后插入
删除操作
删除操作也是非常简单的。其基本逻辑是分为两部分,第一部分是搜索,第二部分是删除。
删除搜索过程
在删除的搜索中,需要判断经过的节点key的数量是不是 $t-1$($t=m/2$)。如果是这种情况,则需要通过四种操作,根合并、非根合并、左移、右移,使得节点中key的数量增加。为什么要这么做呢?因为在自身节点删除会破坏最小数量为t-1的规定。就算删除是发生在子节点,因为非根的合并需要从父节点拉一个元素下来,所以依然会破坏最小为t-1的规定。总不能打赌子节点只会左移右移而不会合并,从而在父节点只有t-1个key时不做任何处理。打赌肯定不行,判断会让代码更加复杂。所以最好的办法就是key的数量为t-1时就进行合并等操作。但是有四种操作啊。所以t-1的场景必须要分子场景:
1、自身为root,不进行任何操作
2、任意找个兄弟,如果兄弟和自己key的size相加小于m-1
2.1 自己是第二层,也就是父节点是根节点,并且根节点只有一个key,那么执行根合并
2.2 否则执行非根合并
3、如果兄弟和自己key的size相加大于或等于m-1
3.1 自己是弟弟(兄弟在自己右边),左移
3.2 自己是哥哥(兄弟在自己左边),右移
删除执行过程
如果是叶子,那么直接删除。如果不是叶子,在左子节点找最大值或右子节点找最小值替换自己,这个看个人喜好。替换完成之后,在相应节点重新执行删除过程,也就是递归删除。以删除6为例子:
用7替换6
进入递归过程,也就是在子节点删除7,因为子节点太短,所以左移
最终完成了对6的删除
完整java代码
B树代码
package com.youngthing.trees.b;
import com.youngthing.trees.Node;
import com.youngthing.trees.Tree;
import java.util.List;
/**
 * In-memory B-tree (not a B+ tree) whose nodes hold at most {@code m - 1} keys
 * and {@code m} children. All work is delegated to the root {@link BNode}.
 * created at 09/02/2022
 *
 * @author 花书粉丝
 * <a href="mailto://yujianbo@chtwm.com">yujianbo@chtwm.com</a>
 * @since 1.0.0
 */
public class BTree<T extends Comparable<T>> extends MultiwayTree<T> implements Tree<T, List<T>> {

    // Debug switches read by BNode: when true, the tree is printed before and
    // after the corresponding structural operation.
    public static boolean debugNonRootSplit = false;
    public static boolean debugRootSplit = false;
    public static boolean debugNonRootMerge = false;
    public static boolean debugRootMerge = false;
    public static boolean debugLeftShift = false;
    public static boolean debugRightShift = false;
    public static boolean debugReplace = false;

    /**
     * Creates an empty tree.
     *
     * @param m maximum number of children per node; must be even and at least 4
     * @throws IllegalArgumentException if {@code m} is odd or smaller than 4
     */
    public BTree(int m) {
        // BNode.split() copies m / 2 - 1 keys into each half, which only covers
        // every key of a full node when m is even; an odd m would silently drop a key.
        if (m < 4 || m % 2 != 0) {
            throw new IllegalArgumentException("m must be an even number >= 4, got " + m);
        }
        super.root = new BNode<>(m);
    }

    /**
     * @return the root node of the tree
     */
    @Override
    public Node<List<T>> getRoot() {
        return root;
    }

    /**
     * Inserts {@code t} by delegating to {@link BNode#add} on the root.
     *
     * @param t key to insert
     */
    @Override
    public void add(T t) {
        ((BNode<T>) root).add(t);
    }

    /**
     * Removes {@code t} by delegating to {@link BNode#delete} on the root.
     *
     * @param t key to remove
     */
    @Override
    public void delete(T t) {
        ((BNode<T>) root).delete(t);
    }

    /**
     * Looks up the node that stores {@code t}.
     *
     * @param t key to search for
     * @return the node containing the key, or {@code null} when it is absent
     */
    @Override
    public MultiwayNode<T> findNode(T t) {
        return ((SortedMultiwayNode<T>) root).findNode(t);
    }
}
BNode代码
package com.youngthing.trees.b;
/**
* B+树节点
* created at 09/02/2022
*
* @author 花书粉丝
* <a href="mailto://yujianbo@chtwm.com">yujianbo@chtwm.com</a>
* @since 1.0.0
*/
public class BNode<T extends Comparable<T>> extends SortedMultiwayNode<T> {
public BNode(int m) {
super(m);
}
public void add(T key) {
BNode<T> x = this;
while (true) {
if (x.isFull()) {
x.split();
if (x.getParent() == null) {
if (key.compareTo((T) x.values[0]) < 0) {
x = (BNode<T>) x.branches(0);
} else {
x = (BNode<T>) x.branches(1);
}
} else {
x = x.getParent();
}
}
if (x.isLeaf()) {
break;
}
x = x.nextNode(key);
}
x.insert(key);
}
public void delete(T key) {
BNode<T> x = this;
final BNode<T> root = (BNode<T>) getRoot();
int t = m / 2;
int index = -1;
while (true) {
if (x.length == t - 1 && x != root) {
x = x.shiftOrMerge();
}
int[] indices = x.binarySearch(key);
if (indices.length == 1) {
index = indices[0];
break;
}
x = (BNode<T>) x.nextNode(indices);
}
if (x == null) {
throw new RuntimeException("key不存在");
}
if (x.isLeaf()) {
x.removeKey(index);
} else {
// 左边的最大或者右边的最小
final BNode<T> next = (BNode<T>) x.branches(index + 1);
T successor = next.min();
if (BTree.debugReplace) {
System.out.println("before-replace");
System.out.println(root.toGraphString());
}
x.values[index] = successor;
if (BTree.debugReplace) {
System.out.println("after-replace");
System.out.println(root.toGraphString());
}
next.delete(successor);
if (BTree.debugReplace) {
System.out.println("after-delete");
System.out.println(root.toGraphString());
}
}
}
@Override
public BNode<T> getParent() {
return (BNode<T>) super.getParent();
}
private BNode<T> shiftOrMerge() {
final int t = m / 2;
// 属于自己比较少的情况 本身是root
// 再获取兄弟
BNode<T> result = this;
BNode<T> sibling = (BNode<T>) super.getSibling();
if (sibling != null) {
if (this.length + sibling.length < m - 1) {
if (getParent().isRoot() && getParent().length == 1) {
// 缩小高度
getParent().mergeToRoot();
return getParent();
} else if (this.indexInParent == getParent().length) {
// 自己是最右边
getParent().mergeToLeft(this.indexInParent - 1);
return (BNode<T>) getParent().branches(this.indexInParent - 1);
} else {
// 自己是左边
getParent().mergeToLeft(this.indexInParent);
return this;
}
} else {
// 自己是左边
if (sibling.indexInParent > this.indexInParent) {
this.leftShift();
return this;
} else {
// 自己是右边
sibling.rightShift();
return this;
}
}
} else {
System.out.println(getRoot().toGraphString());
throw new RuntimeException("sibling是NULL不知道怎么处理");
}
}
private void removeKey(int index) {
for (int i = index; i < length - 1; i++) {
values[i] = values[i + 1];
}
if (!isLeaf()) {
for (int i = index; i < length + 1; i++) {
branches(i, branches(i + 1));
}
}
length--;
}
private BNode<T> copy(int startIndex, int length) {
final BNode<T> node = new BNode<>(this.m);
for (int i = startIndex; i < startIndex + length; i++) {
node.values[i - startIndex] = this.values[i];
}
if (!this.isLeaf()) {
node.initBranches();
for (int i = startIndex; i < startIndex + length + 1; i++) {
if (this.branches(i) != null) {
node.branches(i - startIndex, this.branches(i));
}
}
}
node.length = length;
node.indexInParent = indexInParent;
return node;
}
private int insert(T key) {
if (isFull()) {
throw new RuntimeException();
}
final int index = nextIndex(key);
// 要全部移动啊
// 比如 2 5 , insert 3 -> 2 3 5
if (!isLeaf()) {
branches(length + 1, branches(length));
}
for (int i = length; i > index; i--) {
values[i] = values[i - 1];
}
if (!isLeaf()) {
for (int i = length + 1; i > index; i--) {
branches(i, branches(i - 1));
}
}
values[index] = key;
super.length++;
return index;
}
private int nextIndex(T key) {
for (int i = 0; i < length; i++) {
final T value = (T) values[i];
if (value == null) {
return i;
}
if (key.compareTo(value) < 0) {
return i;
}
}
return length;
}
private BNode<T> nextNode(T key) {
final int i = nextIndex(key);
return (BNode<T>) branches(i);
}
private void split() {
// split to three nodes
// middle key
int middleIndex = m / 2 - 1;
if (this.getParent() == null) {
if (BTree.debugRootSplit) {
System.out.println("before-root-split");
System.out.println(this.toGraphString());
}
final BNode<T> left = copy(0, middleIndex);
final BNode<T> right = copy(middleIndex + 1, middleIndex);
this.values[0] = values[middleIndex];
this.length = 1;
this.initBranches();
this.branches(0, left);
this.branches(1, right);
if (BTree.debugRootSplit) {
System.out.println("after-root-split");
System.out.println(this.toGraphString());
}
} else {
if (BTree.debugNonRootSplit) {
System.out.println("before-non-root-split");
System.out.println(getRoot().toGraphString());
}
final BNode<T> right = copy(middleIndex + 1, middleIndex);
BNode<T> p = (BNode<T>) this.getParent();
// 不能调用add方法,只能insert
int parentIndex = p.insert((T) values[middleIndex]);
// 这里出问题了
p.branches(parentIndex + 1, right);
p.branches(p.nextIndex((T) this.values[0]), this);
this.length = middleIndex;
if (BTree.debugNonRootSplit) {
System.out.println("after-nonroot-split");
System.out.println(this.getRoot().toGraphString());
}
}
}
public void leftShift() {
getParent().leftShift(this.indexInParent);
}
public void rightShift() {
getParent().rightShift(this.indexInParent);
}
private void leftShift(int index) {
if (BTree.debugLeftShift) {
System.out.println("before-left-shift");
System.out.println(this.toGraphString());
}
// 右子移动到左边
final MultiwayNode<T> left = this.branches(index);
final MultiwayNode<T> right = this.branches(index + 1);
left.values[left.length++] = this.values[index];
if (right.isNotLeaf()) {
left.branches(left.length, right.branches(0));
}
this.values[index] = right.values[0];
// 整体向左挪动
right.moveLeft(1);
right.length--;
if (BTree.debugLeftShift) {
System.out.println("after-left-shift");
System.out.println(getRoot().toGraphString());
}
}
private void rightShift(int index) {
if (BTree.debugRightShift) {
System.out.println("before-right-shift");
System.out.println(getRoot().toGraphString());
}
// left子移动到right
final MultiwayNode<T> left = this.branches(index);
final MultiwayNode<T> right = this.branches(index + 1);
// 整体向右挪动
right.moveRight(1);
right.values[0] = this.values[index];
right.branches(0, left.branches(left.length));
right.length++;
this.values[index] = left.values[left.length - 1];
left.length--;
if (BTree.debugRightShift) {
System.out.println("before-right-shift");
System.out.println(this.toGraphString());
}
}
/**
* 断掉自己的所有节点,降低整棵树的高度
*/
private void mergeToRoot() {
if (BTree.debugRootMerge) {
System.out.println("before-root-merge");
System.out.println(this.getRoot().toGraphString());
}
final MultiwayNode<T> left = this.branches(0);
final MultiwayNode<T> right = this.branches(1);
// this唯一节点移动 移动距离left.length
this.values[left.length] = this.values[0];
// copy left.values left.branches
for (int i = 0; i < left.length; i++) {
this.values[i] = left.values[i];
}
if (left.isNotLeaf()) {
for (int i = 0; i < left.length + 1; i++) {
this.branches(i, left.branches(i));
}
} else {
this.clearBranches();
}
// copy right.values right.branches
for (int i = 0; i < right.length; i++) {
this.values[left.length + i + 1] = right.values[i];
}
if (right.isNotLeaf()) {
for (int i = 0; i < right.length + 1; i++) {
this.branches(left.length + 1 + i, right.branches(i));
}
}
this.length += (left.length + right.length);
if (BTree.debugRootMerge) {
System.out.println("after-root-merge");
System.out.println(this.getRoot().toGraphString());
}
}
/**
* 把自己缩小,断开右节点
*
* @param index
*/
private void mergeToLeft(int index) {
if (BTree.debugNonRootMerge) {
System.out.println("before-nonroot-merge");
System.out.println(this.getRoot().toGraphString());
}
final MultiwayNode<T> left = this.branches(index);
final MultiwayNode<T> right = this.branches(index + 1);
// 这是干嘛的?
left.values[left.length] = this.values[index];
for (int i = left.length + 1; i < left.length + right.length + 1; i++) {
left.values[i] = right.values[i - left.length - 1];
}
if (right.isNotLeaf()) {
for (int i = left.length + 1; i <= left.length + right.length + 1; i++) {
left.branches(i, right.branches(i - left.length - 1));
}
}
left.length += (right.length + 1);
// 整体左移,就自动断开了右边啊
// 有一个是左移移动不到的
if (index < length - 1) {
this.values[index] = this.values[index + 1];
}
this.moveLeft(index + 1, 1);
this.length--;
if (BTree.debugNonRootMerge) {
System.out.println("after-nonroot-merge");
System.out.println(this.getRoot().toGraphString());
}
}
}
MultiwayNode代码
package com.youngthing.trees.b;
import com.youngthing.trees.Node;
import java.util.*;
/**
 * Node of a multiway tree: up to {@code m - 1} keys in {@link #values} and up
 * to {@code m} children, plus helpers for shifting keys/children and for
 * rendering the node as a graph record.
 * created at 09/02/2022
 *
 * @author 花书粉丝
 * <a href="mailto://yujianbo@chtwm.com">yujianbo@chtwm.com</a>
 * @since 1.0.0
 */
public class MultiwayNode<T extends Comparable<T>> implements Node<List<T>> {
    // Keys; only the first `length` slots are in use.
    protected Object[] values;
    // Children; null until initBranches() is called.
    private MultiwayNode<T>[] branches;
    // Number of keys currently stored.
    protected int length;
    // Maximum number of children.
    protected int m;
    private MultiwayNode<T> parent;
    // Slot this node occupies in parent.branches.
    protected int indexInParent;

    /**
     * @param m maximum number of children; the node stores at most {@code m - 1} keys
     */
    public MultiwayNode(int m) {
        this.m = m;
        this.values = new Object[m - 1];
        // branches is allocated lazily by initBranches().
    }

    /**
     * @return a new list with the keys currently stored, in slot order
     */
    @Override
    public List<T> getValue() {
        final ArrayList<T> list = new ArrayList<>();
        for (int i = 0; i < length; i++) {
            list.add((T) values[i]);
        }
        return list;
    }

    /**
     * @return the parent node, or {@code null} if none has been set
     */
    @Override
    public MultiwayNode<T> getParent() {
        return parent;
    }

    /**
     * @return the non-null children in slots {@code 0..length}; an empty list
     *         when no child array has been allocated
     */
    @Override
    @SuppressWarnings("unchecked")
    public <N extends Node<List<T>>> List<N> getChildren() {
        if (branches == null) {
            return Collections.emptyList();
        }
        final List<N> children = new ArrayList<>();
        for (int i = 0; i <= length; i++) {
            final MultiwayNode<T> branch = branches[i];
            if (branch != null) {
                // Caller chooses N; the element is always a MultiwayNode<T>.
                children.add((N) branch);
            }
        }
        return children;
    }

    /**
     * @return one line per parent-to-child link of the subtree, in the form
     *         {@code "parentKeys":B<slot> -> "childKeys"}
     */
    @Override
    public String edges() {
        StringBuilder sb = new StringBuilder();
        this.bfs(x -> {
            MultiwayNode<T> node = (MultiwayNode<T>) x;
            if (node.length > 0 && node.branches != null) {
                for (int i = 0; i <= node.length; i++) {
                    final MultiwayNode<T> branch = node.branches[i];
                    if (branch != null) {
                        sb.append(" \"").append(node.getValue()).append("\"");
                        sb.append(":B").append(i);
                        sb.append(" -> ");
                        sb.append("\"").append(branch.getValue()).append("\"\n");
                    }
                }
            }
            return true;
        });
        return sb.toString();
    }

    /**
     * Picks the child to descend into from a "not found" search result.
     *
     * @param ints two-element result of a failed key search; {@code ints[0] == -1}
     *             selects child 0, otherwise child {@code ints[1]} is selected
     * @return the selected child, or {@code null} when there is no child array
     */
    protected MultiwayNode<T> nextNode(int[] ints) {
        if (branches == null) {
            return null;
        }
        MultiwayNode<T> x;
        if (ints[0] == -1) {
            x = this.branches[0];
        } else {
            x = this.branches[ints[1]];
        }
        return x;
    }

    /**
     * @return {@code true} when the child array is missing or contains only nulls
     */
    protected boolean isLeaf() {
        // Original author's note: this is not a proper way to detect a leaf.
        // It scans every slot, including those beyond `length`.
        if (branches == null) {
            return true;
        }
        for (MultiwayNode<T> n : branches) {
            if (n != null) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return {@code true} when the node holds {@code m - 1} keys
     */
    protected boolean isFull() {
        return length == m - 1;
    }

    /**
     * Assigns an x coordinate to every node of the subtree for drawing. Nodes
     * without a first child get the next value of a counter that advances by 2;
     * other nodes are centred between their first child and the last child
     * before the first empty slot.
     *
     * @param yCoordinates not used by this implementation
     * @return map from node to x coordinate
     */
    @Override
    public Map<Node<List<T>>, Integer> getXCoordinates(Map<Node<List<T>>, Integer> yCoordinates) {
        HashMap<Node<List<T>>, Integer> map = new HashMap<>();
        int[] index = {0};
        this.postOrderTwoStacks(x -> {
            MultiwayNode<T> node = (MultiwayNode<T>) x;
            if (node.branches != null && node.branches[0] != null) {
                Integer a = map.get(node.branches[0]);
                if (a == null) {
                    a = 0;
                }
                // Last non-null child before the first gap, not simply slot `length`.
                MultiwayNode<T> to = null;
                for (int i = 0; i <= node.length; i++) {
                    final MultiwayNode<T> branch = node.branches[i];
                    if (branch == null) {
                        break;
                    } else {
                        to = branch;
                    }
                }
                Integer b = map.get(to);
                if (b == null) {
                    b = 0;
                }
                // Midpoint of the two outer children.
                map.put(x, (a + b) / 2);
            } else {
                map.put(x, index[0] += 2);
            }
            return true;
        });
        return map;
    }

    /**
     * @return graph attributes describing the node as a two-row record: the keys
     *         on top and one port {@code B<i>} per child slot below
     */
    @Override
    public String graphAttributes() {
        StringBuilder sb = new StringBuilder();
        sb.append("shape=record;label=\"{{");
        boolean first = true;
        for (int i = 0; i < length; i++) {
            if (first) {
                first = false;
            } else {
                sb.append("|");
            }
            sb.append(values[i]);
        }
        sb.append("}|{");
        first = true;
        for (int i = 0; i < length + 1; i++) {
            if (first) {
                first = false;
            } else {
                sb.append("|");
            }
            sb.append("<B").append(i).append(">").append(i);
        }
        sb.append("}}\"");
        return sb.toString();
    }

    /**
     * @return the stored keys joined by {@code |}
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (int i = 0; i < length; i++) {
            if (first) {
                first = false;
            } else {
                sb.append("|");
            }
            sb.append(values[i]);
        }
        return sb.toString();
    }

    /**
     * @param i child slot
     * @return the child in slot {@code i}, or {@code null} when there is no child array
     */
    protected MultiwayNode<T> branches(int i) {
        if (branches == null) {
            return null;
        }
        return branches[i];
    }

    /**
     * Stores {@code node} in child slot {@code i} and, when it is not null,
     * updates its parent link and {@code indexInParent}. Does nothing when the
     * child array has not been allocated.
     *
     * @param i    child slot
     * @param node child to store, may be {@code null}
     */
    protected void branches(int i, MultiwayNode<T> node) {
        if (branches == null) {
            return;
        }
        branches[i] = node;
        if (node != null) {
            node.indexInParent = i;
            node.parent = this;
        }
    }

    /**
     * Allocates the child array ({@code m} slots) if it does not exist yet.
     */
    protected void initBranches() {
        if (branches == null) {
            branches = new MultiwayNode[m];
        }
    }

    /**
     * Drops the child array, which makes {@link #isLeaf()} return true.
     */
    protected void clearBranches() {
        branches = null;
    }

    /**
     * @return the negation of {@link #isLeaf()}
     */
    protected boolean isNotLeaf() {
        return !isLeaf();
    }

    /**
     * @return the right neighbour of this node, or the left neighbour when this
     *         node is the rightmost child; {@code null} when the node has no
     *         parent or no neighbour slot exists
     */
    protected MultiwayNode<T> getSibling() {
        // A root has no sibling; callers already handle a null result.
        if (parent == null || parent.branches == null) {
            return null;
        }
        if (indexInParent == parent.length) {
            // Rightmost child: use the left neighbour, if there is one.
            return indexInParent > 0 ? parent.branches[indexInParent - 1] : null;
        }
        return parent.branches[indexInParent + 1];
    }

    /**
     * Shifts all keys and children {@code step} slots to the left. {@code length}
     * is not changed.
     *
     * @param step number of slots to shift by
     */
    protected void moveLeft(int step) {
        moveLeft(0, step);
    }

    /**
     * Shifts keys and children from slot {@code from} onwards {@code step} slots
     * to the left. {@code length} is not changed.
     *
     * @param from first destination slot
     * @param step number of slots to shift by
     */
    protected void moveLeft(int from, int step) {
        for (int i = from; i < this.length - step; i++) {
            this.values[i] = this.values[i + step];
        }
        if (this.isNotLeaf()) {
            for (int i = from; i <= this.length - step; i++) {
                this.branches(i, this.branches(i + step));
            }
        }
    }

    /**
     * Shifts all keys and children {@code step} slots to the right, walking from
     * the end so nothing is overwritten. {@code length} is not changed.
     *
     * @param step number of slots to shift by
     */
    protected void moveRight(int step) {
        for (int i = this.length - 1; i >= 0; i--) {
            this.values[i + step] = this.values[i];
        }
        if (this.isNotLeaf()) {
            for (int i = this.length; i >= 0; i--) {
                this.branches(i + step, this.branches(i));
            }
        }
    }
}
MultiwayTree代码
package com.youngthing.trees.b;
import com.youngthing.trees.Node;
import com.youngthing.trees.Tree;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Base class for multiway trees. It only stores the root node; the mutating
 * operations are empty and the lookup finds nothing, so subclasses are
 * expected to override them.
 * created at 09/02/2022
 *
 * @author 花书粉丝
 * <a href="mailto://yujianbo@chtwm.com">yujianbo@chtwm.com</a>
 * @since 1.0.0
 */
public class MultiwayTree<T extends Comparable<T>> implements Tree<T, List<T>> {

    /** Root node of the tree; assigned by subclasses. */
    protected MultiwayNode<T> root;

    /**
     * @return the current root node
     */
    @Override
    public Node<List<T>> getRoot() {
        return this.root;
    }

    /**
     * No-op in this base class.
     *
     * @param t key to insert (ignored)
     */
    @Override
    public void add(T t) {
        // intentionally empty
    }

    /**
     * No-op in this base class.
     *
     * @param t key to remove (ignored)
     */
    @Override
    public void delete(T t) {
        // intentionally empty
    }

    /**
     * Always reports "not found" in this base class.
     *
     * @param t key to look up (ignored)
     * @return always {@code null}
     */
    @Override
    public Node<List<T>> findNode(T t) {
        return null;
    }
}
SortedMultiwayNode代码
package com.youngthing.trees.b;
/**
 * Multiway node whose keys are kept in ascending order, adding key search on
 * top of {@link MultiwayNode}.
 * created at 11/02/2022
 *
 * @author 花书粉丝
 * <a href="mailto://yujianbo@chtwm.com">yujianbo@chtwm.com</a>
 * @since 1.0.0
 */
public class SortedMultiwayNode<T extends Comparable<T>> extends MultiwayNode<T> {

    /**
     * @param m maximum number of children of this node
     */
    public SortedMultiwayNode(int m) {
        super(m);
    }

    /**
     * Binary search for {@code key} among the keys of this node. The length of
     * the returned array tells whether the key was found.
     *
     * @param key key to search for
     * @return a one-element array {@code {index}} when the key is stored at
     *         {@code index}; otherwise a two-element array {@code {lo, hi}}
     *         where {@code hi} is the child slot to descend into and
     *         {@code lo == -1} means the key is smaller than every stored key
     */
    protected int[] binarySearch(T key) {
        // An empty node has nothing to compare against; report "smaller than all"
        // instead of calling compareTo on a null slot.
        if (length == 0) {
            return new int[]{-1, 0};
        }
        int from = 0;
        int to = length - 1;
        // Narrow the window until at most two candidates remain.
        while (to - from > 1) {
            int index = (from + to) >>> 1;
            final int c = key.compareTo((T) values[index]);
            if (c > 0) {
                from = index;
            } else if (c < 0) {
                to = index;
            } else {
                return new int[]{index};
            }
        }
        final int f = key.compareTo((T) values[from]);
        if (f == 0) {
            return new int[]{from};
        } else if (f < 0) {
            return new int[]{-1, from};
        }
        final int t = key.compareTo((T) values[to]);
        if (t == 0) {
            return new int[]{to};
        } else if (t < 0) {
            return new int[]{from, to};
        }
        return new int[]{to, to + 1};
    }

    /**
     * Searches the subtree rooted at this node for {@code t}.
     *
     * @param t key to search for
     * @return the node that stores the key, or {@code null} when it is absent
     */
    public MultiwayNode<T> findNode(T t) {
        SortedMultiwayNode<T> x = this;
        while (x != null) {
            final int[] ints = x.binarySearch(t);
            if (ints.length == 1) {
                return x;
            } else if (x.isLeaf()) {
                // The node being visited (not the start node) has no children left to try.
                return null;
            }
            x = (SortedMultiwayNode<T>) x.nextNode(ints);
        }
        return null;
    }

    /**
     * @return the first key of the leftmost leaf below this node
     */
    protected T min() {
        SortedMultiwayNode<T> t = this;
        while (!t.isLeaf()) {
            t = (SortedMultiwayNode<T>) t.branches(0);
        }
        return (T) t.values[0];
    }
}
开源地址
https://e.coding.net/buildt/data-structure/trees.git