B-tree 简易源代码实现
最近学习了B-tree,找网上源码照着实现了一个简单的版本,加了一些注释,记录巩固一下相关知识
#include<stdio.h>
#include<stdlib.h>
#define M 6
#define DEGREE (M/2)
/* Key type stored in the tree; keys are compared with the built-in <, > and == operators. */
typedef int KEY_VALUE;
/*
 * One B-tree node. btree_node_create() allocates both arrays on the heap
 * and btree_node_destroy() releases them together with the node.
 */
typedef struct _btree_node {
/* Key array with room for 2*t - 1 entries; the first `num` are in use. */
KEY_VALUE *keys;
/* Child pointer array; the tree routines visit indices 0..num on internal nodes. */
struct _btree_node **childrens;
/* Number of keys currently stored in `keys`. */
int num;
/* 1 for a leaf node, 0 for an internal node. */
int leaf;
}btree_node;
/* Handle for a whole tree: the root pointer plus the degree parameter shared by all nodes. */
typedef struct _btree {
/* Root node; btree_delete_key() sets it to NULL once the last key has been removed. */
btree_node *root;
/* Degree parameter t: a node is treated as full when it holds 2*t - 1 keys. */
int t;
}btree;
/*
 * Allocate an empty, zero-initialised B-tree node.
 *
 * t    - degree parameter; the node gets room for 2*t - 1 keys.
 * leaf - stored verbatim in node->leaf (1 = leaf, 0 = internal).
 *
 * Returns the new node, or NULL when t < 1 or any allocation fails.
 * The caller owns the node and releases it with btree_node_destroy().
 */
btree_node *btree_node_create(int t, int leaf) {
    if (t < 1) {
        /* 2*t - 1 would not be a positive element count. */
        return NULL;
    }

    btree_node *node = malloc(sizeof *node);
    if (node == NULL) {
        printf("btree_node_create : malloc failed");
        return NULL; /* must not fall through and dereference NULL */
    }

    node->leaf = leaf;
    node->num = 0;

    /* calloc zero-fills, so unused key slots read as 0. */
    node->keys = calloc(2 * (size_t)t - 1, sizeof *node->keys);
    if (node->keys == NULL) {
        free(node);
        return NULL;
    }

    /*
     * The array holds pointers, so it is sized per pointer rather than per
     * node struct. 2*t slots are what a full node needs; one extra slot is
     * kept as slack so a store to index 2*t (one past the last child of a
     * full node) stays inside the allocation.
     */
    node->childrens = calloc(2 * (size_t)t + 1, sizeof *node->childrens);
    if (node->childrens == NULL) {
        free(node->keys);
        free(node);
        return NULL;
    }

    return node;
}
/*
 * Release a single node together with its key array and child-pointer array.
 * Child nodes themselves are not visited; only the pointer array is freed.
 * Passing NULL is allowed and does nothing.
 */
void btree_node_destroy(btree_node *node) {
    if (node != NULL) {
        /* free(NULL) is a no-op, so the arrays need no individual guards. */
        free(node->childrens);
        free(node->keys);
        free(node);
    }
}
/*
 * Initialise *T as an empty tree with degree parameter t.
 * The root starts out as a leaf obtained from btree_node_create(); the result
 * is stored unchecked, so T->root is NULL when that call returns NULL.
 */
void btree_create(btree *T, int t) {
    T->t = t;
    T->root = btree_node_create(t, 1);
}
/*
 * Split the full child x->childrens[i] (2*t - 1 keys) into two nodes of
 * t - 1 keys each and lift its median key into x at position i.
 * x must have room for one more key and one more child.
 */
void btree_split_child(btree *T, btree_node *x, int i) {
    const int degree = T->t;
    btree_node *full = x->childrens[i];
    btree_node *sibling = btree_node_create(degree, full->leaf);
    int pos;

    /* The new right-hand sibling takes the upper t - 1 keys ... */
    sibling->num = degree - 1;
    for (pos = degree; pos < 2 * degree - 1; pos++) {
        sibling->keys[pos - degree] = full->keys[pos];
    }
    /* ... and, for an internal node, the upper t children as well. */
    if (!full->leaf) {
        for (pos = degree; pos < 2 * degree; pos++) {
            sibling->childrens[pos - degree] = full->childrens[pos];
        }
    }
    /* The split node keeps only its lower t - 1 keys. */
    full->num = degree - 1;

    /* Open a gap in the parent's child array right after slot i. */
    for (pos = x->num + 1; pos > i + 1; pos--) {
        x->childrens[pos] = x->childrens[pos - 1];
    }
    x->childrens[i + 1] = sibling;

    /* Open the matching gap in the parent's key array and lift the median. */
    for (pos = x->num; pos > i; pos--) {
        x->keys[pos] = x->keys[pos - 1];
    }
    x->keys[i] = full->keys[degree - 1];
    x->num++;
}
/*
 * Insert k into the subtree rooted at x, where x itself is not full.
 * Walks down towards a leaf, splitting every full child before stepping
 * into it, then places k in the leaf in sorted position.
 */
void btree_insert_nonfull(btree *T, btree_node *x, KEY_VALUE k) {
    btree_node *cur = x;
    int slot;

    /* Descend through internal nodes. */
    while (cur->leaf != 1) {
        /* slot = index of the child whose key range contains k */
        slot = cur->num;
        while (slot > 0 && cur->keys[slot - 1] > k) {
            slot--;
        }
        if (cur->childrens[slot]->num == 2 * T->t - 1) {
            /* Child is full: split it, then pick the correct half. */
            btree_split_child(T, cur, slot);
            if (k > cur->keys[slot]) {
                slot++;
            }
        }
        cur = cur->childrens[slot];
    }

    /* Leaf: shift larger keys right by one, insertion-sort style. */
    slot = cur->num;
    while (slot > 0 && cur->keys[slot - 1] > k) {
        cur->keys[slot] = cur->keys[slot - 1];
        slot--;
    }
    cur->keys[slot] = k;
    cur->num++;
}
/*
 * Insert key into the tree. Duplicate keys are not rejected.
 *
 * If the root is full it is split first, which grows the tree by one level.
 * A NULL root (the state btree_delete_key() leaves after removing the last
 * key) is replaced by a fresh empty leaf.
 *
 * Returns 0 on success, or -1 if a root node could not be allocated; in that
 * case the tree is left unchanged.
 */
int btree_insert(btree *T, KEY_VALUE key) {
    if (T->root == NULL) {
        T->root = btree_node_create(T->t, 1);
        if (T->root == NULL) {
            return -1;
        }
    }

    btree_node *node = T->root;
    if (node->num == 2 * T->t - 1) {
        /* Root is full: hang it under a new internal root and split it. */
        btree_node *root = btree_node_create(T->t, 0);
        if (root == NULL) {
            return -1;
        }
        T->root = root;
        root->childrens[0] = node;
        btree_split_child(T, root, 0);

        /* Keys larger than the lifted median belong in the right half. */
        int i = (root->keys[0] < key) ? 1 : 0;
        btree_insert_nonfull(T, root->childrens[i], key);
    } else {
        btree_insert_nonfull(T, node, key);
    }
    return 0;
}
/*
 * Print every key of the subtree rooted at x in ascending (in-order) order,
 * one character per key with no separator. A NULL subtree prints nothing.
 */
void btree_traverse(btree_node *x) {
    if (x == NULL) {
        return;
    }

    int i;
    for (i = 0; i < x->num; i++) {
        /* Child i holds the keys that sort before keys[i]. */
        if (x->leaf == 0) {
            btree_traverse(x->childrens[i]);
        }
        /* "%c" takes an int argument; "%C" is not an ISO C conversion. */
        printf("%c", x->keys[i]);
    }
    /* The last child holds the keys after the final key; leaves have none. */
    if (x->leaf == 0) {
        btree_traverse(x->childrens[i]);
    }
}
/*
 * Dump the subtree rooted at node, one node per paragraph: a header line with
 * the depth (layer), key count and leaf flag, then the keys printed as
 * characters. Non-NULL children are printed recursively with layer + 1.
 * A NULL node prints "the tree is empty".
 */
void btree_print(btree *T, btree_node *node, int layer) {
    (void)T; /* only referenced by the disabled debug dump below */

    if (node == NULL) {
        printf("the tree is empty\n");
        return;
    }

    int i;
    printf("\nlayer = %d keynum = %d is_leaf = %d\n", layer, node->num, node->leaf);
    for (i = 0; i < node->num; i++) {
        printf("%c ", node->keys[i]);
    }
    printf("\n");

#if 0
    /* Debug aid: raw address of the node and of each of its 2*t child slots. */
    printf("%p\n", (void *)node);
    for (i = 0; i < 2 * T->t; i++) {
        printf("%p ", (void *)node->childrens[i]);
    }
    printf("\n");
#endif

    for (i = 0; i <= node->num; i++) {
        if (node->childrens[i]) {
            btree_print(T, node->childrens[i], layer + 1);
        }
    }
}
/*
 * Binary search over node->keys[low..high] (inclusive bounds).
 * Returns the first index in that range whose key is not smaller than key,
 * or high + 1 if every key in the range is smaller.
 * Returns -1 when the range is empty or either bound is negative.
 */
int btree_bin_search(btree_node *node, int low, int high, KEY_VALUE key) {
    if (low < 0 || high < 0 || low > high) {
        return -1;
    }

    int lo = low;
    int hi = high;
    /* The guard above ensures lo <= hi, so the body runs at least once. */
    do {
        int mid = (lo + hi) / 2;
        if (node->keys[mid] < key) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    } while (lo <= hi);

    return lo;
}
/*
 * Merge the triple {childrens[idx], keys[idx], childrens[idx+1]} of node into
 * the left child and destroy the right child.
 *
 * Both children are expected to hold exactly t - 1 keys, so the merged node
 * ends up with 2*t - 1 keys. If node is the tree's root and loses its last
 * key, the merged child becomes the new root and node is destroyed; callers
 * must not touch node after that.
 */
void btree_merge(btree *T, btree_node *node, int idx) {
    btree_node *left = node->childrens[idx];
    btree_node *right = node->childrens[idx + 1];
    const int t = T->t;
    int i;

    /* The separator key drops down between the two halves. */
    left->keys[t - 1] = node->keys[idx];
    for (i = 0; i < t - 1; i++) {
        left->keys[t + i] = right->keys[i];
    }
    if (!left->leaf) {
        for (i = 0; i < t; i++) {
            left->childrens[t + i] = right->childrens[i];
        }
    }
    left->num += t;

    /* Everything right owned has been copied into left. */
    btree_node_destroy(right);

    /* Close the gap left in the parent by the key and child just removed. */
    for (i = idx + 1; i < node->num; i++) {
        node->keys[i - 1] = node->keys[i];
        node->childrens[i] = node->childrens[i + 1];
    }
    /*
     * Slot node->num is the one vacated by the shift (or, when idx was the
     * last key, the slot that still points at the freed right child).
     * Clearing it stays in bounds and leaves no dangling pointer behind.
     */
    node->childrens[node->num] = NULL;
    node->num -= 1;

    /* Only the root may become empty; the tree then shrinks by one level. */
    if (node->num == 0 && node == T->root) {
        T->root = left;
        btree_node_destroy(node);
    }
}
void btree_delete_key(btree *T, btree_node *node, KEY_VALUE key) {
if (node == NULL) return ;
int idx = 0, i;
while (idx < node->num && key > node->keys[idx]) {
idx ++;
}
if (idx < node->num && key == node->keys[idx]) { //如果找到该key
if (node->leaf) { //叶子节点
for (i = idx; i < node->num-1; i ++) { //后移覆盖,删除key
node->keys[i] = node->keys[i+1];
}
node->keys[node->num-1] = 0; //最后一位置零
node->num --;
if (node->num == 0) { //root
free(node);
T->root = NULL;
}
return;
} else if (node->childrens[idx]->num >= T->t) { //如果该key左子树数量足够(减一后仍满足定义)
btree_node *left = node->childrens[idx];
node->keys[idx] = left->keys[left->num - 1]; //用左边最大key覆盖要删除的key
btree_delete_key(T, left, left->keys[left->num- 1]); //问题转化为删除另一个key,递归删除
} else if (node->childrens[idx+1]->num >= T->t) { //如果该key右子树数量足够(减一后仍满足定义)
btree_node *right = node->childrens[idx+1];
node->keys[idx] = right->keys[0]; //用右边最小key覆盖要删除的key
btree_delete_key(T, right, right->keys[0]); //问题转化为删除另一个key,递归删除
} else { //左右孩子都不够借
btree_merge(T, node, idx); //归并第idx个key及其左右子树
btree_delete_key(T, node->childrens[idx], key); //归并完之后删除key,删除key所在节点变为子树idx
}
} else { //如果所有关键字都小于要删除的key或者key不存在, idx == num || key < keys[idx]
btree_node *child = node->childrens[idx];
if (child == NULL) { //子树为空,查找不到,退出
printf("Cannot del key = %d\n", key);
return ;
}
//在临界值时需要先增加关键字,因为后续删除关键字可能会引起子树合并,关键字下沉,关键字下沉必须保证下沉后是安全状态,因为是自上而下的维护
if (child->num == T->t - 1) { //如果子树关键字数位于临界值,需要向兄弟借,父子换位或合并
btree_node *left = NULL;
btree_node *right = NULL;
if (idx - 1 >= 0)
left = node->childrens[idx-1];
if (idx + 1 <= node->num)
right = node->childrens[idx+1];
if ((left && left->num >= T->t) ||
(right && right->num >= T->t)) { //有一颗兄弟存在且数量够借
int richR = 0; //判定右兄弟关键字是否够借且大于左兄弟关键字个数
if (right) richR = 1;
if (left && right) richR = (right->num > left->num) ? 1 : 0;
if (right && right->num >= T->t && richR) { //右兄弟关键字数量够借且大于左兄弟
//父子换位
child->keys[child->num] = node->keys[idx]; //父节点关键字加到子树
child->childrens[child->num+1] = right->childrens[0]; //右兄弟第一个子树加到 child 的子树的最后面(因为右兄弟的第一棵子树应该是小于第一个关键字的,
//此时该关键字到了node(父节点), 所以这颗子树只能放到 child 的子树的最后面)
child->num ++; //子树关键字数加一
node->keys[idx] = right->keys[0]; //右兄弟第一个关键字覆盖刚刚的加入到 child 的 node里的关键字
//维护右兄弟关键字和子树
for (i = 0; i < right->num - 1; i ++) {
right->keys[i] = right->keys[i+1];
right->childrens[i] = right->childrens[i+1];
}
right->keys[right->num-1] = 0; //右兄弟关键字数迁移之后最后一个置零
right->childrens[right->num-1] = right->childrens[right->num]; //前移最后一刻子树
right->childrens[right->num] = NULL; //最后一棵子树置空(已经前移了)
right->num --; //右兄弟关键字数减一
} else { //左兄弟关键字数量够借且大于右兄弟
for (i = child->num; i > 0; i --) { //子树关键字后移,腾出keys[0]的位置,子树一样后移
child->keys[i] = child->keys[i-1];
child->childrens[i+1] = child->childrens[i];
}
child->childrens[1] = child->childrens[0]; //子树0-》1
child->childrens[0] = left->childrens[left->num]; //子树0被左兄弟最后一棵子树覆盖
child->keys[0] = node->keys[idx-1]; //子树关键字被父节点即node的关键字覆盖
child->num ++; //子树关键字数加一
node->keys[idx-1] = left->keys[left->num-1]; //node(父节点) 关键字被左兄弟末尾关键字覆盖
left->keys[left->num-1] = 0; //左兄弟末尾关键字置零
left->childrens[left->num] = NULL; //左兄弟末尾子树置空(因为已经被移到child上)
left->num --; //左兄弟关键字数减一
}
} else if ((!left || (left->num == T->t - 1))
&& (!right || (right->num == T->t - 1))) { //左右都不满足,归并左兄弟或右兄弟
if (left && left->num == T->t - 1) { //把child归并到左兄弟
btree_merge(T, node, idx-1);
child = left;
} else if (right && right->num == T->t - 1) { //把child归并到右胸地
btree_merge(T, node, idx);
}
}
}
btree_delete_key(T, child, key); //递归删除
}
}
/*
 * Public entry point for deletion.
 * Returns -1 when the tree has no root, otherwise runs btree_delete_key()
 * from the root and returns 0 (also when the key was not found).
 */
int btree_delete(btree *T, KEY_VALUE key) {
    int status = -1;

    if (T->root != NULL) {
        btree_delete_key(T, T->root, key);
        status = 0;
    }
    return status;
}
/*
 * Demo driver: builds a tree with degree parameter 3 from the letters A..Z,
 * prints it, then deletes the letters from Z down to A, printing the tree
 * after every deletion.
 */
int main() {
    btree T = {0};
    btree_create(&T, 3);
    if (T.root == NULL) {
        /* btree_create() cannot report failure itself; catch it here before
         * the first insert dereferences the root. */
        fprintf(stderr, "btree_create failed\n");
        return 1;
    }

    srand(48);

    int i = 0;
    /* Exactly 26 characters: the array is deliberately not NUL-terminated. */
    char key[26] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    for (i = 0; i < 26; i++) {
        printf("%c ", key[i]);
        btree_insert(&T, key[i]);
    }
    btree_print(&T, T.root, 0);

    for (i = 0; i < 26; i++) {
        printf("\n---------------------------------\n");
        btree_delete(&T, key[25 - i]);
        btree_print(&T, T.root, 0);
    }
    return 0;
}