Self-printable B+ Tree
In this project, you are supposed to implement a B+ tree of order 3, with the following operations: initialize, insert (with splitting) and search. The B+ tree should be able to print out itself.
Input Specification:
Each input file contains one test case. For each case, the first line contains a positive number N (≤ 10^4), the number of integer keys to be inserted. Then a line of the N positive integer keys follows. All the numbers in a line are separated by spaces.
Output Specification:
For each test case, insert the keys into an initially empty B+ tree of order 3 according to the given order. Print in a line `Key X is duplicated` where X already exists when being inserted. After all the insertions, print out the B+ tree in a top-down level-order format as shown by the samples.
Sample Input 1:
6
7 8 9 10 7 4
Sample Output 1:
Key 7 is duplicated
[9]
[4,7,8][9,10]
Sample Input 2:
10
3 1 4 5 9 2 6 8 7 0
Sample Output 2:
[6]
[2,4][8]
[0,1][2,3][4,5][6,7][8,9]
Sample Input 3:
3
1 2 3
Sample Output 3:
[1,2,3]
Analysis:
总体来讲只是B+树的基本操作,基本只涉及Insertion与Search操作。
B+树的关键在于,要将叶子节点和内部节点的操作统一起来理解。事实上,一个满节点的插入必然意味着该节点的分裂,一个节点的分裂必然意味着其父节点的插入,这一观点对叶子与内点都适用。
例如,一个含有2,3,5的叶子节点看到 4 要插入,和,一个拥有三个子节点的内部节点看到一个新的子节点要插入,其实是一样的。
提几处注意:
- 由于叶子分裂和内点分裂的高度相似性,可以将叶子与内点采用同一数据类型struct node,设置isleaf变量来提示是否是叶子
- 设置val_num、ptr_num分别提示某节点的值数量、子节点数量。显然,对于叶子节点val_num更有意义,对于内部节点ptr_num更有意义
- 为便于内点的value(即分界值)的更新,将原本的M-1个value值存入数组val[1]-val[M-1],val[0]用于存放最左边子节点的最小value(这一数据在正常B+树的设计中并未出现)
- 在分裂时,会产生递归结构
此外,对于叶子节点中val的比较、排序和对于内部节点中子节点的比较、排序是非常类似的,因此可以考虑用C++的template function写,(需要对比较子节点大小的>、<进行重载),可以显著减少代码量。
My answer:
前面80行为大量的辅助函数、辅助数据结构。核心的函数在main( )后。
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
// One B+ tree node. Leaves and internal nodes share this layout; isleaf tells
// them apart.
typedef struct node {
struct node* left; // internal node: first child; leaf: NULL
int val[3]; // leaf: its keys in ascending order; internal node: val[i] is the least key found in the leaves under its i-th child (val[0] for left, val[1] for mid, val[2] for right)
struct node* mid; // internal node: second child; leaf: NULL
struct node* right; // internal node: third child when ptr_num == 3; leaf: next leaf in the linked list of leaves
bool isleaf; // true for a leaf, false for an internal node
int val_num; // number of keys held by a leaf
int ptr_num; // number of children of an internal node; 0 for a leaf
} Node;
// Handle to a B+ tree.
typedef struct bptree {
struct node* root; // top node of the tree; NULL while the tree is empty
struct node* leaves; // head of the linked list of leaves (leaves are chained through their `right` field)
} BpTree;
// Capacity of the fixed-size arrays in this file; chosen slightly above the
// 10^4 key limit given in the problem statement.
#define MAXKEYN 10005
// Placeholder written into val[] slots that hold no key.
#define EMP -1
// LIFO of the internal nodes visited while descending from the root; it lets
// an insertion walk back up through the ancestors without parent pointers.
//
// stackptr points at the next free slot: it equals `stack` when the stack is
// empty and `stack + MAXKEYN` when it is full. It therefore always stays
// inside the array or one past its end (forming `stack - 1` would be
// undefined behaviour), and a push can never write past the last element.
Node* stack[MAXKEYN];
Node** stackptr = stack;

// Pushes p. Returns false, storing nothing, when all MAXKEYN slots are in use.
bool push(Node* p) {
    if (stackptr == stack + MAXKEYN) {
        return false;
    }
    *stackptr++ = p;
    return true;
}

// Removes and returns the most recently pushed node, or NULL when the stack
// is empty.
Node* pop(void) {
    if (stackptr == stack) {
        return NULL;
    }
    return *--stackptr;
}
// Circular FIFO of node pointers used for the level-order printout.
// One slot is always left unused so that "empty" and "full" can be told
// apart; the queue therefore holds at most MAXKEYN - 1 nodes at a time.
Node* queue[MAXKEYN];
int front = 0;   // index of the oldest stored element
int rear = -1;   // index of the newest stored element; -1 before the first enqueue

// Returns true when the queue holds no element.
bool EmptyQueue(void) {
    return front == (rear + 1) % MAXKEYN;
}

// Appends p. Returns false, storing nothing, when the queue is full.
bool enqueue(Node* p) {
    if (front == (rear + 2) % MAXKEYN) {
        return false;
    }
    // Wrap the index so it matches the modular empty/full tests and never
    // runs past the end of the array.
    rear = (rear + 1) % MAXKEYN;
    queue[rear] = p;
    return true;
}

// Removes and returns the oldest element, or NULL when the queue is empty.
Node* dequeue(void) {
    if (EmptyQueue()) {
        return NULL;
    }
    Node* oldest = queue[front];
    front = (front + 1) % MAXKEYN;
    return oldest;
}
// Stores the three keys a, b, c into p->val[0..2] in ascending order.
void Arrange3(int a, int b, int c, Node* p) {
    int sorted[3] = { a, b, c };
    // Insertion sort, ascending.
    for (int i = 1; i < 3; i++) {
        int cur = sorted[i];
        int j = i - 1;
        while (j >= 0 && sorted[j] > cur) {
            sorted[j + 1] = sorted[j];
            j--;
        }
        sorted[j + 1] = cur;
    }
    for (int i = 0; i < 3; i++) {
        p->val[i] = sorted[i];
    }
}
// Distributes the four keys a, b, c, d over two leaves: the two smallest go
// to p->val[0..1] and the two largest to q->val[0..1], each pair ascending.
// val[2] of either node is left untouched.
void Arrange4(int a, int b, int c, int d, Node* p, Node* q) {
    int sorted[4] = { a, b, c, d };
    // Insertion sort, ascending.
    for (int i = 1; i < 4; i++) {
        int cur = sorted[i];
        int j = i - 1;
        while (j >= 0 && sorted[j] > cur) {
            sorted[j + 1] = sorted[j];
            j--;
        }
        sorted[j + 1] = cur;
    }
    p->val[0] = sorted[0];
    p->val[1] = sorted[1];
    q->val[0] = sorted[2];
    q->val[1] = sorted[3];
}
// Makes a, b and c the three children of par, ordered by their val[0]:
// the smallest becomes par->left, the next par->mid, the largest par->right.
// Only the child pointers of par are written.
void InternalArrange3(Node* a, Node* b, Node* c, Node* par) {
    Node* child[3] = { a, b, c };
    // Exchange sort into descending order of val[0].
    for (int i = 0; i < 2; i++) {
        for (int j = i + 1; j < 3; j++) {
            if (child[i]->val[0] < child[j]->val[0]) {
                Node* tmp = child[i];
                child[i] = child[j];
                child[j] = tmp;
            }
        }
    }
    par->left = child[2];
    par->mid = child[1];
    par->right = child[0];
}
// Distributes four children over two parents, ordered by their val[0]:
// the two smallest become par1->left / par1->mid and the two largest become
// par2->left / par2->mid. The `right` pointers of both parents are untouched.
void InternalArrange4(Node* a, Node* b, Node* c, Node* d, Node* par1, Node* par2) {
    Node* child[4] = { a, b, c, d };
    // Exchange sort into descending order of val[0].
    for (int i = 0; i < 3; i++) {
        for (int j = i + 1; j < 4; j++) {
            if (child[i]->val[0] < child[j]->val[0]) {
                Node* tmp = child[i];
                child[i] = child[j];
                child[j] = tmp;
            }
        }
    }
    par1->left = child[3];
    par1->mid = child[2];
    par2->left = child[1];
    par2->mid = child[0];
}
// Inserts key x into the tree; returns false, leaving the tree unchanged,
// when x is already present.
bool Insert(int x, BpTree* tree_add);
// Attaches new_child to the internal node cur_node, splitting cur_node and
// its ancestors as needed.
bool InternalInsert(Node* new_child, Node* cur_node, BpTree* tree_add);
// Returns the leaf that holds key x, or NULL when x is not in the tree.
Node* Search(int x, BpTree tree);
// Reads N followed by N keys from standard input, inserts them one by one
// into an initially empty B+ tree (printing "Key X is duplicated" for every
// key that is already present), then prints the tree level by level: one
// line per level, each node as "[k1,k2,...]".
//
// Keys are inserted as they are read, so no key buffer is needed and N is
// not limited by a fixed array size. Returns EXIT_FAILURE when N cannot be
// read; if the key list ends early, the keys read so far are still printed.
int main(void)
{
    int n;
    if (scanf("%d", &n) != 1) {
        return EXIT_FAILURE;
    }

    BpTree tree;
    tree.leaves = NULL;
    tree.root = NULL;

    for (int i = 0; i < n; i++) {
        int key;
        if (scanf("%d", &key) != 1) {
            break;  // input ended early or is malformed
        }
        if (!Insert(key, &tree)) {
            printf("Key %d is duplicated\n", key);
        }
    }

    if (tree.root == NULL) {
        return 0;  // nothing was inserted, so there is nothing to print
    }

    // Level-order traversal.
    int remaining = 1;   // nodes still to be printed on the current level
    int next_level = 0;  // children counted so far, i.e. size of the next level
    enqueue(tree.root);
    while (!EmptyQueue()) {
        Node* cur = dequeue();

        // A leaf prints val[0 .. val_num-1]; an internal node prints only its
        // separators val[1 .. ptr_num-1] (val[0] is bookkeeping, not a key).
        int first = cur->isleaf ? 0 : 1;
        int end = cur->isleaf ? cur->val_num : cur->ptr_num;
        printf("[");
        for (int i = first; i < end; i++) {
            if (i > first) {
                printf(",");
            }
            printf("%d", cur->val[i]);
        }
        printf("]");

        next_level += cur->ptr_num;
        remaining--;
        if (remaining == 0) {
            // The level is complete: end the line (the last level included)
            // and start counting down the next level.
            printf("\n");
            remaining = next_level;
            next_level = 0;
        }

        if (cur->ptr_num >= 1) enqueue(cur->left);
        if (cur->ptr_num >= 2) enqueue(cur->mid);
        if (cur->ptr_num == 3) enqueue(cur->right);
    }
    return 0;
}
bool Insert(int x, BpTree *tree_add)
{
if tree is empty
if ((*tree_add).root == NULL) {
(*tree_add).root = (*tree_add).leaves = (Node*)malloc(sizeof(Node));
(*tree_add).root->isleaf = true;
(*tree_add).root->val[0] = x;
(*tree_add).root->val[1] = (*tree_add).root->val[2] = EMP;
(*tree_add).root->val_num = 1;
(*tree_add).root->left = (*tree_add).root->mid = (*tree_add).root->right = NULL;
(*tree_add).root->ptr_num = 0;
return true;
}
if x already exists
Node* pos = Search(x, (*tree_add));
if (pos) return false;
insert x
// find the node where x should be
Node* p = (*tree_add).root;
while (!p->isleaf) {
if (p->ptr_num == 2) { // cur node has 1 sepa, stored in val1
if (x < p->val[1]) { push(p); p = p->left; }
else { push(p); p = p->mid; }
}
if (p->ptr_num == 3) { // cur node has 2 sepa, stored in val1 and val2
if (x < p->val[1]) { push(p); p = p->left; }
else if (x >= p->val[1] && x < p->val[2]) { push(p); p = p->mid; }
else { push(p); p = p->right; }
}
}
// now, p points to the leaf node which x belongs to
if (p->val_num == 1) { // no need to split
int t = p->val[0];
p->val[0] = t > x ? x : t;
p->val[1] = t > x ? t : x;
p->val[2] = EMP;
p->val_num++;
}
else if (p->val_num == 2) { // no need to split
Arrange3(p->val[0], p->val[1], x, p);
p->val_num++;
}
else { // need to split
Node* q = (Node*)malloc(sizeof(Node));
q->isleaf = true;
q->left = q->mid = q->right = NULL;
q->ptr_num = 0;
q->right = p->right; p->right = q; // insert q into the linked list of leaves
Arrange4(p->val[0], p->val[1], p->val[2], x, p, q);
p->val_num = q->val_num = 2;
Node* parent = pop();
if (parent == NULL) {
Node* newroot = (Node*)malloc(sizeof(Node));
newroot->isleaf = false;
newroot->val[0] = p->val[0];
newroot->val[1] = q->val[0];
newroot->val[2] = EMP;
newroot->val_num = 2;
newroot->left = p;
newroot->mid = q;
newroot->right = NULL;
newroot->ptr_num = 2;
(*tree_add).root = newroot;
}
else {
InternalInsert(q, parent, tree_add);
}
}
// update the vals of x's ancestors
Node* ances;
while (ances = pop()) {
if (ances->ptr_num >= 1) ances->val[0] = ances->left->val[0];
if (ances->ptr_num >= 2) ances->val[1] = ances->mid->val[0];
if (ances->ptr_num == 3) ances->val[2] = ances->right->val[0];
}
return true;
}
// new_node is generated by cur_node splitting
bool InternalInsert(Node* new_child, Node* cur_node, BpTree *tree_add)
{
if (cur_node->ptr_num == 1) { // impossible to have just 1 ptr
}
else if (cur_node->ptr_num == 2) { // no need to split
InternalArrange3(cur_node->left, cur_node->mid, new_child,
cur_node);
cur_node->ptr_num++;
// update the sepa
cur_node->val[0] = cur_node->left->val[0];
cur_node->val[1] = cur_node->mid->val[0];
cur_node->val[2] = cur_node->right->val[0];
}
else { // need to split
Node* new_node = (Node*)malloc(sizeof(Node));
new_node->isleaf = false;
new_node->left = new_node->mid = new_node->right = NULL;
new_node->ptr_num = 0;
InternalArrange4(cur_node->left, cur_node->mid, cur_node->right, new_child,
cur_node, new_node);
cur_node->ptr_num = new_node->ptr_num = 2;
// update the sepa
cur_node->val[0] = cur_node->left->val[0];
cur_node->val[1] = cur_node->mid->val[0];
cur_node->val[2] = EMP;
new_node->val[0] = new_node->left->val[0];
new_node->val[1] = new_node->mid->val[0];
new_node->val[2] = EMP;
Node* parent = pop();
if (parent == NULL) { // recursion end (cur_node is root)
Node* newroot = (Node*)malloc(sizeof(Node));
newroot->isleaf = false;
newroot->val[0] = cur_node->val[0];
newroot->val[1] = new_node->val[0];
newroot->val[2] = EMP;
newroot->val_num = 2;
newroot->left = cur_node;
newroot->mid = new_node;
newroot->right = NULL;
newroot->ptr_num = 2;
(*tree_add).root = newroot;
}
else {
InternalInsert(new_node, parent, tree_add);
}
}
return true;
}
// Looks up key x.
//
// Descends from the root using the separators stored in the internal nodes
// (val[1] is the least key under `mid`, val[2] the least key under `right`),
// so only one node per level is visited instead of every leaf.
//
// Returns the leaf that contains x, or NULL when x is absent or the tree is
// empty.
Node* Search(int x, BpTree tree)
{
    Node* p = tree.root;
    while (p != NULL && !p->isleaf) {
        if (p->ptr_num == 3 && x >= p->val[2]) {
            p = p->right;
        } else if (x >= p->val[1]) {
            p = p->mid;
        } else {
            p = p->left;
        }
    }
    if (p == NULL) {
        return NULL;
    }
    // A leaf holds at most three keys.
    for (int i = 0; i < p->val_num && i < 3; i++) {
        if (p->val[i] == x) {
            return p;
        }
    }
    return NULL;
}