一个月前看完uCore内核后,决定写一个B+树在内存中的实现巩固一下在内核代码中学习到的一些编码技巧,B+树常用于组织起数据库的索引,且叶子节点包含真实内容,可以很方便地进行区间查找,比如select mkey > 5 in table之类的,通过叶子节点的链表就可以获取后面的数据了,由于磁盘中块的大小(大多数情况下pageSize = 4096),可以让树的中间节点包含多个子节点以使每个BNode结构体在磁盘上填满每页。从而减少磁盘io,提高运行效率。
代码中的英文注释是我自己写的,可能有一些语法或者词性方面的错误(很久没有写英文了)。
我定义的BNode结构体(在内存中),其中n1为子节点个数,isDirty为相对于磁盘数据是否要写回磁盘。List_entry为组织叶子节点的双链表(该技巧从ucore内核中学来的,如果list_entry用其他数据结构组织起来(比如红黑树),那么BNode就会被红黑树组织起来,该技巧可以减少代码量。)
struct BNode
{
unsigned n1 : 10;
unsigned isLeaf : 1;
unsigned isDirty : 1;
//unsigned int n1;
//bool isLeaf;
BNode* fa;
List_entry leaflist;
BNode* childp[2 * t];//which contains the pointer of child node
//represent the block num of child(point to data block if leaf)
int ckey[2 * t];//represent the key for parition child(ckey[i] == the least key of corresponding child i)
BNode() :fa(NULL), isLeaf(true), n1(0), isDirty(0)
{
};
~BNode()
{
if (isDirty)
{
//printf("page write block num %u...\n", this);//WriteBlock func called,copy content from memory to hard disk
isDirty = 0;
}
//delete leaflist;
};
};
其中BNode中childp虽然存的是子节点的虚地址,实际应用时应该存的是子节点所在的磁盘block num,通过ReadBlock将对应的BNode的读入内存中,然后填入内存中new的一个BNode。(List_entry中prev,nxt也是同理)
ReadBlock最理想的状态是第一次由系统调用int 80触发,后面页置换出去的时候填入对应的磁盘块号,而后由14号中断(缺页)触发调入内存。
t的选择应该使得BNode的大小 == pageSize,但为了测试可以把t设的小一些(>=3).
经测试该B树叶子节点排序没有什么问题,不过中间有没有退化就不知道了,应该没有吧,毕竟我也没有怎么看B+树的定义,具体的思想可以看我写的注释。
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <set>
#pragma warning(disable:4996)
#define size_int sizeof(int)
typedef unsigned int uint;
using namespace std;
const static int pageSize = 4096;
//const static int t = (pageSize - 96)/ size_int / 4;
const static int t = 3;
int mtop;
const static int bign = 105;
int mstack[bign];
const static int maxd = 2 * t - 1;
const static int mind = t - 1;
struct List_entry
{
List_entry *prev, *nxt;
}*mhead;
static int inMemory = 0;
struct BNode
{
unsigned n1 : 10;
unsigned isLeaf : 1;
unsigned isDirty : 1;
//unsigned int n1;
//bool isLeaf;
BNode* fa;
List_entry leaflist;
BNode* childp[2 * t];//which contains the pointer of child node
//represent the block num of child(point to data block if leaf)
int ckey[2 * t];//represent the key for parition child(ckey[i] == the least key of corresponding child i)
BNode() :fa(NULL), isLeaf(true), n1(0), isDirty(0)
{
};
~BNode()
{
if (isDirty)
{
//printf("page write block num %u...\n", this);//WriteBlock func called,copy content from memory to hard disk
isDirty = 0;
}
//delete leaflist;
};
};
BNode *Troot = NULL, *NodeTemplate;
bool msplit(BNode* bnode);
inline BNode* to_struct(List_entry* leafEntry)
{
if (Troot == NULL)
Troot = new BNode();
return (BNode*)((char*)leafEntry - ((char*)(&(Troot->leaflist)) - (char*)Troot));
}
inline void listAdd(List_entry* prevn, List_entry* nextn, List_entry* dataentry)
{
dataentry->prev = prevn;
dataentry->nxt = nextn;
nextn->prev = dataentry;
prevn->nxt = dataentry;
}
inline void listDel(List_entry* dataentry)
{
dataentry->prev->nxt = dataentry->nxt;
dataentry->nxt->prev = dataentry->prev;
}
inline void list_AddAfter(List_entry* listentry, List_entry* dataentry)
{
List_entry* tnxt = listentry->nxt;
listAdd(listentry, tnxt, dataentry);
}
inline BNode* prevB(BNode* u)
{
assert(u->isLeaf);
if (u->leaflist.prev != mhead)
{
return to_struct(u->leaflist.prev);
}
else
{
return NULL;
}
}
inline BNode* nextB(BNode* u)
{
assert(u->isLeaf);
if (u->leaflist.nxt != mhead)
{
return to_struct(u->leaflist.nxt);
}
else
{
return NULL;
}
}
struct Block
{
char bdata[pageSize];
};
BNode* getBlock()
{
return (BNode*)(rand() % 65536);//can be replaced by an unique num
}
//Block data if read into memory.
//BTree height increasing or decreasing only when the root node split or merge
void ReadBlock(BNode *blocknum = 0)//using virtual addr of BNode to represent the corresponding block.
{
//if blocknum not in memory
//in best case,triggered by int 14 except first time read(int 80)
//printf("page read %u...\n", blocknum);
inMemory++;
}
void WriteBlock(BNode *blocknum = 0)
{
//if blocknum is dirty
if (blocknum != NULL && blocknum->isDirty == 1)
{
//printf("page write %d...\n", blocknum);
//inMemory--;
blocknum->isDirty = 0;
}
}
void init()
{
srand(11);
Troot = new BNode();
/*
Troot->isLeaf = true;
Troot->fa = NULL;
Troot->n1 = 0;
Troot->leaflist = new List_entry();*/
mhead = new List_entry();
mhead->prev = mhead->nxt = mhead;
list_AddAfter(mhead, &(Troot->leaflist));
}
inline bool mtransfer(BNode* snode, BNode* dnode, int childno, int mflag) //matching with left rotate and right rotate in binary tree
{
if (!mflag)//mflag == 0,represent transferring to prev node
{
uint nd = dnode->n1;
dnode->childp[nd] = snode->childp[0];
dnode->ckey[nd] = snode->ckey[0];
if (!dnode->isLeaf)
dnode->childp[nd]->fa = dnode;
dnode->n1++;
snode->n1--;
for (uint i = 0; i < snode->n1; i++)
{
snode->childp[i] = snode->childp[i + 1];
snode->ckey[i] = snode->ckey[i + 1];
}
BNode* tfa = snode->fa;
ReadBlock(tfa);
tfa->ckey[childno] = snode->ckey[0];
tfa->isDirty = 1;
}
else //mflag == 1,represent transferring to next node
{
snode->n1--;
dnode->n1++;
for (uint i = dnode->n1 - 1; i > 0; i--)
{
dnode->ckey[i] = dnode->ckey[i - 1];
dnode->childp[i] = dnode->childp[i - 1];
}
int sn1 = snode->n1;
BNode* tfa = snode->fa;
ReadBlock(tfa);
tfa->ckey[childno + 1] = dnode->ckey[0] = snode->ckey[sn1];
dnode->childp[0] = snode->childp[sn1];
if (!dnode->isLeaf)
dnode->childp[0]->fa = dnode;
tfa->isDirty = 1;
}
snode->isDirty = 1;
dnode->isDirty = 1;
// WriteBlock(snode);
// WriteBlock(dnode);
return false;
}
inline bool msplit(BNode *bnode)
{
BNode* nxtbnode = new BNode();
bnode->n1 /= 2;
int bn1 = bnode->n1;
nxtbnode->n1 = bn1;
nxtbnode->isLeaf = bnode->isLeaf;//
for (int i = 0; i < bn1; i++)
{
nxtbnode->ckey[i] = bnode->ckey[i + bn1];
nxtbnode->childp[i] = bnode->childp[i + bn1];
if (!nxtbnode->isLeaf)
nxtbnode->childp[i]->fa = nxtbnode;
}
if(bnode->isLeaf == 1)
list_AddAfter(&bnode->leaflist, &nxtbnode->leaflist);
int childno = 0;
if (bnode->fa == NULL)
{
Troot = new BNode();
bnode->fa = Troot;
Troot->ckey[0] = bnode->ckey[0];
Troot->childp[0] = bnode;
Troot->n1++;
Troot->isLeaf = false;
}
else
{
childno = mstack[mtop - 1];
}
BNode *tfa = nxtbnode->fa = bnode->fa;
ReadBlock(tfa);
tfa->n1++;
for(uint i = tfa->n1 - 1; i > childno + 1 ; i--)
{
tfa->ckey[i] = tfa->ckey[i - 1];
tfa->childp[i] = tfa->childp[i - 1];
}
tfa->ckey[childno + 1] = nxtbnode->ckey[0];
tfa->childp[childno + 1] = nxtbnode;
//nxtbnode->fa = tfa;
return true;
}
inline void mmerge(BNode* u, BNode* v)
{
int tun = u->n1;
u->n1 += v->n1;
for (int i = tun; i < u->n1; i++)
{
u->childp[i] = v->childp[i - tun];
u->ckey[i] = v->ckey[i - tun];
if (!u->isLeaf)
u->childp[i]->fa = u;
}
u->isDirty = 1;
if (v->isLeaf)
{
listDel(&v->leaflist);
}
delete v;
//free space Bnode v occupied on disk
inMemory--;
BNode* ufa = u->fa;
ReadBlock(ufa);
int chno = mstack[mtop - 1];
ufa->n1--;
for (int i = chno + 1; i < ufa->n1; i++)
{
ufa->childp[i] = ufa->childp[i + 1];
ufa->ckey[i] = ufa->ckey[i + 1];
}
//ufa->ckey[chno] = u->ckey[0];
ufa->isDirty = 1;
return;
}
bool DealWithNode(BNode* &u, bool isinsert = true)
{
int chno = mstack[mtop - 1];
BNode* ufa = u->fa;
if (ufa != NULL)
{
ReadBlock(ufa);
if(!isinsert)
ufa->ckey[chno] = u->ckey[0];
}
bool mflag = false;
if (isinsert)
{
if (ufa == NULL)
{
msplit(u);
return true;
}
int un = ufa->n1;
if (chno > 0)
{
BNode* v = ufa->childp[chno - 1];
ReadBlock(v);
if (v->n1 < maxd)
{
mtransfer(u, v, chno, 0);
return true;
}
}
if (chno < un - 1)
{
BNode* v = ufa->childp[chno + 1];
ReadBlock(v);
if (v->n1 < maxd)
{
mtransfer(u, v, chno, 1);
return true;
}
}
msplit(u);
return false;
}
else
{
int un = ufa->n1;
BNode* v = NULL;
int tflag = 0;
if (chno > 0)
{
v = ufa->childp[chno - 1];
ReadBlock(v);
if (v->n1 > mind)
{
mtransfer(v, u, chno - 1, 1);
return true;
}
}
if (chno < un - 1)
{
v = ufa->childp[chno + 1];
tflag = 1;
ReadBlock(v);
if (v->n1 > mind)
{
mtransfer(v, u, chno + 1, 0);
return true;
}
}
if (tflag == 0)
{
BNode* tmp = u;
u = v;
v = tmp;
mstack[mtop - 1]--;
//ufa->ckey[mstack[mtop - 1]] = u->ckey[0];
}
mmerge(u, v);
if (ufa->n1 == 1 && ufa == Troot)
{
delete Troot;
Troot = u;
}
if (u == Troot)
return true;
return false;
}
}
void BInsert(int mkey)
{
BNode* u = Troot;
mtop = 1;
while (true)
{
ReadBlock(u);
if (u->isLeaf)
{
uint i = 0;
for (i = 0; i < u->n1; i++)
{
assert(mkey != u->ckey[i]);
if (u->ckey[i] > mkey)
{
break;
}
}
u->n1++;
for (uint j = u->n1 - 1; j > i; j--)
{
u->childp[j] = u->childp[j - 1];
u->ckey[j] = u->ckey[j - 1];
}
u->childp[i] = getBlock();
u->ckey[i] = mkey;
u->isDirty = 1;
/*
if (u->n1 > maxd)
{
bool flag = false;
if (!flag)
{
BNode *tBNode = prevB(u);
if (tBNode != NULL && tBNode->fa == u->fa && tBNode->n1 < maxd)
{
ReadBlock(u->fa);
mtransfer(u, tBNode, 0);
flag = true;
}
}
if (!flag)
{
BNode* tBNode = nextB(u);
if (tBNode != NULL && tBNode->fa == u->fa && tBNode->n1 < maxd)
{
ReadBlock(u->fa);
mtransfer(u, tBNode, 1);
flag = true;
}
}
if (!flag)
{
//ReadBlock(u->fa);
msplit(u);
//WriteBlock(u->fa);
}
}
WriteBlock(u);*/
break;
}
else
{
uint i = 0;
for (i = 0; i < u->n1; i++)
{
assert(mkey != u->ckey[i]);
if (u->ckey[i] > mkey)
{
if (i > 0)
i--;
break;
}
}
if (i == u->n1 && i > 0)
i--;
if (i == 0)
{
if (mkey < u->ckey[i])
{
u->ckey[i] = mkey;
u->isDirty = 1;
//WriteBlock(u);
}
}
mstack[mtop++] = i;
u = u->childp[i];
}
}
//u = u->fa;
while(u != NULL)
{
ReadBlock(u);
if (u->n1 <= maxd)
{
// WriteBlock(u);
break;
}
bool flag = false;
flag = DealWithNode(u, true);
if (!flag)
u = u->fa;
else
break;
mtop--;
}
}
inline void fixchno(BNode *u)
{
while (u != Troot)
{
int chno = mstack[mtop - 1];
BNode *ufa = u->fa;
ReadBlock(ufa);
assert(ufa->childp[chno] == u);
if (ufa->ckey[chno] == u->ckey[0])
{
break;
}
else
{
ufa->ckey[chno] = u->ckey[0];
ufa->isDirty = 1;
//WriteBlock(ufa);
}
u = ufa;
mtop--;
}
}
void BDelete(int mkey)
{
BNode* u = Troot;
mtop = 1;
while (true)
{
ReadBlock(u);
if (u->isLeaf)
{
uint i = 0;
for (i = 0; i < u->n1; i++)
{
if (u->ckey[i] == mkey)
{
break;
}
}
u->n1--;
for (uint j = i; j < u->n1; j++)
{
u->childp[j] = u->childp[j + 1];
u->ckey[j] = u->ckey[j + 1];
}
u->isDirty = 1;
break;
}
else
{
uint i = 0;
for (i = 0; i < u->n1; i++)
{
if (u->ckey[i] > mkey)
break;
}
assert(i > 0);
i--;
mstack[mtop++] = i;
u = u->childp[i];
}
}
while (u != Troot)
{
ReadBlock(u);
if (u->n1 >= mind)
{
WriteBlock(u);
fixchno(u);
break;
}
bool flag = false;
//BNode* oldu = u;
flag = DealWithNode(u, false);
//assert(oldu == u);
u = u->fa;
mtop--;
if (flag)
{
fixchno(u);
break;
}
}
}
const int bigm = 1000000;
//int mark[bign];
set<int> used_set;
inline int getrand()
{
return (rand() * 32767ll + rand()) % bigm;
}
void Btest()
{
used_set.clear();
int nown = 0;
for (int i = 0; i < bigm; i++)
{
if (nown < 5000)
{
int x = getrand();
int cnt = 0;
for (int j = x; j != x - 1; j = (j + 1) % bigm)
{
cnt++;
if (used_set.count(j) == 0)
{
used_set.insert(j);
nown++;
BInsert(j);
break;
}
else if(cnt > 10)
{
BDelete(j);
used_set.erase(j);
nown--;
}
}
}
else
{
int x = getrand();
if (x % 2 == 0)
{
int cnt = 0;
for (int j = x; j != x - 1; j = (j + 1) % bigm)
{
cnt++;
if (used_set.count(j) == 0)
{
used_set.insert(j);
BInsert(j);
nown++;
break;
}
else if (cnt > 10)
{
BDelete(j);
used_set.erase(j);
nown--;
break;
}
}
}
else
{
auto it = used_set.lower_bound(x);
if (it != used_set.end())
{
x = *it;
}
else
{
x = *used_set.begin();
}
BDelete(x);
used_set.erase(x);
nown--;
}
}
if (i > 0 && i % 100 == 0)
{
int lastval = -1;
int cnt = 0;
for (List_entry* j = mhead->nxt; j != mhead; j = j->nxt)
{
BNode* tb = to_struct(j);
assert(tb->ckey[0] > lastval);
lastval = tb->ckey[tb->n1 - 1];
if (rand() % 30 == 0)
{
for (int k = 1; k < tb->n1; k++)
{
assert(tb->ckey[k] > tb->ckey[k - 1]);
}
}
cnt += tb->n1;
}
assert(cnt == nown);
}
}
}
int main()
{
//int blocknum = 102;
//printf("%d\n", sizeof(dxfile));
init();
Btest();
//printf("%d\n", sizeof(BNode));
//printf("%d\n", Troot->isLeaf);
}
//flush all dirty BNode to disk when being exchanged out of memory or when program exited(WriteBlock called)
这份代码刚开始的时候是随便写的,不过后面写着写着发现挺有意思的就比较认真了(还写了测试代码),写B树主要是因为只有它没有写过。学完uCore以后我觉得当年要是读个计算机博士什么的早就毕业了。