B-树主要用在文件系统中,关键字为文件名,值为文件在磁盘中的地址。分支因子很大,一般为50到2000,一个节点就用一个磁盘块存储。
按照《算法导论》的描述,节点有如下性质:
1、每个非根节点至少有t-1个关键字,至多有2*t-1个关键字,关键字按升序排列;
2、每个非根的内节点至少有t个子女节点指针,至多有2*t个(叶节点没有子女);
3、含有n[x]个关键字的内节点对应n[x]+1个子女节点指针;
4、非空树的根节点至少包含一个关键字;
以下是c++实现代码:
/**
* BTree.h
*
* B-树非回溯实现,按照《算法导论》中的实现。网上资料说,非回溯
* 的节点分裂次数会更多,性能更差
*
* 790042744@qq.com
* 2012/2/28
*
**/
#ifndef BTREE_H
#define BTREE_H
#include <cstddef>
#include <cstring>
#include <iostream>
#include <utility>
#include <vector>
using std::vector;
using std::cout;
using std::endl;
/**
 * One node of the B-tree with minimum degree t.
 *
 * A node has room for 2t-1 keys (with one value per key) and 2t child
 * pointers. Only the first `keycnt` keys/values are live; the keys are kept
 * in ascending order by the owning BTree.
 *
 * The node does not own its children: BTree (a friend) allocates, links and
 * frees nodes. Users only get read access through the accessors below.
 **/
template< typename FileName, typename DiskAddr, int t >
class BTree;

template< typename FileName, typename DiskAddr, int t >
class BTreeNode
{
private:
    FileName filename[2*t-1];   // keys
    DiskAddr fileaddr[2*t-1];   // fileaddr[i] is the value stored for filename[i]
    BTreeNode *child[2*t];      // child[i] precedes filename[i]; all NULL until linked
    int keycnt;                 // number of live keys
    bool isleaf;

    // Shared bounds check for the accessors: rejects indices outside the
    // storage capacity [0, 2t-1) by throwing the offending index as an int.
    static void CheckIndex( int index )
    {
        if ( index < 0 || index >= 2*t-1 )
            throw index;
    }
public:
    // Value-initialize every array so that unused slots hold defined values
    // (zero / NULL / default-constructed) without needing memset.
    BTreeNode(): filename(), fileaddr(), child(), keycnt(0), isleaf(false)
    {
    }
    ~BTreeNode() {}

    // True when the node holds the maximum of 2t-1 keys.
    bool IsFull() const { return (keycnt == 2*t-1); }

    // Key stored in slot `index`; returned as a constant so it cannot be
    // modified. Throws `index` (int) when it is outside [0, 2t-1).
    FileName const GetKey(int index) {
        CheckIndex( index );
        return filename[index];
    }
    FileName const& GetKey(int index) const {
        CheckIndex( index );
        return filename[index];
    }

    // Value stored in slot `index`; read-only as well.
    // Throws `index` (int) when it is outside [0, 2t-1).
    DiskAddr const GetValue(int index) {
        CheckIndex( index );
        return fileaddr[index];
    }
    DiskAddr const& GetValue(int index) const {
        CheckIndex( index );
        return fileaddr[index];
    }

    // Number of live keys. The non-const overload returns a reference, as in
    // the original interface; the const overload allows read-only queries.
    int& GetKeyCnt() { return keycnt; }
    int GetKeyCnt() const { return keycnt; }

    friend class BTree<FileName, DiskAddr, t>;
};
//===================================================================================================
/**
 * B-tree mapping FileName keys to DiskAddr values, with minimum degree t.
 *
 * FileName only needs operator< (equality is derived from it); printing the
 * tree additionally needs operator<< for both FileName and DiskAddr.
 *
 * The tree owns its nodes through raw pointers, so it is not copyable: an
 * implicit copy would share the same nodes between two trees.
 */
template< typename FileName, typename DiskAddr, int t >
class BTree
{
public:
    typedef BTreeNode<FileName, DiskAddr, t> Node;
    typedef Node& RNode;
    typedef Node* PNode;
    typedef Node const * PCNode;
    typedef PNode& RPNode;
private:
    PNode root;     // NULL until the first Insert

    // Copying is disabled (declared private, intentionally not defined).
    BTree( BTree const& );
    BTree& operator=( BTree const& );
public:
    BTree();
    ~BTree();
    // Finds `name`; returns the node holding it and sets `index` to its slot,
    // or returns NULL with index = -1.
    PCNode Search( FileName name, int& index );
    // Replaces the value stored for `name`; false if the key is absent.
    bool ChangeFileAddr( FileName name, DiskAddr newaddr );
    // Inserts a new key/value pair; false if the key already exists.
    bool Insert( FileName name, DiskAddr addr );
    // Removes `name`; false if the key is absent.
    bool Delete( FileName name );
    // Test helpers: print the whole tree / a single node to std::cout.
    void PrintTree();
    void PrintTree(PNode pnode);
private:
    PNode Allocate( bool isleaf=false );
    void Clear( PNode pnode );
    PNode SubTreeSearch( PNode pnode, FileName name, int& index );
    bool NodeSearch( PNode pnode, FileName name, int& index );
    bool InsertNonFull( PNode pnode, FileName name, DiskAddr addr );
    void SplitChild( PNode parent, int chind, PNode child );
    bool NodeDelete( PNode pnode, FileName name );
    void MergeChild( PNode pnode, int keyind );
    inline void LeafNodeDelete( PNode pnode, int index );
    void InnerNodeDelete( PNode pnode, int index );
    void FromLeftSibling( PNode child, PNode parent, int chind );
    void FromRightSibling( PNode child, PNode parent, int chind );
    std::pair<FileName, DiskAddr> FindNodeMaxKeyValue( PNode pnode );
    std::pair<FileName, DiskAddr> FindNodeMinKeyValue( PNode pnode );
};
//==============================================public=====================================================
// Builds an empty tree; no node is allocated here, the root stays NULL.
template< typename FileName, typename DiskAddr, int t >
inline BTree<FileName, DiskAddr, t>::BTree()
    : root( NULL )
{
}
// Hands the root (if any) to Clear() when the tree is destroyed.
template< typename FileName, typename DiskAddr, int t >
inline BTree<FileName, DiskAddr, t>::~BTree()
{
    if ( root == NULL )
        return;
    Clear( root );
}
/*
 * Looks up `name` in the whole tree.
 * Returns a pointer-to-const to the node that holds the key and stores the
 * key's slot number in `index`.
 * On failure (including an empty tree) returns NULL and sets index = -1.
 */
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PCNode
BTree<FileName, DiskAddr, t>::Search(FileName name, int& index)
{
    if ( root != NULL )
        return SubTreeSearch( root, name, index );

    // Empty tree: nothing to search.
    index = -1;
    return NULL;
}
/*
 * Overwrites the value stored for key `name` with `newaddr`.
 * Returns false when the key is not in the tree, true otherwise.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::ChangeFileAddr( FileName name, DiskAddr newaddr )
{
    int slot = -1;
    PNode holder = SubTreeSearch( root, name, slot );
    if ( holder != NULL && slot != -1 )
    {
        holder->fileaddr[slot] = newaddr;
        return true;
    }
    return false;
}
/*
 * Inserts a new key/value pair; returns false if the key already exists.
 *
 * Outline of the insertion:
 *   1. If the root is full, split it under a fresh root, then insert into
 *      the (now non-full) root.
 *   2. At an internal node p: (a) find the child the key belongs to;
 *      (b) if that child holds 2t-1 keys, split it and update p;
 *      (c) recurse into that child (or the one after it).
 *   3. At a leaf: insert directly.
 */
template< typename FileName, typename DiskAddr, int t >
inline bool BTree<FileName, DiskAddr, t>::Insert( FileName name, DiskAddr addr )
{
    // First insertion: the tree starts as a single leaf.
    if ( root == NULL )
        root = Allocate( true );

    if ( root->IsFull() )
    {
        // Grow the tree by one level: the old root becomes child 0 of a new
        // internal root and is then split (2t-1 keys -> t-1, median, t-1).
        PNode oldRoot = root;
        PNode newRoot = Allocate();
        newRoot->child[0] = oldRoot;
        root = newRoot;
        SplitChild( newRoot, 0, oldRoot );
    }
    return InsertNonFull( root, name, addr );
}
/**
 * Removes key `name`; returns false if the key is not present.
 *
 * Outline of the deletion:
 *   1. At an internal node p:
 *      (1) key not in p: locate the child that may contain it;
 *          1a. if that child has t-1 keys, borrow one key from a sibling, or
 *              merge with a sibling when borrowing is impossible;
 *          1b. recurse into the child.
 *      (2) key in p:
 *          2a. if the child before the key has >= t keys, replace the key by
 *              the largest key of that subtree and delete that key there;
 *          2b. symmetrically with the child after the key;
 *          2c. otherwise merge the two children around the key and delete the
 *              key from the merged node.
 *   2. At a leaf: remove the key directly.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::Delete( FileName name )
{
    // An empty tree contains no keys.
    if ( NULL == root )
        return false;
    return NodeDelete( root, name );
}
/*
 * Test helper: prints the tree to std::cout level by level, one line per
 * level in the form "level: N [ k(v) ... ] [ ... ] ".
 * Prints nothing for an empty tree.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::PrintTree()
{
    if ( NULL == root )
        return;
    vector<PNode> current, next;
    // vector<PNode>::iterator is a dependent type, so `typename` is required.
    typename vector<PNode>::iterator it;
    current.push_back( root );
    for ( int level = 0; !current.empty(); ++ level )
    {
        cout << "level: " << level << " ";
        for ( it = current.begin(); it != current.end(); ++ it )
            PrintTree( *it );
        cout << endl;
        // Only the first node of the level is inspected to decide whether
        // there is a next level.
        if ( false == current.front()->isleaf )
        {
            for ( it = current.begin(); it != current.end(); ++ it )
            {
                PNode pnode = *it;
                // A node with keycnt keys contributes keycnt+1 children.
                for ( int j = 0; j < pnode->keycnt+1; ++ j )
                    next.push_back( pnode->child[j] );
            }
        }
        current.swap( next );
        next.clear();
    }
}
// Test helper: prints one node as "[ key(value) key(value) ... ] " to
// std::cout. A NULL node prints nothing.
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::PrintTree(PNode pnode)
{
    if ( pnode != NULL )
    {
        cout << "[ ";
        int k = 0;
        while ( k < pnode->keycnt )
        {
            cout << pnode->filename[k] << "(" << pnode->fileaddr[k] << ") ";
            ++ k;
        }
        cout << "] ";
    }
}
//===============================================private====================================================
//private:
// Creates a new empty node on the heap and marks it as leaf / internal
// according to `isleaf`.
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PNode BTree<FileName, DiskAddr, t>::Allocate( bool isleaf )
{
    PNode fresh = new Node;
    fresh->isleaf = isleaf;
    return fresh;
}
/*
 * Frees the whole subtree rooted at pnode, including pnode itself.
 * A NULL pnode is ignored. After the call every pointer into the subtree is
 * dangling, so the caller must not use them again.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::Clear(PNode pnode)
{
    if ( NULL == pnode )
        return;
    if ( false == pnode->isleaf )
    {
        // An internal node with keycnt keys has keycnt+1 children.
        for ( int i = 0; i <= pnode->keycnt; ++ i )
            Clear( pnode->child[i] );
    }
    // Children first, then the node itself.
    delete pnode;
}
/*
 * Searches the subtree rooted at pnode for key `name`.
 * On success returns the node holding the key and stores its slot in `index`.
 * If the key is absent (or pnode is NULL) returns NULL and sets index = -1.
 */
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PNode
BTree<FileName, DiskAddr, t>::SubTreeSearch( PNode pnode, FileName name, int& index )
{
    PNode cur = pnode;
    while ( cur != NULL )
    {
        int pos = 0;
        // Binary search inside the node: on a hit pos is the key slot,
        // otherwise it is the child to descend into.
        if ( NodeSearch( cur, name, pos ) )
        {
            index = pos;
            return cur;
        }
        if ( cur->isleaf )
            break;              // nowhere left to look
        cur = cur->child[pos];
    }
    index = -1;
    return NULL;
}
/**
 * Binary search among the live keys of a single node.
 * Returns true and sets `index` to the key's slot when `name` is found;
 * otherwise returns false and sets `index` to the child-pointer position
 * under which `name` would be located.
 * FileName only needs to provide operator<.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::NodeSearch( PNode pnode, FileName name, int& index )
{
    int lo = 0;
    int hi = pnode->keycnt - 1;
    while ( lo <= hi )
    {
        const int mid = lo + (hi - lo) / 2;
        FileName const& probe = pnode->filename[mid];
        if ( name < probe )
            hi = mid - 1;
        else if ( probe < name )
            lo = mid + 1;
        else
        {
            // Neither is less than the other: equal.
            index = mid;
            return true;
        }
    }
    index = lo;
    return false;
}
/*
 * Inserts key/value into the subtree rooted at the non-full node pnode.
 * Returns false, leaving the set of keys unchanged, if the key already
 * exists; true after a successful insertion.
 * FileName only needs operator<; equality is "neither is less".
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::InsertNonFull( PNode pnode, FileName name, DiskAddr addr )
{
    int i;
    // NodeSearch yields either the slot of an existing key (duplicate) or the
    // position where the key belongs.
    if ( true == NodeSearch( pnode, name, i ) )
        return false;

    if ( true == pnode->isleaf )
    {
        // Open slot i by shifting the larger keys one place to the right.
        for ( int k = pnode->keycnt; k > i; -- k )
        {
            pnode->filename[k] = pnode->filename[k-1];
            pnode->fileaddr[k] = pnode->fileaddr[k-1];
        }
        pnode->filename[i] = name;
        pnode->fileaddr[i] = addr;
        ++ pnode->keycnt;
        return true;
    }

    PNode target = pnode->child[i];
    if ( true == target->IsFull() )
    {
        // Splitting promotes the median key target->filename[t-1] into pnode.
        // If that median equals `name`, the key is already in the tree; the
        // check must happen here because pnode was searched before the
        // median moved up.
        FileName const& median = target->filename[t-1];
        if ( !( median < name ) && !( name < median ) )
            return false;
        SplitChild( pnode, i, target );
        // The promoted median now sits at pnode->filename[i]; pick the half
        // the new key belongs to.
        if ( pnode->filename[i] < name )
            ++ i;
    }
    return InsertNonFull( pnode->child[i], name, addr );
}
/*
 * Splits the full node `child`, where parent->child[chind] == child.
 * The upper t-1 keys (and, for an internal node, the upper t children) move
 * to a newly allocated right sibling, the median key moves up into `parent`
 * at slot chind, and the sibling is linked as parent->child[chind+1].
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::SplitChild( PNode parent, int chind, PNode child )
{
    PNode sibling = Allocate( child->isleaf );
    sibling->keycnt = t-1;

    // Upper half of the keys: child[t .. 2t-2] -> sibling[0 .. t-2].
    for ( int k = 0; k < t-1; ++ k )
    {
        sibling->filename[k] = child->filename[t+k];
        sibling->fileaddr[k] = child->fileaddr[t+k];
    }
    // Upper half of the children: child[t .. 2t-1] -> sibling[0 .. t-1].
    if ( !child->isleaf )
    {
        for ( int k = 0; k < t; ++ k )
            sibling->child[k] = child->child[t+k];
    }
    child->keycnt = t-1;

    // Make room in the parent for the median key and the new child pointer.
    for ( int k = parent->keycnt - 1; k >= chind; -- k )
    {
        parent->filename[k+1] = parent->filename[k];
        parent->fileaddr[k+1] = parent->fileaddr[k];
        parent->child[k+2] = parent->child[k+1];
    }

    // Median key moves up.
    parent->filename[chind] = child->filename[t-1];
    parent->fileaddr[chind] = child->fileaddr[t-1];
    parent->child[chind+1] = sibling;
    ++ parent->keycnt;
}
/*
 * Deletes key `name` from the subtree rooted at pnode.
 * Returns false if the key is not found (or pnode is NULL), true otherwise.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::NodeDelete( PNode pnode, FileName name )
{
    if ( NULL == pnode )
        return false;

    // First slot whose key is not less than `name`.
    int i = 0;
    while ( i < pnode->keycnt && pnode->filename[i] < name )
        ++ i;

    // Key found in this node (name == pnode->filename[i]).
    if ( i < pnode->keycnt && !( name < pnode->filename[i] ) )
    {
        if ( pnode->isleaf )
            LeafNodeDelete( pnode, i );     // case 1: remove directly from a leaf
        else
            InnerNodeDelete( pnode, i );    // case 2: key sits in an internal node
        return true;
    }

    // Not here and nowhere further down.
    if ( pnode->isleaf )
        return false;

    // case 3: the key, if present, is under child[i]. The child must hold at
    // least t keys before descending into it.
    PNode target = pnode->child[i];
    if ( target->keycnt >= t )
        return NodeDelete( target, name );

    if ( i > 0 && pnode->child[i-1]->keycnt >= t )
    {
        // case 3a: borrow through the parent from the left sibling.
        FromLeftSibling( target, pnode, i );
        return NodeDelete( target, name );
    }
    if ( i < pnode->keycnt && pnode->child[i+1]->keycnt >= t )
    {
        // case 3a: borrow through the parent from the right sibling.
        FromRightSibling( target, pnode, i );
        return NodeDelete( target, name );
    }

    // case 3b: no sibling can lend a key; merge with the left sibling when
    // there is one, otherwise with the right sibling.
    const int leftIdx = ( i > 0 ) ? i - 1 : i;
    MergeChild( pnode, leftIdx );
    PNode merged = pnode->child[leftIdx];
    // Special case: the root lost its only key, so the merged node becomes
    // the new root and the old root is released.
    if ( root == pnode && 0 == pnode->keycnt )
    {
        root = merged;
        delete pnode;
    }
    return NodeDelete( merged, name );
}
/*
 * Merges pnode->child[keyind], the separator key pnode->filename[keyind] and
 * pnode->child[keyind+1] into pnode->child[keyind]; the right node is freed
 * and the separator is removed from pnode.
 *
 * The merged node receives left->keycnt + right->keycnt + 1 keys; the caller
 * must make sure that fits into the 2t-1 key slots.
 * A NULL pnode is ignored.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::MergeChild( PNode pnode, int keyind )
{
    if ( NULL == pnode )
        return;
    PNode left = pnode->child[keyind];
    PNode right = pnode->child[keyind+1];
    const int base = left->keycnt;
    int i;

    // Separator key moves down, right behind the keys already in left.
    left->filename[base] = pnode->filename[keyind];
    left->fileaddr[base] = pnode->fileaddr[keyind];

    // Append the keys of right after the separator.
    for ( i = 0; i < right->keycnt; ++ i )
    {
        left->filename[base+1+i] = right->filename[i];
        left->fileaddr[base+1+i] = right->fileaddr[i];
    }
    // Children exist only when the nodes being merged are internal; this is
    // decided by the merged nodes themselves, not by their parent.
    if ( false == left->isleaf )
    {
        for ( i = 0; i <= right->keycnt; ++ i )
            left->child[base+1+i] = right->child[i];
    }
    left->keycnt = base + right->keycnt + 1;
    delete right;

    // Close the gap left in the parent by the separator and the right child.
    for ( i = keyind; i < pnode->keycnt-1; ++ i )
    {
        pnode->filename[i] = pnode->filename[i+1];
        pnode->fileaddr[i] = pnode->fileaddr[i+1];
        pnode->child[i+1] = pnode->child[i+2];
    }
    // Do not keep a stale pointer in the now-unused last child slot.
    pnode->child[pnode->keycnt] = NULL;
    -- pnode->keycnt;
}
/*
 * Removes the key in slot `index` from the leaf pnode by shifting the
 * following keys one place to the left. Does nothing if pnode is not a leaf.
 * The original note states that cases 3a/3b guarantee pnode holds at least t
 * keys when this is reached.
 */
template< typename FileName, typename DiskAddr, int t >
inline void BTree<FileName, DiskAddr, t>::LeafNodeDelete( PNode pnode, int index )
{
    if ( !pnode->isleaf )
        return;
    const int remaining = pnode->keycnt - 1;
    for ( int k = index; k < remaining; ++ k )
    {
        pnode->filename[k] = pnode->filename[k+1];
        pnode->fileaddr[k] = pnode->fileaddr[k+1];
    }
    pnode->keycnt = remaining;
}
/*
 * Removes the key in slot `index` of the internal node pnode.
 * Both neighbouring children (child[index], child[index+1]) are used without
 * a NULL check.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::InnerNodeDelete( PNode pnode, int index )
{
    PNode donor = NULL;
    std::pair<FileName, DiskAddr> replacement;

    if ( pnode->child[index]->keycnt >= t )
    {
        // a) the preceding child can spare a key: take the largest key of
        //    its subtree.
        donor = pnode->child[index];
        replacement = FindNodeMaxKeyValue( donor );
    }
    else if ( pnode->child[index+1]->keycnt >= t )
    {
        // b) the following child can spare a key: take the smallest key of
        //    its subtree.
        donor = pnode->child[index+1];
        replacement = FindNodeMinKeyValue( donor );
    }

    if ( NULL == donor )
    {
        // c) neither child can spare a key: merge both children around the
        //    key and delete the key from the merged node.
        FileName const doomed = pnode->filename[index];
        MergeChild( pnode, index );
        PNode merged = pnode->child[index];
        // Special case: the root lost its only key, so the merged node
        // becomes the new root.
        if ( root == pnode && 0 == pnode->keycnt )
        {
            root = merged;
            delete pnode;
        }
        NodeDelete( merged, doomed );
        return;
    }

    // Overwrite the key with its replacement, then remove the replacement
    // from the subtree it was taken from.
    pnode->filename[index] = replacement.first;
    pnode->fileaddr[index] = replacement.second;
    NodeDelete( donor, replacement.first );
}
/*
 * Rotation through the parent, where parent->child[chind] == child:
 * last key of the left sibling --> parent key chind-1 --> first slot of child.
 * The left sibling's last child pointer becomes child's first child pointer.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::FromLeftSibling( PNode child, PNode parent, int chind )
{
    PNode left = parent->child[chind-1];

    // Shift every key of child one slot to the right...
    for ( int k = child->keycnt; k > 0; -- k )
    {
        child->filename[k] = child->filename[k-1];
        child->fileaddr[k] = child->fileaddr[k-1];
    }
    // ...and every child pointer as well (there is one more of them).
    for ( int k = child->keycnt + 1; k > 0; -- k )
        child->child[k] = child->child[k-1];

    // The rightmost subtree of left now hangs in front of child.
    child->child[0] = left->child[ left->keycnt ];

    // Parent separator moves down into child.
    child->filename[0] = parent->filename[chind-1];
    child->fileaddr[0] = parent->fileaddr[chind-1];
    ++ child->keycnt;

    // Last key of left moves up into the parent.
    const int last = left->keycnt - 1;
    parent->filename[chind-1] = left->filename[last];
    parent->fileaddr[chind-1] = left->fileaddr[last];
    left->keycnt = last;
}
/*
 * Rotation through the parent, where parent->child[chind] == child:
 * first key of the right sibling --> parent key chind --> last slot of child.
 * The right sibling's first child pointer becomes child's last child pointer.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::FromRightSibling( PNode child, PNode parent, int chind )
{
    PNode right = parent->child[chind+1];

    // Parent separator moves down to the end of child, together with the
    // leftmost subtree of right.
    const int tail = child->keycnt;
    child->filename[tail] = parent->filename[chind];
    child->fileaddr[tail] = parent->fileaddr[chind];
    child->child[tail+1] = right->child[0];
    child->keycnt = tail + 1;

    // First key of right moves up into the parent.
    parent->filename[chind] = right->filename[0];
    parent->fileaddr[chind] = right->fileaddr[0];

    // Shift the remaining keys and child pointers of right one slot left.
    const int remaining = right->keycnt - 1;
    for ( int k = 0; k < remaining; ++ k )
    {
        right->filename[k] = right->filename[k+1];
        right->fileaddr[k] = right->fileaddr[k+1];
    }
    for ( int k = 0; k <= remaining; ++ k )
        right->child[k] = right->child[k+1];
    right->keycnt = remaining;
}
/*
 * Returns the largest key of the subtree rooted at pnode together with its
 * value: follows the last child pointer down to a leaf and takes the leaf's
 * last key.
 */
template< typename FileName, typename DiskAddr, int t >
std::pair<FileName, DiskAddr> BTree<FileName, DiskAddr, t>::FindNodeMaxKeyValue( PNode pnode )
{
    PNode cur = pnode;
    for ( ; !cur->isleaf; cur = cur->child[ cur->keycnt ] )
    {
    }
    const int last = cur->keycnt - 1;
    return std::pair<FileName, DiskAddr>( cur->filename[last], cur->fileaddr[last] );
}
/*
 * Returns the smallest key of the subtree rooted at pnode together with its
 * value: follows the first child pointer down to a leaf and takes the leaf's
 * first key.
 */
template< typename FileName, typename DiskAddr, int t >
std::pair<FileName, DiskAddr> BTree<FileName, DiskAddr, t>::FindNodeMinKeyValue( PNode pnode )
{
    PNode cur = pnode;
    for ( ; !cur->isleaf; cur = cur->child[0] )
    {
    }
    return std::pair<FileName, DiskAddr>( cur->filename[0], cur->fileaddr[0] );
}
//===================================================================================================
#endif