B-树 实现

B-树主要用在文件系统中,关键字为文件名,值为文件在磁盘中的地址。分支因子很大,一般为50到2000,一个节点就用一个磁盘块存储。

按照《算法导论》的描述,节点有如下性质:

1、每个非根节点至少有t-1个关键字,每个节点至多有2*t-1个关键字,节点内的关键字按升序排列;

2、每个非根的内节点至少有t个子女节点指针,每个内节点至多有2*t个(叶子节点没有子女);

3、n[x]个关键字对应n[x]+1个子女节点指针;

4、树非空时,根节点至少包含一个关键字;


以下是C++实现代码:

/**
 *  BTree.h
 *
 *  B-树非回溯实现,按照《算法导论》中的实现。网上资料说,非回溯
 *  的节点分裂次数会更多,性能更差
 *  
 *  790042744@qq.com
 *  2012/2/28
 *
 **/

#ifndef BTREE_H
#define BTREE_H

#include <cstddef>
#include <cstring>
#include <iostream>
#include <utility>
#include <vector>

using std::vector;
using std::cout;
using std::endl;

/**
 *  Node of a B-tree with minimum degree t.
 *
 *  Every non-root node holds at least t-1 keys and t children; every node
 *  holds at most 2t-1 keys and 2t children.
 *
 *  BTreeNode is an implementation detail of BTree and should not be handed
 *  to users in mutable form; BTree is a friend and manipulates the private
 *  members directly.
 **/
template< typename FileName, typename DiskAddr, int t >
class BTree;

template< typename FileName, typename DiskAddr, int t >
class BTreeNode
{
private:
	FileName filename[2*t-1];	// keys in ascending order; only [0, keycnt) are meaningful
	DiskAddr fileaddr[2*t-1];	// fileaddr[i] is the value stored for filename[i]
	// child[i] is the subtree to the left of filename[i]; child[keycnt] is the rightmost one
	BTreeNode *child[2*t];

	int keycnt;		// number of keys currently stored in this node
	bool isleaf;	// true when the node has no children

public:
	// Creates an empty, non-leaf node with every child pointer set to null.
	BTreeNode(): keycnt(0), isleaf(false)
	{
		// A plain loop avoids depending on memset (and on <cstring>).
		for ( int i = 0; i < 2*t; ++ i )
			child[i] = 0;
	}
	~BTreeNode() {}

	// True when the node already holds the maximum of 2t-1 keys.
	bool IsFull() { return (keycnt == 2*t-1); }

	// Returns the key stored at position index (the caller cannot modify it).
	// Throws index (an int) unless 0 <= index < keycnt: slots at or beyond
	// keycnt hold uninitialised or stale data and must not be exposed.
	FileName const GetKey(int index) {
		if ( index < 0 || index >= keycnt )
			throw index;
		return filename[index];
	}
	FileName const& GetKey(int index) const {
		if ( index < 0 || index >= keycnt )
			throw index;
		return filename[index];
	}

	// Returns the value stored next to key number index (also read-only).
	// Throws index (an int) unless 0 <= index < keycnt.
	DiskAddr const GetValue(int index){
		if ( index < 0 || index >= keycnt )
			throw index;
		return fileaddr[index];
	}
	DiskAddr const& GetValue(int index) const{
		if ( index < 0 || index >= keycnt )
			throw index;
		return fileaddr[index];
	}

	// Gives direct, writable access to the key counter.
	int& GetKeyCnt() { return keycnt; }

	friend class BTree<FileName, DiskAddr, t>;
};

//===================================================================================================
/**
 *  B-tree of minimum degree t that maps FileName keys to DiskAddr values.
 *
 *  FileName must provide operator< (the only comparison the tree uses) and,
 *  like DiskAddr, must be default-constructible and assignable because both
 *  are stored in fixed-size arrays inside each node. PrintTree additionally
 *  streams both types to std::cout.
 **/
template< typename FileName, typename DiskAddr, int t >
class BTree
{
public:
	typedef BTreeNode<FileName, DiskAddr, t> Node;
	typedef Node& RNode;
	typedef Node* PNode;
	typedef Node const * PCNode;
	typedef PNode& RPNode;

private:
	PNode root;		// NULL until the first Insert() allocates a node

public:
	BTree();
	~BTree();

	// Looks name up; returns the node holding it and sets index to the key's
	// position in that node. On failure returns NULL and sets index to -1.
	PCNode Search( FileName name, int& index );
	// Replaces the value stored for name; returns false when name is absent.
	bool ChangeFileAddr( FileName name, DiskAddr newaddr );
	// Adds a new key/value pair; returns false when name already exists.
	bool Insert( FileName name, DiskAddr addr );
	// Removes name; returns false when name is absent.
	bool Delete( FileName name );

	//test, output the whole tree
	void PrintTree();
	void PrintTree(PNode pnode);

private:
	// The tree owns its nodes through raw pointers, so a member-wise copy
	// would make two trees share (and later free) the same nodes. Copying is
	// therefore disabled: declared private and intentionally left undefined.
	BTree( const BTree& );
	BTree& operator=( const BTree& );

	PNode Allocate( bool isleaf=false );
	void Clear( PNode pnode );

	PNode SubTreeSearch( PNode pnode, FileName name, int& index );
	bool NodeSearch( PNode pnode, FileName name, int& index );

	bool InsertNonFull( PNode pnode, FileName name, DiskAddr addr );
	void SplitChild( PNode parent, int chind, PNode child );

	bool NodeDelete( PNode pnode, FileName name );
	void MergeChild( PNode pnode, int index );
	inline void LeafNodeDelete( PNode pnode, int index );
	void InnerNodeDelete( PNode pnode, int index );
	void FromLeftSibling( PNode child, PNode parent, int chind );
	void FromRightSibling( PNode child, PNode parent, int chind );

	std::pair<FileName, DiskAddr> FindNodeMaxKeyValue( PNode pnode );
	std::pair<FileName, DiskAddr> FindNodeMinKeyValue( PNode pnode );
};

//==============================================public=====================================================
// Builds an empty tree: no node exists until the first Insert().
template< typename FileName, typename DiskAddr, int t >
inline BTree<FileName, DiskAddr, t>::BTree()
	: root( NULL )
{
}

// Runs Clear() over the whole tree, provided a root was ever allocated.
template< typename FileName, typename DiskAddr, int t >
inline BTree<FileName, DiskAddr, t>::~BTree()
{
	if ( root != NULL )
	{
		Clear( root );
	}
}

/*
 *  Looks up name in the whole tree.
 *  On success returns a pointer-to-const to the node that holds the key and
 *  stores the key's position inside that node in index.
 *  On failure returns NULL and sets index to -1.
 */
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PCNode 
	BTree<FileName, DiskAddr, t>::Search(FileName name, int& index)
{
	if ( root != NULL )
		return SubTreeSearch( root, name, index );

	// Empty tree: nothing can be found.
	index = -1;
	return NULL;
}

/*
 *  Overwrites the value stored for the key name with newaddr.
 *  Returns false (and changes nothing) when the key is not in the tree.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::ChangeFileAddr( FileName name, DiskAddr newaddr )
{
	int pos = -1;
	PNode holder = SubTreeSearch( root, name, pos );

	if ( holder != NULL && pos != -1 )
	{
		holder->fileaddr[pos] = newaddr;
		return true;
	}
	return false;
}

/*
 *  Insertion outline:
 *    1. If the root is full, split it and continue in the new, non-full root.
 *    2. If the current node p is an internal node:
 *         a. find the child that should receive the key;
 *         b. if that child holds 2*t-1 keys, split it and update p;
 *         c. recurse into that child (or into the one right after it).
 *    3. If the current node p is a leaf, insert the key directly.
 *
 *  Inserts a new key/value pair.
 *  Returns false when the key already exists.
 */
template< typename FileName, typename DiskAddr, int t >
inline bool BTree<FileName, DiskAddr, t>::Insert( FileName name, DiskAddr addr )
{
	// The very first insertion creates the root as a leaf.
	if ( root == NULL )
		root = Allocate(true);

	if ( root->IsFull() )
	{
		// Grow the tree by one level: the old root (2t-1 keys) becomes the
		// only child of a fresh root and is split into t-1 | key | t-1.
		PNode oldroot = root;
		root = Allocate();
		root->child[0] = oldroot;
		SplitChild( root, 0, oldroot );
	}
	return InsertNonFull( root, name, addr );
}

/**
 *  Deletion outline:
 *    1. The current node p is an internal node:
 *       (1) the key is not in p: find the child that may contain it;
 *             1a. if that child holds only t-1 keys, borrow a key from one of
 *                 its siblings, or merge it with a sibling when neither can
 *                 spare one;
 *             1b. recurse into that child.
 *       (2) the key is in p:
 *             2a. if the child preceding the key holds >= t keys, replace the
 *                 key with the largest key of that subtree and delete that
 *                 largest key from the subtree recursively;
 *             2b. symmetric case for the child following the key;
 *             2c. otherwise merge both children around the key and delete the
 *                 key from the merged node recursively.
 *    2. The current node is a leaf: remove the key directly.
 *
 *  Deletes the given key.
 *  Returns false when the key does not exist.
 */ 
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::Delete( FileName name )
{
	// Nothing to delete from an empty tree.
	if ( root == NULL )
		return false;

	return NodeDelete( root, name );
}

/*
 *  Test helper: prints the tree to std::cout one level per line, each line
 *  prefixed with "level: N ". Prints nothing for an empty tree.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::PrintTree()
{
	if ( NULL == root )
		return;

	// vector<PNode>::iterator is a dependent type and therefore needs the
	// typename keyword; without it conforming compilers reject the line.
	typedef typename vector<PNode>::iterator NodeIter;

	vector<PNode> current, next;	// nodes of this level / of the level below
	int level = 0;

	current.push_back( root );
	while ( !current.empty() )
	{
		cout << "level: " << level << " ";
		for ( NodeIter it = current.begin(); it != current.end(); ++ it )
			PrintTree( *it );
		cout << endl;

		// Every node on a level has the same leaf flag, so the first node
		// decides whether there is another level to collect.
		if ( false == current.front()->isleaf )
		{
			for ( NodeIter it = current.begin(); it != current.end(); ++ it )
			{
				PNode pnode = *it;
				// An internal node with keycnt keys has keycnt+1 children.
				for ( int j = 0; j < pnode->keycnt+1; ++ j )
					next.push_back( pnode->child[j] );
			}
		}

		// The collected children become the level to print next.
		current.clear();
		current.swap( next );
		++ level;
	}
}
// Test helper: prints one node as "[ key(value) key(value) ... ] ".
// A NULL node prints nothing.
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::PrintTree(PNode pnode)
{
	if ( pnode != NULL )
	{
		cout << "[ ";
		int k = 0;
		while ( k < pnode->keycnt )
		{
			cout << pnode->filename[k] << "(" << pnode->fileaddr[k] << ") ";
			++ k;
		}
		cout << "] ";
	}
}

//===============================================private====================================================
//private:
// Creates a new, empty node on the heap and marks it as leaf or internal.
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PNode BTree<FileName, DiskAddr, t>::Allocate( bool isleaf )
{
	PNode fresh = new Node();
	fresh->isleaf = isleaf;		// the Node constructor defaults this to false
	return fresh;
}

/*
 *  Destroys the subtree rooted at pnode: every descendant is released first,
 *  then pnode itself. pnode may be NULL, in which case nothing happens.
 *  After the call pnode and all pointers into the subtree are dangling.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::Clear(PNode pnode)
{
	if ( NULL == pnode )
		return;

	if ( false == pnode->isleaf )
	{
		// A node with keycnt keys has keycnt+1 children, hence "<=".
		// Slots beyond keycnt are deliberately not visited: splits and merges
		// leave stale pointers there that belong to (or were) other nodes.
		for ( int i = 0; i <= pnode->keycnt; ++ i )
			Clear( pnode->child[i] );
	}
	delete pnode;
}

/*
 *  Searches the subtree rooted at pnode for the key name.
 *  On success returns the node holding the key and stores its position in
 *  index. When the key is absent (or pnode is NULL) returns NULL and sets
 *  index to -1.
 */
template< typename FileName, typename DiskAddr, int t >
typename BTree<FileName, DiskAddr, t>::PNode 
	BTree<FileName, DiskAddr, t>::SubTreeSearch( PNode pnode, FileName name, int& index )
{
	// Walk down from pnode, one level per iteration.
	for ( PNode cur = pnode; NULL != cur; )
	{
		int pos = 0;

		// Binary search inside the node: either the key's position, or the
		// number of the child to descend into.
		if ( NodeSearch( cur, name, pos ) )
		{
			index = pos;
			return cur;
		}
		if ( cur->isleaf )
			break;				// reached the bottom without a match
		cur = cur->child[pos];
	}

	index = -1;
	return NULL;
}

/**
 *  Binary search among the keys of a single node.
 *  Returns true and sets index to the key's position when name is present;
 *  otherwise returns false and sets index to the number of the child whose
 *  subtree could contain name.
 **/
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::NodeSearch( PNode pnode, FileName name, int& index )
{
	int lo = 0;
	int hi = pnode->keycnt - 1;

	while ( lo <= hi )
	{
		const int mid = ( lo + hi ) >> 1;

		// Only operator< is required of FileName; equality is "neither less".
		if ( pnode->filename[mid] < name )
		{
			lo = mid + 1;
			continue;
		}
		if ( name < pnode->filename[mid] )
		{
			hi = mid - 1;
			continue;
		}
		index = mid;
		return true;
	}

	index = lo;
	return false;
}

/*
 *  Inserts name/addr into the subtree rooted at pnode; pnode itself must not
 *  be full. Full children met on the way down are split before descending.
 *  Returns false, without storing anything, when the key already exists.
 *  Only operator< is required of FileName.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::InsertNonFull( PNode pnode, FileName name, DiskAddr addr )
{
	int pos = 0;

	// Either an exact match (duplicate), or the slot / child where name belongs.
	if ( true == NodeSearch( pnode, name, pos ) )
		return false;

	if ( true == pnode->isleaf )
	{
		// The duplicate check above must come before any key is moved:
		// shifting first and bailing out afterwards would leave the leaf
		// with a repeated key and its last key pushed out of range.
		for ( int i = pnode->keycnt; i > pos; -- i )
		{
			pnode->filename[i] = pnode->filename[i-1];
			pnode->fileaddr[i] = pnode->fileaddr[i-1];
		}
		pnode->filename[pos] = name;
		pnode->fileaddr[pos] = addr;
		++ pnode->keycnt;

		return true;
	}

	if ( true == pnode->child[pos]->IsFull() )
	{
		SplitChild( pnode, pos, pnode->child[pos] );

		// The child's median key now sits at pnode->filename[pos].
		if ( pnode->filename[pos] < name )
			++ pos;					// name belongs to the new right half
		else if ( !( name < pnode->filename[pos] ) )
			return false;			// name equals the promoted median: duplicate
	}
	return InsertNonFull( pnode->child[pos], name, addr );
}

/*
 *  Splits the full node child, where parent->child[chind] == child.
 *  The upper t-1 keys (and, for an internal child, the upper t children)
 *  move to a new right sibling, the median key moves up into parent at
 *  position chind, and the new sibling becomes parent->child[chind+1].
 *  parent must not be full.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::SplitChild( PNode parent, int chind, PNode child )
{
	PNode sibling = Allocate( child->isleaf );

	// Upper half of the keys goes to the new sibling.
	for ( int k = 0; k < t-1; ++ k )
	{
		sibling->filename[k] = child->filename[k+t];
		sibling->fileaddr[k] = child->fileaddr[k+t];
	}
	sibling->keycnt = t-1;

	// An internal child also hands over its upper t child pointers.
	if ( !child->isleaf )
	{
		for ( int k = 0; k < t; ++ k )
			sibling->child[k] = child->child[k+t];
	}
	child->keycnt = t-1;

	// Open a gap in parent at key slot chind / child slot chind+1.
	for ( int k = parent->keycnt; k > chind; -- k )
	{
		parent->filename[k] = parent->filename[k-1];
		parent->fileaddr[k] = parent->fileaddr[k-1];
		parent->child[k+1] = parent->child[k];
	}

	// The median key moves up and the sibling is hooked in next to child.
	parent->filename[chind] = child->filename[t-1];
	parent->fileaddr[chind] = child->fileaddr[t-1];
	parent->child[chind+1] = sibling;
	++ parent->keycnt;
}

/*
 *  Deletes the key name from the subtree rooted at pnode.
 *  Returns false when the key does not exist.
 *  Before descending into a child the function makes sure that child holds
 *  at least t keys (by borrowing from a sibling or merging), so the deletion
 *  never has to walk back up.
 */
template< typename FileName, typename DiskAddr, int t >
bool BTree<FileName, DiskAddr, t>::NodeDelete( PNode pnode, FileName name )
{
	if ( NULL == pnode )
		return false;

	// First position whose key is not smaller than name.
	int pos = 0;
	while ( pos < pnode->keycnt && pnode->filename[pos] < name )
		++ pos;

	// "Not smaller" and "not greater" together mean the key is right here.
	if ( pos < pnode->keycnt && !( name < pnode->filename[pos] ) )
	{
		if ( pnode->isleaf )
			LeafNodeDelete( pnode, pos );		// case 1: key sits in a leaf
		else
			InnerNodeDelete( pnode, pos );		// case 2: key sits in an internal node
		return true;
	}

	// Not here and nowhere left to look.
	if ( pnode->isleaf )
		return false;

	// case 3: the key, if present, lives below child[pos].
	PNode target = pnode->child[pos];
	if ( target->keycnt >= t )
		return NodeDelete( target, name );

	// target holds only t-1 keys: top it up before descending.
	if ( pos > 0 && pnode->child[pos-1]->keycnt >= t )
	{
		FromLeftSibling( target, pnode, pos );
		return NodeDelete( target, name );
	}
	if ( pos < pnode->keycnt && pnode->child[pos+1]->keycnt >= t )
	{
		FromRightSibling( target, pnode, pos );
		return NodeDelete( target, name );
	}

	// case 3b: no sibling can spare a key, so merge with one of them
	// (with the left sibling whenever there is one).
	const int mergeat = ( pos > 0 ) ? pos - 1 : pos;
	MergeChild( pnode, mergeat );
	PNode merged = pnode->child[mergeat];

	// Special case: the root gave away its last key, so the merged node
	// becomes the new root and the tree loses one level.
	if ( root == pnode && 0 == pnode->keycnt )
	{
		root = merged;
		delete pnode;
	}
	return NodeDelete( merged, name );
}

/*
 *  Merges pnode->child[keyind], the separating key pnode->filename[keyind]
 *  and pnode->child[keyind+1] into the left child, deletes the right child
 *  and removes the separator (and the right child's slot) from pnode.
 *  The combined key count of both children plus one must not exceed 2t-1.
 *  Does nothing when pnode is NULL.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::MergeChild( PNode pnode, int keyind )
{
	if ( NULL == pnode )
		return;

	PNode left = pnode->child[keyind];
	PNode right = pnode->child[keyind+1];
	int i;

	// The separator moves down right behind left's own keys ...
	left->filename[ left->keycnt ] = pnode->filename[ keyind ];
	left->fileaddr[ left->keycnt ] = pnode->fileaddr[ keyind ];

	// ... followed by every key of the right node.
	const int base = left->keycnt + 1;
	for ( i = 0; i < right->keycnt; ++ i )
	{
		left->filename[base+i] = right->filename[i];
		left->fileaddr[base+i] = right->fileaddr[i];
	}

	// Child pointers only exist when the merged nodes are internal. The flag
	// to test is the children's: pnode is their parent and is never a leaf.
	if ( false == left->isleaf )
	{
		for ( i = 0; i <= right->keycnt; ++ i )
			left->child[base+i] = right->child[i];
	}
	left->keycnt += right->keycnt + 1;
	delete right;
	right = NULL;

	// Close the gap left in the parent.
	for ( i = keyind; i < pnode->keycnt-1; ++ i )
	{
		pnode->filename[i] = pnode->filename[i+1];
		pnode->fileaddr[i] = pnode->fileaddr[i+1];

		pnode->child[i+1] = pnode->child[i+2];
	}
	// The last used child slot is now vacant; when the last two children
	// were merged it still points at the node deleted above.
	pnode->child[ pnode->keycnt ] = NULL;
	-- pnode->keycnt;
}

/*
 *  Removes key number index from the leaf pnode; does nothing when pnode is
 *  not a leaf.
 *  Cases 3a/3b of the deletion guarantee that a non-root leaf reached here
 *  holds at least t keys.
 */
template< typename FileName, typename DiskAddr, int t >
inline void BTree<FileName, DiskAddr, t>::LeafNodeDelete( PNode pnode, int index )
{
	if ( !pnode->isleaf )
		return;

	// Slide every later key one slot towards the front.
	const int last = pnode->keycnt - 1;
	for ( int k = index + 1; k <= last; ++ k )
	{
		pnode->filename[k-1] = pnode->filename[k];
		pnode->fileaddr[k-1] = pnode->fileaddr[k];
	}
	pnode->keycnt = last;
}

/*
 *  Removes key number index from the internal node pnode.
 *  The key is replaced by its predecessor or successor when the neighbouring
 *  child can spare a key; otherwise both neighbouring children are merged
 *  around the key and the key is deleted from the merged node.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::InnerNodeDelete( PNode pnode, int index )
{
	// An internal node always has both child[index] and child[index+1].
	PNode before = pnode->child[index];
	PNode after = pnode->child[index+1];

	// a) the preceding child can spare a key: use the predecessor
	if ( before->keycnt >= t )
	{
		std::pair<FileName, DiskAddr> pred = FindNodeMaxKeyValue( before );
		pnode->filename[index] = pred.first;
		pnode->fileaddr[index] = pred.second;
		NodeDelete( before, pred.first );	// removes the original of the copied key
		return;
	}

	// b) the following child can spare a key: use the successor
	if ( after->keycnt >= t )
	{
		std::pair<FileName, DiskAddr> succ = FindNodeMinKeyValue( after );
		pnode->filename[index] = succ.first;
		pnode->fileaddr[index] = succ.second;
		NodeDelete( after, succ.first );
		return;
	}

	// c) neither child can spare a key: merge them around the key and
	//    delete the key from the merged node.
	FileName name = pnode->filename[index];
	MergeChild( pnode, index );
	PNode merged = pnode->child[index];

	// Special case: the root gave away its last key, so the merged node
	// becomes the new root.
	if ( root == pnode && 0 == pnode->keycnt )
	{
		root = merged;
		delete pnode;
	}
	NodeDelete( merged, name );
}

/*
 *  Rotates one key to the right through the parent:
 *  last key of the left sibling --> parent key chind-1 --> first slot of child.
 *  The left sibling's last child pointer travels along and becomes child's
 *  first child. child == parent->child[chind], chind > 0.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::FromLeftSibling( PNode child, PNode parent, int chind )
{
	PNode left = parent->child[chind-1];
	const int sep = chind - 1;		// parent key between left and child

	// Make room at the front of child: keys first ...
	for ( int k = child->keycnt; k > 0; -- k )
	{
		child->filename[k] = child->filename[k-1];
		child->fileaddr[k] = child->fileaddr[k-1];
	}
	// ... then all keycnt+1 child pointers.
	for ( int k = child->keycnt + 1; k > 0; -- k )
		child->child[k] = child->child[k-1];

	// The separator drops into child, together with left's last subtree.
	child->child[0] = left->child[ left->keycnt ];
	child->filename[0] = parent->filename[sep];
	child->fileaddr[0] = parent->fileaddr[sep];
	++ child->keycnt;

	// left's last key moves up to take the separator's place.
	const int donor = left->keycnt - 1;
	parent->filename[sep] = left->filename[donor];
	parent->fileaddr[sep] = left->fileaddr[donor];
	left->keycnt = donor;
}

/*
 *  Rotates one key to the left through the parent:
 *  first key of the right sibling --> parent key chind --> last slot of child.
 *  The right sibling's first child pointer travels along and becomes child's
 *  last child. child == parent->child[chind], chind < parent's key count.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::FromRightSibling( PNode child, PNode parent, int chind )
{
	PNode right = parent->child[chind+1];

	// The separator drops to the end of child, with right's first subtree.
	const int tail = child->keycnt;
	child->filename[tail] = parent->filename[chind];
	child->fileaddr[tail] = parent->fileaddr[chind];
	child->child[tail+1] = right->child[0];
	child->keycnt = tail + 1;

	// right's first key moves up to take the separator's place.
	parent->filename[chind] = right->filename[0];
	parent->fileaddr[chind] = right->fileaddr[0];

	// Close the gap at the front of right: keys first ...
	const int remaining = right->keycnt - 1;
	for ( int k = 0; k < remaining; ++ k )
	{
		right->filename[k] = right->filename[k+1];
		right->fileaddr[k] = right->fileaddr[k+1];
	}
	// ... then the remaining+1 child pointers.
	for ( int k = 0; k <= remaining; ++ k )
		right->child[k] = right->child[k+1];
	right->keycnt = remaining;
}

/*
 *  Returns the largest key of the subtree rooted at pnode together with its
 *  value. pnode must not be NULL and the leaf reached must hold a key.
 */
template< typename FileName, typename DiskAddr, int t >
std::pair<FileName, DiskAddr> BTree<FileName, DiskAddr, t>::FindNodeMaxKeyValue( PNode pnode )
{
	// The maximum lives in the rightmost leaf: keep following the last child.
	PNode cur = pnode;
	while ( !cur->isleaf )
		cur = cur->child[ cur->keycnt ];

	const int last = cur->keycnt - 1;
	return std::pair<FileName, DiskAddr>( cur->filename[last], cur->fileaddr[last] );
}

/*
 *  Returns the smallest key of the subtree rooted at pnode together with its
 *  value. pnode must not be NULL and the leaf reached must hold a key.
 */
template< typename FileName, typename DiskAddr, int t >
std::pair<FileName, DiskAddr> BTree<FileName, DiskAddr, t>::FindNodeMinKeyValue( PNode pnode )
{
	// The minimum lives in the leftmost leaf: keep following the first child.
	PNode cur = pnode;
	while ( !cur->isleaf )
		cur = cur->child[0];

	return std::pair<FileName, DiskAddr>( cur->filename[0], cur->fileaddr[0] );
}
//===================================================================================================

#endif


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值