// A C++ implementation of a B+ tree.
// File: BPlusTree.h
#include <vector>
#include <list>
#include <iostream>
#include <cmath>
#include <algorithm>
#ifndef DS_BPLUSTREE
#define DS_BPLUSTREE
// The tree class is declared ahead of the node so that the node can name it
// as a friend below.
template <class T> class BPlusTree;

// One node of a B+ tree storing keys of type T.
// A node owns a vector of keys, a vector of child pointers and a pointer back
// to its parent (NULL until something sets it). All three are private; only
// BPlusTree<T> reads or writes them directly.
template <class T>
class BPlusTreeNode{
    // Grants BPlusTree<T> access to keys, children and parent.
    friend class BPlusTree<T>;

public:
    // Builds a node with no keys, no children and a NULL parent.
    BPlusTreeNode() : parent(NULL) {}

    // Returns the parent pointer, which is NULL for a node without a parent.
    BPlusTreeNode* get_parent() { return parent; }

    // Reports whether the node has no non-NULL child pointer.
    bool is_leaf();

    // Reports whether key is stored in this node's key vector.
    bool contains(const T& key);

private:
    // Range-limited search used by the public contains(); low and high are
    // inclusive indices into keys.
    bool contains(const T& key,std::size_t low,std::size_t high);

    std::vector<T> keys;
    std::vector<BPlusTreeNode*> children;
    BPlusTreeNode* parent;
};
// A node counts as a leaf when none of its child slots holds a non-NULL
// pointer; a node with an empty children vector is therefore a leaf as well.
template <class T>
bool BPlusTreeNode<T>::is_leaf(){
    typename std::vector<BPlusTreeNode*>::const_iterator slot = children.begin();
    while(slot != children.end()){
        if(*slot != NULL){
            //Found a real child, so this is an internal node
            return false;
        }
        ++slot;
    }
    return true;
}
// Reports whether key is one of the keys stored in this node.
// Delegates to the private range overload over the whole key vector.
template <class T>
bool BPlusTreeNode<T>::contains(const T& key){
    //keys.size()-1 is unsigned: on an empty node it would wrap around to the
    //largest std::size_t and send the search far out of bounds, so an empty
    //node is answered here without searching.
    if(keys.empty()){
        return false;
    }
    return contains(key,0,keys.size()-1);
}
//Private binary search over keys[low..high] (both bounds inclusive); relies on
//the keys being sorted in ascending order.
//Returns true when some key in that range compares equal to key, and false
//for an empty range (low > high) or an empty node.
template <class T>
bool BPlusTreeNode<T>::contains(const T& key,std::size_t low,std::size_t high){
    //Nothing to search; also protects the indexing below
    if(keys.empty() || low>high){
        return false;
    }
    //Never index past the last key, whatever upper bound the caller passed
    if(high>=keys.size()){
        high = keys.size()-1;
    }
    //Narrow the range while keeping the lower bound. The earlier version
    //restarted every left-hand step from index 0, which threw away the
    //progress already made.
    while(low<high){
        //Written this way so low+high cannot overflow
        const std::size_t mid = low + (high-low)/2;
        if(key<=keys[mid]){
            high = mid;
        }
        else{
            low = mid+1;
        }
    }
    //low>high here only if the clamp above moved high below low
    return low==high && key == keys[low];
}
template <class T> class BPlusTree{
public:
BPlusTree() : root_(NULL), height_(0) {};
BPlusTree(int num_keys){this->make_tree(num_keys-1);}
BPlusTree(const BPlusTree<T>& old) : height_(old.height_), size_(old.size_){
root_ = this->copy_tree(old.root_, NULL);}
~BPlusTree(){this->destroy_tree(root_); root_ = NULL;}
BPlusTreeNode<T>* find(const T& key_value){return find(key_value, root_);}
bool insert(const T& key_value){return insert(key_value, root_);}
void print_sideways(std::ostream& ostr){print_sideways(ostr, root_, 0);}
void print_BFS(std::ostream& ostr){print_BFS(ostr, root_);}
void print_BFS_pretty(std::ofstream& ostr){};
BPlusTree& operator=(const BPlusTree<T>& old) {
if (&old != this) {
this->destroy_tree(root_);
root_ = this->copy_tree(old.root_, old.root_->parent);
size_ = old.size_;
height_ = old.height_;
}
return *this;
}
private:
BPlusTreeNode<T>* root_;
//how deep the tree is
unsigned int height_;
//how many keys each node has
unsigned int size_;
//Print a tree out sideways, same as ds_set with some modificationsvoid print_sideways(std::ostream& ostr, const BPlusTreeNode<T>* p, int depth){
if(depth == 0 && (not(p) || p->keys.size() == 0 )){
ostr << "Tree is empty.\n";
return;
}
if (p) {
unsigned int split_idx = floor(p->children.size() / 2);
//Left subtree(s)
for(unsigned int i = 0; i < split_idx; i++) print_sideways(ostr, p->children[i], depth+1);
for (int i = 0; i < depth; i++) ostr << "\t";
//Print out the keys
for(unsigned int i = 0; i < p->keys.size(); i++){
if(i != p->keys.size()-1)
ostr << p->keys[i] << ",";
else
ostr << p->keys[i];
}
ostr << "\n";
//Right subtree(s)
for(unsigned int i = split_idx; i < p->children.size(); i++) print_sideways(ostr, p->children[i], depth+1);
}
}
void print_BFS(std::ostream& ostr, BPlusTreeNode<T>* p){
//Iterative Solution
std::list<BPlusTreeNode<T>*> current_row;
std::list<BPlusTreeNode<T>*> next_row;
current_row.push_back(p);
if(not(p) || p->keys.size() == 0){
ostr << "Tree is empty.\n";
return;
}
while(not(current_row.empty())){
//Process the current row
int counter = current_row.size();
while(not(current_row.empty())){
//Reference to the front item, doesn't copy
BPlusTreeNode<T>*& temp = current_row.front();
//Print out the current row
for(unsigned int i = 0; i < temp->keys.size(); i++){
if(i != temp->keys.size()-1)
ostr << temp->keys[i] << ",";
else
ostr<< temp->keys[i];
}
//Add the children of this row to the next row
if(not(temp->is_leaf())){
for(unsigned int i = 0; i < temp->children.size(); i++){
next_row.push_back(temp->children[i]);
}
}
counter--;
current_row.pop_front(); //Advance the current "queue"
if(counter > 0)
ostr << "\t";
}
//Reset for next iteration
ostr << "\n";
current_row = next_row;
next_row.clear();
}
}
//Private part of the copy constructor
BPlusTreeNode<T>* copy_tree(BPlusTreeNode<T>* old_root, BPlusTreeNode<T>* parent){
if(old_root == NULL){
return NULL;
}
//Copy all the old data into a new node
BPlusTreeNode<T>* tmp_root = new BPlusTreeNode<T>;
tmp_root->keys = old_root->keys;
tmp_root->children = old_root->children;
tmp_root->parent = old_root->parent;
//If it isn't a root copy it's children
if(not(tmp_root->is_leaf())){
for(unsigned int i = 0; i < tmp_root->children.size(); i++){
copy_tree(tmp_root->children[i], tmp_root);
}
}
return tmp_root;
}
//Private part of the destructor, uses a BFS
void destroy_tree(BPlusTreeNode<T>* p){
std::list<BPlusTreeNode<T>* > current_row;
std::list<BPlusTreeNode<T>* > next_row;
current_row.push_back(p);
if(not(p)){
return;
}
while(not(current_row).empty()){
while(not(current_row.empty())){
//Delete everything on the current row
p = current_row.front();
for(unsigned int i = 0; i < p->children.size(); i++){
next_row.push_back(p->children[i]);
}
delete p;
p = NULL;
current_row.pop_front();
}
current_row = next_row;
next_row.clear();
}
}
//Make a root
void make_tree(int num_keys){
root_ = new BPlusTreeNode<T>;
size_ = num_keys;
height_ = 1;
}
//Given a key_value, search for the node containing that value
BPlusTreeNode<T>* find(const T& key_value, BPlusTreeNode<T>* p){
//Make sure p exists
if(not(p)){
return NULL;
}
//Make sure there are actually keys in the tree
if(p->keys.size() > 0){
//Check if key_value is in the node and the node is a leaf
if(p->contains(key_value) && p->is_leaf()){
return p;
}
//Find where it would be in the children
if(not(p->is_leaf())){
unsigned int high_idx;
unsigned int low_idx;
bool high_found = false;
bool low_found = false;
//Iterate over all the keys in p
for(unsigned int key_idx = 0; key_idx < p->keys.size(); key_idx++){
//We found an key that is greater than the one we're trying to place
//Break as we now know where to place the key, has to be before this idx
if(key_value < p->keys[key_idx]){
high_idx = key_idx;
high_found = true;
break;
}
//We found a key that is smaller than the one we're tying to place
//We can't break as there could potentially be other keys that are
//larger than key_value
else if(key_value >= p->keys[key_idx]){
low_idx = key_idx;
low_found = true;
continue;
}
}
//Two values, one lower than key value and one higher than key_value, want the subtree between these two
if(high_found && low_found){
int child_idx = ceil(float(high_idx + low_idx) / 2);
return find(key_value, p->children[child_idx]);
}
//Only values larger than key_value, want the leftmost subtree
else if(high_found && not(low_found)){
return find(key_value, p->children[0]);
}
//Only values smaller than key_value, wnat the rightmost subtree
else if(low_found && not(high_found)){
return find(key_value, p->children[p->children.size() - 1]);
}
//Didn't find it
else{
return p;
}
}
//Otherwise, if there are no children, return this node
else{
return p;
}
}
//No keys at all
else{
return NULL;
}
}
//Helper function to insert a key into it's proper place in a node, DOES NOT CHECK SIZE
void insert_key(const T& key_value, BPlusTreeNode<T>* & p, bool& high, bool& low){
//Setup a loop
unsigned int high_idx;
unsigned int low_idx;
bool high_found = false;
bool low_found = false;
for(unsigned int key_idx = 0; key_idx < p->keys.size(); key_idx++){
//If we found a key that's larger than key_value, break
if(key_value < p->keys[key_idx] && not(low_found)){
high_idx = key_idx;
high_found = true;
break;
}
//Store the last key that was smaller than key_value
else if(key_value > p->keys[key_idx]){
low_idx = key_idx;
low_found = true;
continue;
}
}
//A larger and a smaller item, insert at the larger item's location
if(high_found && low_found){
p->keys.insert(p->keys.begin() + high_idx, key_value);
}
//Smallest item, just add to the front
else if(high_found){
p->keys.insert(p->keys.begin(), key_value);
}
//Largest item, just insert to the back
else if(low_found){
p->keys.push_back(key_value);
}
high = high_found;
low = low_found;
}
//Given a node that may overflow, split it into two, adjust the tree as necessary, and return a new root node
BPlusTreeNode<T>* split_node(const T& key_value, BPlusTreeNode<T>* p){
//setup up the necessray variables
std::vector<BPlusTreeNode<T>*> children_temp = p->children;
//Insert the key_value into the node's keys
bool temp = false;
bool temp2 = false;
BPlusTreeNode<T>* temp_keys = p;
insert_key(key_value, temp_keys, temp, temp2);
std::vector<T> keys_temp = temp_keys->keys;
//Check if the node overflowed
if(keys_temp.size() <= size_ ){
p->keys = keys_temp;
while(p->parent){
p = p->parent;
}
return p;
}
//P is leaf, need to split keys into two different nodes and add
//them to P's parent as children or, if P is a root, create a new root
//and add them to that instead
if(p->is_leaf()){
//find where to split the keys
unsigned int split_idx = floor(keys_temp.size() / 2);
//Put the values into two new subtrees
BPlusTreeNode<T>* l_tree = new BPlusTreeNode<T>;
BPlusTreeNode<T>* r_tree = new BPlusTreeNode<T>;
for(unsigned int i = 0; i < keys_temp.size(); i++){
if(i < split_idx){
l_tree->keys.push_back(keys_temp[i]);
}
else{
r_tree->keys.push_back(keys_temp[i]);
}
}
//Parent node to insert the key value and children into
if(p->parent){
//Save the parent and give set the subtrees parent
BPlusTreeNode<T>* p_parent = p->parent;
l_tree->parent = p_parent;
r_tree->parent = p_parent;
//Find where p is in p's parent's children and erase it
typename std::vector<BPlusTreeNode<T>* >::iterator itr;
itr = std::find(p_parent->children.begin(), p_parent->children.end(), p);
itr = p_parent->children.erase(itr);
//insert p's children (l and r subtrees) into the children vector where p just was
itr = p_parent->children.insert(itr, l_tree);
++itr;
p_parent->children.insert(itr, r_tree);
//Delete p
delete p;
p = NULL;
//Split the parent
return split_node(r_tree->keys[0], p_parent);
}
//No parent node, p is root, only item in tree
else{
//Make a new root node, and set the subtrees parent
BPlusTreeNode<T>* temp = new BPlusTreeNode<T>;
l_tree->parent = temp;
r_tree->parent = temp;
//Add the subtrees to the new root
temp->children.push_back(l_tree);
temp->children.push_back(r_tree);
temp->keys.push_back(r_tree->keys[0]);
delete p;
p = NULL;
//return it
return temp;
}
}
//P isn't a leaf, need to divide the keys and children int two different nodes
//and add them to P's parent, or make a new root
else if (not(p->is_leaf())){
//p isn't a root
if(p->parent){
//split the node and go to the parent
unsigned int split_idx = floor(keys_temp.size() / 2);
unsigned int child_split_idx = floor(children_temp.size() / 2);
//Put the key values into two new subtrees and set their parent
BPlusTreeNode<T>* l_tree = new BPlusTreeNode<T>;
BPlusTreeNode<T>* r_tree = new BPlusTreeNode<T>;
l_tree->parent = p->parent;
r_tree->parent = p->parent;
//Divide the keys among the two nodes
for(unsigned int i = 0; i < keys_temp.size(); i++){
if(i < split_idx){
l_tree->keys.push_back(keys_temp[i]);
}else{
r_tree->keys.push_back(keys_temp[i]);
}
}
//Divide the children among the two nodes, and set their parents to be the new subtree
for(unsigned int i = 0; i < children_temp.size(); i++){
if(i < child_split_idx){
l_tree->children.push_back(children_temp[i]);
children_temp[i]->parent = l_tree;
}
else{
r_tree->children.push_back(children_temp[i]);
children_temp[i]->parent = r_tree;
}
}
//Save the parent
BPlusTreeNode<T>* p_parent = p->parent;
//Find where p is in p's parent's children and erase it
typename std::vector<BPlusTreeNode<T>* >::iterator itr;
itr = std::find(p_parent->children.begin(), p_parent->children.end(), p);
itr = p_parent->children.erase(itr);
print_sideways(std::cout, root_, 0);
//insert the sub trees into p's parent's children where p just was
itr = p_parent->children.insert(itr, l_tree);
++itr;
p_parent->children.insert(itr, r_tree);
//Delete p
delete p;
p = NULL;
//split the parent
return split_node(l_tree->keys[0], p_parent);
}
//P is a root
else{
//Make a new root node
BPlusTreeNode<T>* temp = new BPlusTreeNode<T>;
unsigned int split_idx = floor(keys_temp.size() / 2);
unsigned int child_split_idx = floor(children_temp.size() / 2);
//Put the key values into two new subtrees, an set their parents
BPlusTreeNode<T>* l_tree = new BPlusTreeNode<T>;
BPlusTreeNode<T>* r_tree = new BPlusTreeNode<T>;
l_tree->parent = temp;
r_tree->parent = temp;
//Divide the keys among the two nodes
for(unsigned int i = 0; i < keys_temp.size(); i++){
if(i < split_idx){
l_tree->keys.push_back(keys_temp[i]);
}
else if(i > split_idx){
r_tree->keys.push_back(keys_temp[i]);
}
//if even # of nodes, add the niddle node to the right subtree and
//use it as the new root
//otherwise just use it as the new root
else{
if(split_idx % 2 == 0){
r_tree->keys.push_back(keys_temp[i]);
}
temp->keys.push_back(keys_temp[i]);
}
}
//Divide the children among the two nodes and set their parents correctly
for(unsigned int i = 0; i < children_temp.size(); i++){
if(i < child_split_idx){
l_tree->children.push_back(children_temp[i]);
children_temp[i]->parent = l_tree;
}else{
r_tree->children.push_back(children_temp[i]);
children_temp[i]->parent = r_tree;
}
}
//Make them the new root's children and return the new root
temp->children.push_back(l_tree);
temp->children.push_back(r_tree);
delete p;
p = NULL;
return temp;
}
}
//Should never return this
else{
return NULL;
}
}
//Insert function
bool insert(const T& key_value, BPlusTreeNode<T>* & p){
//Nothing in p, just put the key in it
if(p->keys.size()==0){
p->keys.push_back(key_value);
return true;
}
//only item in the tree is the root p, p not full, just put the key in it
else if(p->is_leaf() && p->keys.size() < size_){
bool high_found = false;
bool low_found = false;
insert_key(key_value, p, high_found, low_found);
//Check if the key was inserted
if(high_found || low_found){
return true;
}
else{
return false;
}
}
//only item root p, p full, split it
else if(p->is_leaf() && p->keys.size() == size_){
p = split_node(key_value, p);
return true;
}
//Multiple items in p,
else{
//Find where key_value should go
BPlusTreeNode<T>* temp = find(key_value, p);
//Can fit key_value into the node
if(temp->keys.size() < size_){
bool high_found = false;
bool low_found = false;
insert_key(key_value, temp, high_found, low_found);
//Check if the key was inserted
if(high_found || low_found){
return true;
}
else{
return false;
}
}
//Can't fit key_value into the node, it overflows, so split it
else{
p = split_node(key_value, temp);
return true;
}
}
return false;
}
};
#endif