今天看Linux内核代码时,看到了radix tree,从书上大概地了解了radix tree的一些基本知识,包括它的结构和操作。由于Linux内核代码不容易看懂,因此,打算自己家先实现一下radix tree,然后再看内核中的代码,由于懒得去查radix tree的标准表示方式,所以,这里radix tree的结构与Linux内核使用的类似。
首先,简单说一下radix tree的结构。
radix tree的叶节点是存放在树中的数据,内节点包含子节点的指针,每个内节点的孩子个数固定,Linux内核一般设置为64,也就是内节点包含64个指针,内节点还包括本节点的有效孩子个数以及所在的层次,这里为了简单,并没有包含Linux内核内节点那么多数据。
radix tree的使用主要是提高查询效率,因此,这里说一下radix tree的查询操作:
每个内节点包含的指针个数是pow(2, n),好处是可以通过提取索引值的n位来得到该层的索引。由于这里采用的是pow(2, 6),因此,每层的索引值是6位。比如下图中的索引值131,转换成16进制就是0x00000083,内节点中存放的有当前节点所在的层次(从下往上计算),那么,提取第二层的索引值是2,因此取第二层的slot[2]转到第一层,然后提取第一层的索引值,是3,因此,提取第一层的slot[3],由于这是第一层了,于是,遍历结束,所得到的slot[3]就是希望查询的数据的指针。
《深入理解Linux内核》上关于radix tree的图示:
看了图应该很好理解了,直接贴代码吧,由于时间有限,没有对代码进行严格的测试,只测试了插入和查找操作,因此,代码有可能会有问题:
#include <iostream>
#include <fstream>
#include <string>
#include <cstring>
using namespace std;
typedef unsigned int u_int;
//常量定义
const u_int RADIX_TREE_SLOTS = 64; //一个内节点包含的最多的孩子个数
const u_int RADIX_MAX_LEVEL = 5; //最大的层数
const u_int radix_level[RADIX_MAX_LEVEL + 1] = { 0x00000000,
0x0000003F,
0x00000FC0,
0x0003F000,
0x00FC0000,
0x3F000000 }; //层次掩码,用于提取某一层上的索引值
//函数的返回值状态
enum radix_status {
INSERT_SUCC,
INSERT_FAIL,
DELETE_SUCC,
DELETE_FAIL
};
//radix tree的内节点
struct __radix_tree_node {
u_int height; //内节点的高度,从下往上算起
u_int count; //有效子节点的个数
void *slot[RADIX_TREE_SLOTS];
};
//radix tree的根节点
struct __radix_tree_root {
u_int height; //整个树的高度,不包括树根
__radix_tree_node *rnode;
};
//radix tree的页节点,存储在树中的数据,这里暂时将数据成员设置为string
struct radix_tree_data {
string str;
};
class radix_tree {
public:
radix_tree();
void radix_tree_stretch(); //将树向下扩展一层
void radix_tree_stretch_n(u_int); //将树扩展到n层
radix_status radix_tree_insert(u_int, radix_tree_data&); //插入一个索引值为index的数据
radix_status radix_tree_delete(u_int, radix_tree_data *&); //删除索引值为index的数据
radix_tree_data* radix_tree_lookup(u_int); //查询索引值为index的数据
void radix_tree_print(); //打印整棵树
~radix_tree();
private:
u_int get_want_level(u_int index) //从给定的索引值获取树所期望的层次
{
u_int level = RADIX_MAX_LEVEL;
while(level >= 0) {
if(index & radix_level[level]) {
return level;
}
--level;
}
return level;
}
u_int get_level_index(u_int index, u_int level) //从给定的索引值和层次,得到该层的索引值
{
return (index & radix_level[level]) >> ((level - 1) * 6);
}
__radix_tree_node* radix_tree_alloc_node() //分配一个树的内节点
{
__radix_tree_node *pr_node = new __radix_tree_node;
pr_node->height = 0;
pr_node->count = 0;
//for(u_int i = 0; i < RADIX_TREE_SLOTS; ++i)
//pr_node->slot[i] = NULL;
memset(pr_node->slot, 0, sizeof(pr_node->slot));
return pr_node;
}
__radix_tree_node* radix_tree_get_node(u_int); //根据索引值得到该索引值所在的节点
void __radix_tree_destroy_node(__radix_tree_node *&pr_node);
void radix_tree_print_node(__radix_tree_node *pr_node);
radix_status __radix_tree_insert(u_int, u_int, radix_tree_data&);
__radix_tree_root r_tree;
};
//构造函数,用于初始化radix tree
radix_tree::radix_tree()
{
r_tree.height = 0;
r_tree.rnode = NULL;
}
//销毁某个内节点,并递归销毁它的子节点
void radix_tree::__radix_tree_destroy_node(__radix_tree_node *&pr_node)
{
if(pr_node == NULL)
return;
if(pr_node->height == 1) {
delete pr_node;
pr_node = NULL;
return;
}
for(u_int i = 0; i < RADIX_TREE_SLOTS; ++i) {
__radix_tree_destroy_node((__radix_tree_node*&)(pr_node->slot[i]));
}
delete pr_node;
pr_node = NULL;
}
//析构函数,用于销毁radix tree
radix_tree::~radix_tree()
{
__radix_tree_destroy_node(r_tree.rnode);
}
//向下扩展一层:分配一个内节点,然后将这个新节点的第一个slot的指针指向根节点指向的节点
//最后更新新节点中的信息和整个树的高度
void radix_tree::radix_tree_stretch()
{
__radix_tree_node *pr_node = radix_tree_alloc_node();
pr_node->height = r_tree.height + 1;
pr_node->count = 1;
pr_node->slot[0] = r_tree.rnode;
r_tree.rnode = pr_node;
++r_tree.height;
return;
}
//向下扩展到level层
void radix_tree::radix_tree_stretch_n(u_int level)
{
u_int height = r_tree.height;
if(height >= level) {
return;
}
while(height < level) {
radix_tree_stretch();
++height;
}
}
//插入索引值的辅助函数,采用此函数时,说明该树此时的高度能够插入index值的数据
radix_status radix_tree::__radix_tree_insert(u_int index, u_int max_level, radix_tree_data& data)
{
u_int cur_index = 0;
__radix_tree_node *pr_node = r_tree.rnode, *pr_node_tmp = NULL;
while(max_level > 1) {
cur_index = get_level_index(index, max_level);
pr_node_tmp = (__radix_tree_node*)(pr_node->slot[cur_index]);
if(pr_node_tmp == NULL) {
pr_node->slot[cur_index] = radix_tree_alloc_node();
((__radix_tree_node*)(pr_node->slot[cur_index]))->height = pr_node->height - 1;
pr_node_tmp = (__radix_tree_node*)(pr_node->slot[cur_index]);
}
pr_node = pr_node_tmp;
--max_level;
}
cur_index = get_level_index(index, 1);
if(pr_node->slot[cur_index]) {
return INSERT_FAIL;
}
pr_node->count++;
pr_node->slot[cur_index] = (void*)&data;
return INSERT_SUCC;
}
//提供给用户的插入函数:如果当前的树高不足以存储index时,需要对树进行扩展
radix_status radix_tree::radix_tree_insert(u_int index, radix_tree_data& data)
{
u_int want_level = get_want_level(index);
cout << index << " want level: " << want_level << endl;
if(want_level <= r_tree.height) {
return __radix_tree_insert(index, want_level, data);
}
radix_tree_stretch_n(want_level);
cout << "stretch to " << want_level << " level" << endl;
return __radix_tree_insert(index, want_level, data);
}
//获取index所在的内节点
__radix_tree_node* radix_tree::radix_tree_get_node(u_int index)
{
u_int cur_level = r_tree.height;
u_int want_level = get_want_level(index);
u_int cur_index = 0;
__radix_tree_node *pr_node = r_tree.rnode, *pr_node_tmp = NULL;
if(want_level <= r_tree.height) {
while(cur_level > 1) {
cur_index = get_level_index(index, cur_level);
pr_node_tmp = (__radix_tree_node*)(pr_node->slot[cur_index]);
if(pr_node_tmp == NULL) {
return NULL;
}
pr_node = pr_node_tmp;
--cur_level;
}
return pr_node;
}
return NULL;
}
//提供给用户的查询函数
radix_tree_data* radix_tree::radix_tree_lookup(u_int index)
{
__radix_tree_node *pr_node = radix_tree_get_node(index);
if(pr_node == NULL) {
return NULL;
}
u_int cur_index = get_level_index(index, 1);
radix_tree_data *data = (radix_tree_data*)(pr_node->slot[cur_index]);
if(data == NULL) {
return NULL;
}
return data;
}
//提供给用户的删除函数:只是将index所在的slot置为NULL,返回存储的数据,
//当内节点并没有孩子时,并不删除内节点
radix_status radix_tree::radix_tree_delete(u_int index, radix_tree_data *& rdata)
{
__radix_tree_node *pr_node = radix_tree_get_node(index);
if(pr_node == NULL) {
return DELETE_FAIL;
}
u_int cur_index = get_level_index(index, 1);
radix_tree_data *data = (radix_tree_data*)(pr_node->slot[cur_index]);
if(data == NULL) {
rdata = NULL;
return DELETE_FAIL;
}
rdata = data;
pr_node->slot[cur_index] = NULL;
pr_node->count--;
return DELETE_SUCC;
}
//采用递归的方式打印内节点
void radix_tree::radix_tree_print_node(__radix_tree_node *pr_node)
{
if(pr_node == NULL)
return;
if(pr_node->height == 1) {
for(u_int i = 0; i < RADIX_TREE_SLOTS; ++i) {
if(pr_node->slot[i]) {
cout << ((radix_tree_data*)(pr_node->slot[i]))->str << endl;
}
}
return;
}
for(u_int i = 0; i < RADIX_TREE_SLOTS; ++i) {
radix_tree_print_node((__radix_tree_node*)(pr_node->slot[i]));
}
}
//打印整个树
void radix_tree::radix_tree_print()
{
radix_tree_print_node(r_tree.rnode);
}
int main()
{
radix_tree rdtree;
u_int index = 0;
string str;
ifstream cin("test.txt");
//test.txt文件内容:
//5 abc
//6 def
//131 mnp
radix_tree_data data1;
cin >> index >> str;
data1.str = str;
rdtree.radix_tree_insert(index, data1);
radix_tree_data data2;
cin >> index >> str;
data2.str = str;
rdtree.radix_tree_insert(index, data2);
radix_tree_data data3;
cin >> index >> str;
data3.str = str;
rdtree.radix_tree_insert(index, data3);
rdtree.radix_tree_print();
cout << "lookup start..." << endl;
radix_tree_data *pd = NULL;
pd = rdtree.radix_tree_lookup(5);
if(pd)
cout << pd->str << endl;
pd = rdtree.radix_tree_lookup(6);
if(pd)
cout << pd->str << endl;
pd = rdtree.radix_tree_lookup(131);
if(pd)
cout << pd->str << endl;
return 0;
}