基数树是一种比较节省空间的树结构,下图展示了基数树的结构。树是按照key的二进制位来构建的:在这里,key是一个32位的整数,为了避免层数过深,每一层使用key中的两位作为子节点的索引,基数树就是依据这样的二进制串来生成树结构。值value被储存在叶节点。
假设key=0X12345678,下图依据key来建立一棵树:
由于key是32位的,而每一层使用2个位用于建树,因此如果要查找一个值,最多只需要跳转16次就一定可以得出结果,是一种可以进行快速插入和查找的树。
下面是基数树的简单实现,参考了nginx里面基数树的实现。
#pragma once
#include<stdlib.h>
#include<stdio.h>
#define MEMPAGE 4096//size of one memory page in bytes, typically 4 KB
#define INIT_POOL_SIZE (MEMPAGE*32) //initial memory pool size in bytes (32 pages)
#define INIT_FREE_SIZE (INIT_POOL_SIZE/2) //initial length reserved for spare nodes (not referenced by the code shown here)
#define INIT_NODE_NUM (16*32) //number of node slots radix_tree_create carves out of the first pool
#define RADIX_INSERT_VALUE_OCCUPY -1 //the target node is already occupied by a different value
#define RADIX_INSERT_VALUE_SAME -2 //the same value was inserted again
#define RADIX_DELETE_ERROR -3 //deletion failed
//Integer type used for the values stored in the tree.
typedef unsigned int ptr_t;
//Integer type used for keys.
typedef unsigned int uint32;
//Number of key bits consumed at each level of the tree.
#define BITS 2
//Height of the tree: how many BITS-wide digits fit into one ptr_t.
const int radix_tree_height = (sizeof(ptr_t) * 8) / BITS;
//Yields digit number pos of key, where each digit is BITS bits wide and
//digit 0 is the least significant one. pos is evaluated exactly once and
//must be smaller than radix_tree_height.
#define CHECK_BITS(key,pos) ((((unsigned int)(key)) >> ((pos) * BITS)) & ((1u << BITS) - 1u))
//Radix tree node
typedef struct radix_node_t radix_node_t;
struct radix_node_t {
radix_node_t* child[4];//child links, indexed by the 2-bit key digit (0..3) for this level
radix_node_t* parent;//parent while the node is in the tree; reused as the "next" link while the node is on the free list
ptr_t value;//value stored in the node
};
//A memory pool is used to reduce the time spent requesting memory whenever a node is created.
//Pool descriptor; it is stored at the front of the pool's own memory block.
typedef struct radix_pool {
struct radix_pool* next;//each pool is one node of a circular doubly linked list
struct radix_pool* prev;
//Start address of the part of the allocated memory that has not been used yet
char* start;
//Length in bytes of the part of the allocated memory that has not been used yet
size_t size;
}radix_pool, * pool_t;
//Management structure for a radix tree
typedef struct radix_tree_t {
//Root node
radix_node_t* root;
//Pointer to the memory pool that new nodes are carved from
pool_t pool;
//Nodes that are allocated but not currently in the tree; this points at one
//node of that list (the code shown links the list through each node's parent field)
radix_node_t* free;
}radix_tree_t;
//Grows the memory pool: allocates a new block, links it into the circular
//list of pools right after the current one and makes it the current pool.
//t:   tree whose pool list receives the block; t->pool must already be a valid list.
//num: size of the new block in pages; (size_t)-1 selects the default size.
//Returns the new pool (now also t->pool), or NULL when the requested size is
//unusable or malloc fails. On failure the tree is left unchanged.
pool_t get_new_pool(radix_tree_t* t, size_t num)
{
    size_t bytes;
    pool_t pool;

    //INIT_POOL_SIZE is a byte count while num is a page count, so convert it.
    if (num == (size_t)-1) num = INIT_POOL_SIZE / MEMPAGE;
    //Refuse page counts whose byte size would overflow size_t.
    if (num > (size_t)-1 / MEMPAGE) return NULL;
    bytes = num * MEMPAGE;
    //The block has to be able to hold at least its own descriptor.
    if (bytes < sizeof(radix_pool)) return NULL;

    pool = (pool_t)malloc(bytes);
    if (pool == NULL) return NULL;

    //The descriptor sits at the front; everything after it is free space.
    pool->start = (char*)pool + sizeof(radix_pool);
    pool->size = bytes - sizeof(radix_pool);

    //Splice the new block in between the current pool and its successor.
    pool->next = t->pool->next;
    pool->prev = t->pool;
    t->pool->next->prev = pool;
    t->pool->next = pool;
    t->pool = pool;
    return pool;
}
//Creates a node: reuses one from the tree's free list when available,
//otherwise carves one out of the current memory pool, growing the pool first
//if it has no room left.
//Returns the node with all child links and the parent cleared and value set
//to 0, or NULL when the pool needed to grow and that failed.
radix_node_t* radix_node_alloc(radix_tree_t* t)
{
    radix_node_t* node;
    size_t slot;

    if (t->free != NULL) {
        //Free-list entries are chained through their parent field.
        node = t->free;
        t->free = node->parent;
    }
    else {
        if (t->pool->size < sizeof(radix_node_t)) {
            //Not enough room left: switch to a fresh default-sized pool.
            //Without this check a failed growth would hand out memory
            //beyond the end of the exhausted pool.
            if (get_new_pool(t, (size_t)-1) == NULL) return NULL;
        }
        node = (radix_node_t*)t->pool->start;
        t->pool->start += sizeof(radix_node_t);
        t->pool->size -= sizeof(radix_node_t);
    }

    for (slot = 0; slot < sizeof(node->child) / sizeof(node->child[0]); slot++) {
        node->child[slot] = NULL;
    }
    node->parent = NULL;
    node->value = 0;//value is an integer; 0 marks "nothing stored"
    return node;
}
//创建管理结构
radix_tree_t* radix_tree_create()
{
int i;
radix_tree_t* tree = (radix_tree_t*)malloc(sizeof(radix_tree_t));
if (tree == NULL)return NULL;
char* p = (char*)malloc(INIT_POOL_SIZE);
radix_node_t* ns;
if (!p) {
free(tree); return NULL;
}
//为内存池结构分配空间
((pool_t)p)->next = (pool_t)p;
((pool_t)p)->prev = (pool_t)p;
ns = (radix_node_t*)((char*)p + sizeof(radix_pool));
//在内存中创建链表
for (i = 1; i < INIT_NODE_NUM - 2; ++i) {
ns[i].parent = &ns[i + 1];
}
ns[i].parent = NULL;
ns[0].child[0] = NULL;
ns[0].child[1] = NULL;
ns[0].child[2] = NULL;
ns[0].child[3] = NULL;
ns[0].parent = NULL;
ns[0].value = NULL;
tree->pool = (pool_t)p;
tree->root = ns;
tree->free = &ns[1];
((pool_t)p)->start = (char*)ns + sizeof(radix_node_t) * INIT_NODE_NUM;
((pool_t)p)->size = INIT_POOL_SIZE - sizeof(radix_pool) - sizeof(radix_node_t) * INIT_NODE_NUM;
return tree;
}
#ifndef RADIX_INSERT_NO_MEMORY
#define RADIX_INSERT_NO_MEMORY -4 //a node needed for the insertion could not be allocated
#endif
//Inserts value under key.
//The key is walked one CHECK_BITS digit per level for radix_tree_height
//levels; missing nodes along the path are created on the way down.
//Returns 0 on success,
//        RADIX_INSERT_VALUE_SAME    if the node already holds this value,
//        RADIX_INSERT_VALUE_OCCUPY  if the node holds a different non-zero value,
//        RADIX_INSERT_NO_MEMORY     if a node could not be allocated (nodes
//                                   created earlier on the path stay in the tree).
//A stored value of 0 means "empty", so inserting 0 under an unused key
//reports RADIX_INSERT_VALUE_SAME.
int radix_tree_insert(radix_tree_t* t, uint32 key, ptr_t value)
{
    radix_node_t* node = t->root;
    int level;

    for (level = 0; level < radix_tree_height; level++) {
        int digit = CHECK_BITS(key, level);
        radix_node_t* next = node->child[digit];

        if (next == NULL) {
            next = radix_node_alloc(t);
            //Report the failure instead of returning 0, which means success.
            if (next == NULL) return RADIX_INSERT_NO_MEMORY;
            next->parent = node;
            node->child[digit] = next;
        }
        node = next;
    }

    if (node->value == value) return RADIX_INSERT_VALUE_SAME;
    if (node->value != 0) return RADIX_INSERT_VALUE_OCCUPY;
    node->value = value;
    return 0;
}
//Removes the entry stored under key.
//Insertion creates many nodes along a path; to keep deletion fast only the
//bottom-level node for key is removed, the interior nodes above it stay.
//Returns 0 on success, or RADIX_DELETE_ERROR if the tree has no root or no
//node exists for key.
int radix_tree_delete(radix_tree_t* t, uint32 key)
{
    radix_node_t* node = t->root;
    radix_node_t* par;
    int level;
    int digit = 0;

    //Walk down one digit per level; stop early if the path does not exist.
    for (level = 0; node != NULL && level < radix_tree_height; level++) {
        digit = CHECK_BITS(key, level);
        node = node->child[digit];
    }
    if (node == NULL) return RADIX_DELETE_ERROR;

    //node is the one storing the value: unlink it from its parent using the
    //digit of the last level walked.
    par = node->parent;
    par->child[digit] = NULL;

    node->value = 0;
    node->child[0] = NULL;
    node->child[1] = NULL;
    node->child[2] = NULL;
    node->child[3] = NULL;

    //Push the node onto the head of the free list. This is also correct when
    //the free list is empty (t->free == NULL), which would otherwise be
    //dereferenced.
    node->parent = t->free;
    t->free = node;
    return 0;
}
//Print function: walks the subtree rooted at node depth-first and prints, in
//hexadecimal and one per line, the value of every node whose value is non-zero.
//A NULL node prints nothing.
void radix_print(radix_node_t* node)
{
    size_t slot;

    if (node == NULL) return;
    //value is an integer, so test it against 0 rather than the pointer constant NULL.
    if (node->value != 0) {
        printf("%x\n", node->value);
    }
    for (slot = 0; slot < sizeof(node->child) / sizeof(node->child[0]); slot++) {
        radix_print(node->child[slot]);
    }
}
//Node lookup function.
//key is the index to look up; the tree is walked one CHECK_BITS digit per
//level for radix_tree_height levels.
//Returns the value stored in the node reached for key, or 0 when no such node
//exists. Because 0 is also what an empty node holds, a result of 0 always
//means "nothing stored".
ptr_t radix_tree_find(radix_tree_t* t, uint32 key)
{
    radix_node_t* node = t->root;
    int level;

    for (level = 0; node != NULL && level < radix_tree_height; level++) {
        node = node->child[CHECK_BITS(key, level)];
    }
    //The return type is an integer, so report "not found" as 0, not NULL.
    if (node == NULL) return 0;
    return node->value;
}
上面的实现仍有值得改进的地方。比如节点结构:每一个节点不论是内部节点还是叶节点,都带有储存数据的value字段,有点浪费空间。删除操作也还可以优化:多删除一些不再使用的节点可以节省空间,不过这样会增加删除操作占用的时间,等等。