概要
前缀树,又叫字典树,trie树。是一种多叉树。
用途
- 单词补全/预测
- 拼写检查
- 9键输入
- IP路由查找(最长前缀匹配)
- 数组中两个数的最大异或值
特点
- 根节点是空字符
- 每个节点的所有子节点对应的字符都不同
- 根到叶子,路径上所有字符连接即是该节点对应的字符串
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-66WdAdSm-1627787560898)(E:\csdn_demo\算法\前缀树\前缀树图示.png)]
存储
数组形式
用空间换时间,每个节点使用数组保存,一个字符对应一个槽位,共26个槽位。
/* Array-backed trie node: one child slot per lowercase letter. */
struct trieNode {
    unsigned char isLeaf;             /* non-zero when a stored word ends at this node */
    struct trieNode *childArray[26];  /* slot i is the child for letter 'a' + i, NULL if absent */
};
hashmap形式
节省内存,但是查找效率降低。
下面以单词预测为例,展示代码实现。
单词预测
问题:已有字符串"ba", "bad", "da", "dad", "cc"; 输入前缀"ba"进行单词预测,输出"ba", "bad"。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Trie node used by the word-prediction demo. */
struct trieNode {
    unsigned char isLeaf;             /* set to 1 by insert() on the node where a word ends */
    struct trieNode *childArray[26];  /* one slot per letter, indexed by (ch - 'a') */
    char *payload;                    /* the inserted string, stored by insert() on word-end nodes */
};

/* Root of the demo trie; as a file-scope object it starts zero-initialised. */
struct trieNode head;
/*
 * Insert a word into the trie.
 *
 * head: root node of the trie.
 * str:  NUL-terminated word made only of the letters 'a'..'z'. The pointer
 *       itself is stored as the payload (the string is not copied), so the
 *       caller must keep it valid for as long as the trie is used.
 *
 * Returns 0 on success. Returns -1 when an argument is NULL, when the word
 * contains a character outside 'a'..'z' (the trie is left untouched), or
 * when a node allocation fails (nodes created so far stay linked into the
 * trie and are released by the normal teardown).
 */
int insert(struct trieNode* head, char * str)
{
    struct trieNode *node = head;
    const char *p;

    if (head == NULL || str == NULL)
        return -1;

    /* Validate first so a bad word never leaves a half-built branch behind. */
    for (p = str; *p != '\0'; p++) {
        if (*p < 'a' || *p > 'z')
            return -1;
    }

    for (p = str; *p != '\0'; p++) {
        int idx = *p - 'a';
        struct trieNode *child = node->childArray[idx];

        if (child == NULL) {
            /* calloc hands back a zeroed node: no children, not a leaf. */
            child = calloc(1, sizeof *child);
            if (child == NULL)
                return -1;
            node->childArray[idx] = child;
        }
        node = child;
    }

    /* Mark the node reached by the last character as the end of a word. */
    node->isLeaf = 1;
    node->payload = str;
    return 0;
}
/* Defined further down in this file; declared here so the call below has a visible prototype. */
int walkTrie(struct trieNode* head);

/*
 * Print every stored word that starts with the given prefix.
 *
 * head: root node of the trie.
 * str:  NUL-terminated prefix made only of the letters 'a'..'z'. An empty
 *       prefix walks the whole trie.
 *
 * Returns -1 when an argument is NULL, when the prefix contains a character
 * outside 'a'..'z', or when the trie has no path for the prefix. Otherwise
 * returns the result of walkTrie() on the node the prefix leads to.
 */
int searchPrefix(struct trieNode* head, char * str)
{
    struct trieNode *node = head;
    const char *p;

    if (head == NULL || str == NULL)
        return -1;

    for (p = str; *p != '\0'; p++) {
        /* Reject an invalid character instead of silently matching a shorter prefix. */
        if (*p < 'a' || *p > 'z')
            return -1;

        node = node->childArray[*p - 'a'];
        if (node == NULL)
            return -1;
    }

    /* Prefix found: everything below this node is a completion. */
    return walkTrie(node);
}
/*
 * Depth-first, pre-order walk of the subtree rooted at head.
 * Prints the payload of every node flagged as a word end, visiting the
 * children in slot order (0..25). Always returns 0.
 */
int walkTrie(struct trieNode* head)
{
    struct trieNode **slot = head->childArray;
    struct trieNode **end = slot + 26;

    if (head->isLeaf)
        printf("%s\n", head->payload);

    for (; slot != end; slot++) {
        if (*slot != NULL)
            walkTrie(*slot);
    }

    return 0;
}
/*
 * Free the subtree rooted at head, including head itself.
 *
 * head must be NULL or a node obtained from the allocator; passing NULL is
 * a no-op, like free(NULL). Payload strings are not freed. Because head
 * itself is passed to free(), this must not be called on a node that was
 * not heap-allocated. Always returns 0.
 */
int destroy(struct trieNode* head)
{
    int i;

    if (head == NULL)
        return 0;

    /* Release all descendants before the node that points at them. */
    for (i = 0; i < 26; i++)
        destroy(head->childArray[i]);

    free(head);
    return 0;
}
/*
 * Demo driver: build the trie from a fixed word list, print the words that
 * complete the prefix "ba", then release every heap-allocated node.
 * Returns EXIT_FAILURE if a word could not be inserted, 0 otherwise.
 */
int main(void)
{
    char *testStr[] = {"ba", "bad", "da", "dad", "cc"};
    size_t wordCount = sizeof(testStr) / sizeof(testStr[0]);
    size_t slotCount = sizeof(head.childArray) / sizeof(head.childArray[0]);
    size_t i;
    int rc = 0;

    for (i = 0; i < wordCount; i++) {
        if (insert(&head, testStr[i]) != 0) {
            fprintf(stderr, "insert failed: %s\n", testStr[i]);
            rc = EXIT_FAILURE;
            break;
        }
    }

    if (rc == 0)
        searchPrefix(&head, "ba");

    /*
     * Free memory. The root is a static object, so only its children are
     * destroyed. Iterate over every child slot of the root, not over the
     * number of test words, so no subtree is missed.
     */
    for (i = 0; i < slotCount; i++) {
        if (head.childArray[i]) {
            destroy(head.childArray[i]);
            head.childArray[i] = NULL;  /* do not leave a dangling pointer in the root */
        }
    }

    return rc;
}
路由查找(fib_trie)
数据结构
/* Root handle of the routing trie. */
struct trie {
	struct rt_trie_node __rcu *trie;	/* top node; the lookup reads it via rcu_dereference() */
#ifdef CONFIG_IP_FIB_TRIE_STATS
	struct trie_use_stats stats;		/* counters, present only with CONFIG_IP_FIB_TRIE_STATS */
#endif
};
/* Base node: the two leading fields that struct tnode also starts with. */
struct rt_trie_node {
	unsigned long parent;	/* tested against T_LEAF by IS_LEAF()/IS_TNODE() */
	t_key key;
};
/* Internal (non-leaf) trie node. */
struct tnode {
	unsigned long parent;		/* tested against T_LEAF by IS_LEAF()/IS_TNODE() */
	t_key key;			/* network prefix */
	unsigned char pos;		/* starting bit position */
	unsigned char bits;		/* width of the bit field used to index child[] */
	unsigned int full_children;	/* author's note: "whether the slots are full" - TODO confirm exact meaning */
	unsigned int empty_children;
	union {
		struct rcu_head rcu;
		struct tnode *tnode_free;
	};
	struct rt_trie_node __rcu *child[0];	/* trailing array of child slots */
};
/*
 * parent holds the parent pointer. According to the original note that
 * pointer is 4-byte aligned, so bit 0 is used to flag a leaf node.
 * The argument is parenthesised so that any pointer expression can be passed.
 */
#define IS_TNODE(n) (!((n)->parent & T_LEAF))
#define IS_LEAF(n) ((n)->parent & T_LEAF)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-aDlEiw4R-1627787560907)(E:\csdn_demo\算法\前缀树\fib_trie.png)]
查找路由时采用最长前缀匹配,优先从较长的前缀开始匹配,所以路由trie树的pos值是从32开始往下降的。每一层占bits位,槽位数为2的bits次方个。
在没有匹配上前缀时会回溯,找到默认路由。
路由条目匹配的代码:fib_table_lookup
/*
 * Look up the destination address flp->daddr in the trie attached to tb,
 * preferring the longest matching prefix and backtracking to shorter
 * prefixes when a branch turns out to be a dead end.
 *
 * The whole walk runs between rcu_read_lock() and rcu_read_unlock().
 *
 * Returns the value produced by check_leaf() for the leaf that ended the
 * search, or 1 when the trie is empty or backtracking ran past the root.
 */
int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
		     struct fib_result *res, int fib_flags)
{
	struct trie *t = (struct trie *) tb->tb_data;
	int ret;
	struct rt_trie_node *n;
	struct tnode *pn;
	unsigned int pos, bits;
	t_key key = ntohl(flp->daddr);
	unsigned int chopped_off;
	t_key cindex = 0;
	/* Longest-prefix match: start with the full key length. */
	unsigned int current_prefix_length = KEYLENGTH;
	struct tnode *cn;
	t_key pref_mismatch;

	rcu_read_lock();

	n = rcu_dereference(t->trie);
	if (!n)
		goto failed;

#ifdef CONFIG_IP_FIB_TRIE_STATS
	t->stats.gets++;
#endif

	/* Just a leaf? */
	/*
	 * parent holds a pointer that (per the original note) is 4-byte
	 * aligned; its lowest bit tells whether the node is a leaf.
	 */
	if (IS_LEAF(n)) {
		ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
		goto found;
	}

	pn = (struct tnode *) n;
	chopped_off = 0;

	while (pn) {
		pos = pn->pos;
		bits = pn->bits;

		if (!chopped_off)
			cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
						   pos, bits);

		n = tnode_get_child_rcu(pn, cindex);

		if (n == NULL) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
			t->stats.null_node_hit++;
#endif
			goto backtrace;
		}

		if (IS_LEAF(n)) {
			ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
			if (ret > 0)
				goto backtrace;
			goto found;
		}

		cn = (struct tnode *)n;

		/*
		 * It's a tnode, and we can do some extra checks here if we
		 * like, to avoid descending into a dead-end branch.
		 * This tnode is in the parent's child array at index
		 * key[p_pos..p_pos+p_bits] but potentially with some bits
		 * chopped off, so in reality the index may be just a
		 * subprefix, padded with zero at the end.
		 * We can also take a look at any skipped bits in this
		 * tnode - everything up to p_pos is supposed to be ok,
		 * and the non-chopped bits of the index (se previous
		 * paragraph) are also guaranteed ok, but the rest is
		 * considered unknown.
		 *
		 * The skipped bits are key[pos+bits..cn->pos].
		 */

		/* If current_prefix_length < pos+bits, we are already doing
		 * actual prefix matching, which means everything from
		 * pos+(bits-chopped_off) onward must be zero along some
		 * branch of this subtree - otherwise there is *no* valid
		 * prefix present. Here we can only check the skipped
		 * bits. Remember, since we have already indexed into the
		 * parent's child array, we know that the bits we chopped of
		 * *are* zero.
		 */

		/* NOTA BENE: Checking only skipped bits
		   for the new node here */

		/*
		 * No prefix matched here: backtrack to look for a more
		 * general route. This is why matching starts with the
		 * longest prefix and works towards shorter ones (original
		 * note: pos values are ordered from large to small).
		 */
		if (current_prefix_length < pos+bits) {
			if (tkey_extract_bits(cn->key, current_prefix_length,
					      cn->pos - current_prefix_length)
			    || !(cn->child[0]))
				goto backtrace;
		}

		/*
		 * If chopped_off=0, the index is fully validated and we
		 * only need to look at the skipped bits for this, the new,
		 * tnode. What we actually want to do is to find out if
		 * these skipped bits match our key perfectly, or if we will
		 * have to count on finding a matching prefix further down,
		 * because if we do, we would like to have some way of
		 * verifying the existence of such a prefix at this point.
		 */

		/* The only thing we can do at this point is to verify that
		 * any such matching prefix can indeed be a prefix to our
		 * key, and if the bits in the node we are inspecting that
		 * do not match our key are not ZERO, this cannot be true.
		 * Thus, find out where there is a mismatch (before cn->pos)
		 * and verify that all the mismatching bits are zero in the
		 * new tnode's key.
		 */

		/*
		 * Note: We aren't very concerned about the piece of
		 * the key that precede pn->pos+pn->bits, since these
		 * have already been checked. The bits after cn->pos
		 * aren't checked since these are by definition
		 * "unknown" at this point. Thus, what we want to see
		 * is if we are about to enter the "prefix matching"
		 * state, and in that case verify that the skipped
		 * bits that will prevail throughout this subtree are
		 * zero, as they have to be if we are to find a
		 * matching prefix.
		 */

		pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);

		/*
		 * In short: If skipped bits in this node do not match
		 * the search key, enter the "prefix matching"
		 * state.directly.
		 */
		if (pref_mismatch) {
			/* fls(x) = __fls(x) + 1 */
			int mp = KEYLENGTH - __fls(pref_mismatch) - 1;

			if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
				goto backtrace;

			if (current_prefix_length >= cn->pos)
				current_prefix_length = mp;
		}

		pn = (struct tnode *)n; /* Descend */
		chopped_off = 0;
		continue;

backtrace:
		chopped_off++;

		/* As zero don't change the child key (cindex) */
		while ((chopped_off <= pn->bits)
		       && !(cindex & (1<<(chopped_off-1))))
			chopped_off++;

		/* Decrease current_... with bits chopped off */
		if (current_prefix_length > pn->pos + pn->bits - chopped_off)
			current_prefix_length = pn->pos + pn->bits
				- chopped_off;

		/*
		 * Either we do the actual chop off according or if we have
		 * chopped off all bits in this tnode walk up to our parent.
		 */

		if (chopped_off <= pn->bits) {
			cindex &= ~(1 << (chopped_off-1));
		} else {
			struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
			if (!parent)
				goto failed;

			/* Get Child's index */
			/*
			 * Compute this node's index in its parent from the
			 * parent's bit field (parent->pos, parent->bits).
			 */
			cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
			pn = parent;
			chopped_off = 0;

#ifdef CONFIG_IP_FIB_TRIE_STATS
			t->stats.backtrack++;
#endif
			goto backtrace;
		}
	}
failed:
	ret = 1;
found:
	rcu_read_unlock();
	return ret;
}