单词查找树简介
代码实现
单词查找树简介
Trie树(又称单词查找树、前缀树)是一种树形结构,是哈希树的一种变种。典型应用是统计、排序和保存大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:利用字符串的公共前缀来减少查询时间,最大限度地减少无谓的字符串比较,查询效率比哈希树高。
单词查找树的模拟过程可以看链接:Prefix tree
对于下图的Trie树, 总共有4个单词,abc, ad, efa, ef
代码实现
节点:
/* Number of child slots in every node (one per possible byte value). */
#define CHARLENGTH 256

/*
 * A single trie node. Children are reached through next[], indexed by the
 * character code of the edge leading to them.
 */
struct TrieNode {
    struct TrieNode *next[CHARLENGTH]; /* child links; NULL means "no child" */
    int isEndOfWord;                   /* set to TRUE by insert() on the last node of a key */
    int count;                         /* number of child nodes */
    char value;                        /* character this node was created for */
};

typedef struct TrieNode TrieNode;
typedef struct TrieNode *TrieNodePtr;
typedef struct TrieNode *TrieSTPtr;
插入一个节点:
/*
 * Insert the NUL-terminated string `key` into the trie rooted at `root`.
 *
 * Walks the trie one character at a time, creating any missing node with
 * createTrieNode() and bumping the parent's child count, then marks the final
 * node as the end of a word. Each character is also echoed to stdout.
 *
 * Does nothing when `root` or `key` is NULL. If a node cannot be created the
 * function returns early: the part of the path built so far stays in the
 * trie, but the key is not marked as a word.
 */
void insert(TrieSTPtr root, char *key) {
    if (root == NULL || key == NULL) {
        return;
    }

    TrieSTPtr node = root;
    for (const char *p = key; *p != '\0'; p++) {
        /*
         * Plain char may be signed, so a byte >= 0x80 would become a negative
         * index and address memory before next[]. Index by unsigned value.
         */
        unsigned char slot = (unsigned char)*p;

        /* NOTE(review): looks like leftover trace output; kept so stdout is unchanged. */
        putchar(*p);

        if (node->next[slot] == NULL) {
            TrieNodePtr child = createTrieNode(*p);
            if (child == NULL) {
                return; /* creation failed; do not dereference a NULL child */
            }
            node->next[slot] = child;
            node->count++;
        }
        node = node->next[slot];
    }
    node->isEndOfWord = TRUE;
}
搜索一个单词:
/*
 * Report whether `str` was stored in the trie rooted at `root` as a complete
 * word.
 *
 * Returns TRUE when every character of `str` has a matching node and the last
 * node is flagged as the end of a word; FALSE otherwise (including when
 * `root` or `str` is NULL, or when `str` is only a prefix of stored words).
 */
int searchTrie(TrieSTPtr root, char *str)
{
    if (root == NULL || str == NULL) {
        return FALSE;
    }

    TrieSTPtr node = root;
    /* Compare against '\0', not NULL: str[i] is a character, not a pointer. */
    for (const char *p = str; *p != '\0'; p++) {
        /* Index by unsigned value so bytes >= 0x80 cannot go negative. */
        node = node->next[(unsigned char)*p];
        if (node == NULL) {
            return FALSE;
        }
    }
    return (node->isEndOfWord == TRUE) ? TRUE : FALSE;
}
删除节点后返回路径上最近的一个键:
删除节点,总共会有四种情况:
1. 字符串超出范围
2. 字符串刚好结束,节点没有子节点
3. 字符串已经结束,节点存在子节点(count != 0)
(1) 节点是单词的结尾(node->isEndOfWord == TRUE)
(2) 节点不是单词的结尾(node->isEndOfWord == FALSE)
TrieSTPtr deleteKey(TrieSTPtr root, char *key, int d) {
if (root == NULL)//case 1: 超出长度
return NULL;
if (d != strlen(key)) {
char c = *(key + d);
TrieSTPtr tmp = root->next[c];
tmp = deleteKey(root->next[c], key, d + 1);
if (tmp == NULL) { //节点被删
if (root->count != 0)
root->count--;
if (root->isEndOfWord == TRUE) { //case 3 其它字符串的结尾
return root;
}
else if(root->count == 0 && root->isEndOfWord == FALSE){ //case 3 其它字符串的结尾
free(root);
root = NULL;
return root;
}
}
else { //节点未被删
return tmp;
}
}
else {//字符串结尾
if (root->count == 0) { //case 2:无子节点
free(root); //删除节点
root = NULL;
}
else { //case 3
root->isEndOfWord = FALSE;
}
return root;
}
}
完整代码:
#include
#include
#include
#include
#include
/* Number of sample words in main()'s name[] array. */
#define LENGTH 4
/* Not referenced by any code visible in this file. */
#define WORDLENGTH 3
/* Number of child slots in each TrieNode's next[] array. */
#define CHARLENGTH 256
/* Integer truth values used for isEndOfWord and for searchTrie()'s result. */
#define TRUE 1
#define FALSE 0
/*
 * A single trie node. Children are reached through next[], indexed by the
 * character code of the edge leading to them.
 */
struct TrieNode {
    struct TrieNode *next[CHARLENGTH]; /* child links; NULL means "no child" */
    int isEndOfWord;                   /* set to TRUE by insert() on the last node of a key */
    int count;                         /* number of non-NULL entries in next[] */
    char value;                        /* character passed to createTrieNode() */
};

typedef struct TrieNode TrieNode;
typedef struct TrieNode *TrieNodePtr;
typedef struct TrieNode *TrieSTPtr;
TrieNodePtr createTrieNode(char key) {
TrieNodePtr t = (TrieNodePtr)malloc(sizeof(TrieNode));
memset(t, 0, sizeof(TrieNode));
//t->isEndOfWord = FALSE;
//t->count=0;
t->value = key;
return t;
}
TrieSTPtr createTrie() {
TrieSTPtr t = (TrieSTPtr)malloc(sizeof(TrieNode));
memset(t, 0, sizeof(TrieNode));
return t;
}
/*
 * Insert the NUL-terminated string `key` into the trie rooted at `root`.
 *
 * Walks the trie one character at a time, creating any missing node with
 * createTrieNode() and bumping the parent's child count, then marks the final
 * node as the end of a word. Each character is also echoed to stdout.
 *
 * Does nothing when `root` or `key` is NULL. If a node cannot be created the
 * function returns early: the part of the path built so far stays in the
 * trie, but the key is not marked as a word.
 */
void insert(TrieSTPtr root, char *key) {
    if (root == NULL || key == NULL) {
        return;
    }

    TrieSTPtr node = root;
    for (const char *p = key; *p != '\0'; p++) {
        /*
         * Plain char may be signed, so a byte >= 0x80 would become a negative
         * index and address memory before next[]. Index by unsigned value.
         */
        unsigned char slot = (unsigned char)*p;

        /* NOTE(review): looks like leftover trace output; kept so stdout is unchanged. */
        putchar(*p);

        if (node->next[slot] == NULL) {
            TrieNodePtr child = createTrieNode(*p);
            if (child == NULL) {
                return; /* creation failed; do not dereference a NULL child */
            }
            node->next[slot] = child;
            node->count++;
        }
        node = node->next[slot];
    }
    node->isEndOfWord = TRUE;
}
/*
 * Free every node below `t`, leaving `t` itself as an empty node.
 *
 * Each child subtree is released recursively and its slot in next[] is reset
 * to NULL; the child count is reset to match. `t` itself is NOT freed and
 * still belongs to the caller. Does nothing when `t` is NULL.
 */
void deleteTrie(TrieSTPtr t) {
    if (t == NULL) {
        return;
    }
    for (int i = 0; i < CHARLENGTH; i++) {
        TrieNodePtr child = t->next[i];
        if (child != NULL) {
            deleteTrie(child); /* release the grandchildren first */
            free(child);
            t->next[i] = NULL;
        }
    }
    /* All child links are NULL now; keep the counter consistent with them. */
    t->count = 0;
}
/*
 * Report whether `str` was stored in the trie rooted at `root` as a complete
 * word.
 *
 * Returns TRUE when every character of `str` has a matching node and the last
 * node is flagged as the end of a word; FALSE otherwise (including when
 * `root` or `str` is NULL, or when `str` is only a prefix of stored words).
 */
int searchTrie(TrieSTPtr root, char *str)
{
    if (root == NULL || str == NULL) {
        return FALSE;
    }

    TrieSTPtr node = root;
    /* Compare against '\0', not NULL: str[i] is a character, not a pointer. */
    for (const char *p = str; *p != '\0'; p++) {
        /* Index by unsigned value so bytes >= 0x80 cannot go negative. */
        node = node->next[(unsigned char)*p];
        if (node == NULL) {
            return FALSE;
        }
    }
    return (node->isEndOfWord == TRUE) ? TRUE : FALSE;
}
TrieSTPtr deleteKey(TrieSTPtr root, char *key, int d) {
if (root == NULL)//超出长度
return NULL;
if (d != strlen(key)) {
char c = *(key + d);
TrieSTPtr tmp = root->next[c];
tmp = deleteKey(root->next[c], key, d + 1);
if (tmp == NULL) { //节点被删
if (root->count != 0)
root->count--;
if (root->isEndOfWord == TRUE) { //其它字符串的结尾
return root;
}
else if(root->count == 0 && root->isEndOfWord == FALSE){ //其它字符串的结尾
free(root);
root = NULL;
return root;
}
}
else { //节点未被删
return tmp;
}
}
else {//字符串结尾
if (root->count == 0) { //无子节点
free(root); //删除节点
root = NULL;
}
else {
root->isEndOfWord = FALSE;
}
return root;
}
}
/*
 * Demo driver: builds a trie from four sample words, attempts to delete the
 * key "ab", prints whether "ape" is still found, then releases the trie.
 *
 * Returns 0 on success and 1 if the root node could not be created.
 */
int main(void) {
    char *name[LENGTH] = {
        "abc",
        "def",
        "ape",
        "ap",
    };

    TrieSTPtr root = createTrie();
    if (root == NULL) {
        return 1;
    }

    for (int i = 0; i < LENGTH; i++) {
        insert(root, name[i]);
    }

    deleteKey(root, "ab", 0);
    printf("%d\n", searchTrie(root, "ape"));

    /* deleteTrie() frees only the nodes below root; root itself is released here. */
    deleteTrie(root);
    free(root);
    root = NULL;
    return 0;
}