字典树(Trie),也叫单词查找树或键树,是一种树形结构,是哈希树的一种变种。它是一种针对字符串进行操作的数据结构,典型应用是统计和排序大量的字符串,例如在搜索引擎系统中用于文本词频统计。其主要思想是利用字符串的公共前缀来节约存储空间。字典树在插入和查询字符串的操作上具有较高的效率。
/*-------------字典树查询单词---------------*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Number of child slots in every node. The functions in this file pick a slot
 * with (character - 97), i.e. one slot per lowercase letter 'a'..'z'. */
#define ALPHABET_SIZE 26
/* Trie node definition. */
typedef struct TrieNode
{
/* Child links, one per letter; NULL where no stored word continues that way. */
struct TrieNode *children[ALPHABET_SIZE];
/* Letter this node stands for; insert sets it when it creates the node. */
char character;
/* True when a stored word ends exactly at this node. */
bool isEndOfWord;
} TrieNode;
/* 创建新的节点 */
TrieNode *createTrieNode()
{
TrieNode *node;
node = malloc(sizeof(TrieNode));
node->isEndOfWord = false;
int i = 0;
while (i < ALPHABET_SIZE)
{
node->children[i] = NULL;
i++;
}
return node;
}
/*
 * Insert a word into the trie rooted at root.
 *
 * The word ends at the first '\0', '\n' or '\r', so a line read with fgets
 * can be passed as-is, with or without its line ending. Only the lowercase
 * letters 'a'..'z' have a child slot; a word containing any other character
 * is rejected as a whole before the trie is touched, so it never leaves a
 * partial branch behind. An empty word marks root itself as end-of-word.
 *
 * If a node cannot be allocated the insertion stops: nodes linked so far
 * stay in the trie, but the word is not marked as present.
 *
 * root: trie to insert into (NULL is ignored)
 * word: text to insert, not modified (NULL is ignored)
 */
void insert(TrieNode *root, char *word)
{
    if (root == NULL || word == NULL)
    {
        return;
    }

    /* Length of the word proper, excluding any trailing line ending. */
    size_t length = strcspn(word, "\r\n");

    /* Validate first so that a bad character cannot index out of bounds. */
    for (size_t i = 0; i < length; i++)
    {
        if (word[i] < 'a' || word[i] > 'z')
        {
            return;
        }
    }

    TrieNode *node = root;
    for (size_t i = 0; i < length; i++)
    {
        int slot = word[i] - 'a';
        if (node->children[slot] == NULL)
        {
            TrieNode *child = createTrieNode();
            if (child == NULL)
            {
                return;
            }
            child->character = word[i];
            node->children[slot] = child;
        }
        node = node->children[slot];
    }
    node->isEndOfWord = true;
}
/*
 * Follow the letters of word down the trie, starting at root.
 *
 * Returns the node reached after the last letter (root itself for an empty
 * word), or NULL when the trie has no such path. A character outside
 * 'a'..'z' has no child slot and is therefore always a miss. On a miss the
 * line "No possible words!!" is printed to stdout.
 *
 * A NULL root or NULL word yields NULL without printing anything.
 */
TrieNode *search(TrieNode *root, char *word)
{
    if (root == NULL || word == NULL)
    {
        return NULL;
    }

    TrieNode *node = root;
    for (; *word != '\0'; word++)
    {
        char character = *word;
        TrieNode *next = NULL;

        /* Range check before indexing: word typically comes from user input. */
        if (character >= 'a' && character <= 'z')
        {
            next = node->children[character - 'a'];
        }
        if (next == NULL)
        {
            printf("No possible words!!\n");
            return NULL;
        }
        node = next;
    }
    return node;
}
/*
 * Write the first len characters of chars to stdout, then a newline.
 * Nothing but the newline is written when len is zero or negative.
 */
void printArray(char chars[], int len)
{
    const char *cursor = chars;
    int remaining = len;

    while (remaining > 0)
    {
        putchar(*cursor);
        cursor++;
        remaining--;
    }
    putchar('\n');
}
/*
 * Print every word stored at or below node, depth first.
 *
 * prefix[0 .. filledLen-1] holds the letters on the path above node. The
 * node's own letter is stored at prefix[filledLen], the buffer is printed if
 * a word ends here, and then each child is visited in slot order with the
 * extended path. prefix must have room for the deepest path below node.
 * A NULL node is a no-op.
 */
void printPathsRecur(TrieNode *node, char prefix[], int filledLen)
{
    if (node != NULL)
    {
        prefix[filledLen] = node->character;
        int depth = filledLen + 1;

        if (node->isEndOfWord)
        {
            printArray(prefix, depth);
        }

        TrieNode **child = node->children;
        TrieNode **stop = child + ALPHABET_SIZE;
        while (child != stop)
        {
            printPathsRecur(*child, prefix, depth);
            child++;
        }
    }
}
/*
 * Print every stored word that starts with prefix, one per line.
 *
 * prefix doubles as scratch space: the letters of each match are written
 * into the buffer after the prefix, so it must be large enough to hold the
 * longest stored word. The prefix's own terminator is put back before
 * returning, so the caller gets its string back unchanged.
 *
 * An empty prefix lists every word in the trie. Nothing is listed when the
 * prefix is not found, or when prefix or root is NULL.
 */
void traverse(char prefix[], TrieNode *root)
{
    if (prefix == NULL || root == NULL)
    {
        return;
    }

    TrieNode *start = search(root, prefix);
    if (start == NULL)
    {
        return;
    }

    size_t length = strlen(prefix);
    if (length == 0)
    {
        /* search returned root, which stands for no letter: start the walk
         * at its children instead of writing in front of the buffer. */
        for (int i = 0; i < ALPHABET_SIZE; i++)
        {
            printPathsRecur(start->children[i], prefix, 0);
        }
    }
    else
    {
        /* start is the node of the last prefix letter; printPathsRecur
         * rewrites that same letter at index length - 1 and continues. */
        printPathsRecur(start, prefix, (int)(length - 1));
    }

    /* The walk may have overwritten the terminator; restore it. */
    prefix[length] = '\0';
}
/* Upper bound on the number of dictionary entries the program is sized for. */
#define NUMBER_OF_WORDS (354935)
/* Size in bytes of a word buffer, including the terminating '\0'. */
#define INPUT_WORD_SIZE (100)
/*
 * Read the next whitespace-delimited token from stdin into s.
 *
 * s must have room for at least 100 bytes: at most 99 characters are stored,
 * followed by the terminator. On end of input or a read error s is set to
 * the empty string, so the caller never reads stale or uninitialized
 * contents; a successful read always yields a non-empty string.
 *
 * Returns s (NULL when s is NULL).
 */
char *receiveInput(char *s)
{
    if (s == NULL)
    {
        return NULL;
    }
    /* The field width 99 must stay below the buffer size (INPUT_WORD_SIZE). */
    if (scanf("%99s", s) != 1)
    {
        s[0] = '\0';
    }
    return s;
}
/* Free node and every node below it. */
static void destroyTrie(TrieNode *node)
{
    if (node == NULL)
    {
        return;
    }
    for (int i = 0; i < ALPHABET_SIZE; i++)
    {
        destroyTrie(node->children[i]);
    }
    free(node);
}

/*
 * Load dictionary.txt (one word per line) into a trie, then repeatedly
 * prompt for a keyword and list every stored word starting with it.
 *
 * Dictionary lines that are blank or contain anything other than the
 * letters 'a'..'z' are skipped. The program ends when stdin reaches end of
 * input or a read fails.
 *
 * Returns 0 on normal termination; exits with status 1 when the dictionary
 * cannot be opened or the trie root cannot be allocated.
 */
int main(void)
{
    /* Read the dictionary file. */
    FILE *fp = fopen("dictionary.txt", "r");
    if (fp == NULL)
    {
        fprintf(stderr, "Error while opening dictionary file");
        exit(1);
    }

    TrieNode *root = createTrieNode();
    if (root == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        fclose(fp);
        return 1;
    }

    /* Feed each line straight into the trie. One reusable buffer replaces
     * the per-word allocations; the extra byte leaves room to re-append the
     * newline below. */
    char line[INPUT_WORD_SIZE + 1];
    while (fgets(line, INPUT_WORD_SIZE, fp) != NULL)
    {
        size_t length = strcspn(line, "\r\n");
        bool valid = length > 0;
        for (size_t i = 0; valid && i < length; i++)
        {
            valid = line[i] >= 'a' && line[i] <= 'z';
        }
        if (!valid)
        {
            continue;
        }
        /* Hand insert the word in the newline-terminated form fgets normally
         * produces, even for a final line that lacks its line ending. */
        line[length] = '\n';
        line[length + 1] = '\0';
        insert(root, line);
    }
    fclose(fp);

    for (;;)
    {
        char str[INPUT_WORD_SIZE];

        printf("Enter keyword: ");
        /* A successful read always stores a non-empty token, so a string
         * that is still empty afterwards means end of input or an error. */
        str[0] = '\0';
        receiveInput(str);
        if (str[0] == '\0')
        {
            break;
        }

        printf("\n==========================================================\n");
        printf("\n********************* Possible Words ********************\n");
        /* Look the keyword up in the trie and list the matches. */
        traverse(str, root);
        printf("\n==========================================================\n");
    }

    destroyTrie(root);
    return 0;
}
查询结果示例:
Enter keyword: cc
==========================================================
********************* Possible Words ********************
cc
ccesser
cchaddoorck
ccid
ccitt
cckw
ccm
ccw
ccws
==========================================================
Enter keyword: