关于Trie树的理论,各位大神已经讲得很多了:可以参见这篇对Trie树的简单介绍,或者这篇关于它和其他一些树结构使用场景的文章。
Trie树又称“字典树”或“前缀树”,常用于小规模的词频统计,是典型的以空间换时间的数据结构。下面是我的一种C++实现,记录一下。
trie.h头文件
#ifndef _TRIE_STRUCTURE_H
#define _TRIE_STRUCTURE_H
#include <string>
#include <cstdlib>
#include <ctype.h>
#include <fstream>
#include <iostream>
#include <sstream>
using namespace std;
#define MAX_BRANCH 26
// Role of a node inside the trie. The numeric values are stored in
// TrieNode::type as plain ints.
enum NODE_TYPE
{
    ROOT_NODE = -1,  // the single top node of a tree
    NORMAL_NODE,     // 0: interior node that does not end an inserted word
    LEAF_NODE        // 1: node at which an inserted word ends
};
// Alias kept so that existing code can keep using the NODE_TYPE_e spelling.
typedef NODE_TYPE NODE_TYPE_e;
// A single node of the trie: a counter, a role flag and one child slot per
// letter.
typedef struct TrieNode
{
    int freq;                    // counter supplied at construction and maintained by the tree
    int type;                    // role flag: -1 root, 0 normal node, 1 leaf (end of a word)
    TrieNode* next[MAX_BRANCH];  // child links, one slot per branch; NULL when absent

    // Creates a node with counter `n` and role `tp`; every child slot starts
    // out empty.
    TrieNode(int n, int tp)
    {
        freq = n;
        type = tp;
        TrieNode** slot = next;
        TrieNode** const stop = next + MAX_BRANCH;
        while (slot != stop)
        {
            *slot = NULL;
            ++slot;
        }
    }
} TrieNode, *pTrieNode;
// Trie ("dictionary tree") of words, used for counting word occurrences.
//
// NOTE(review): the class holds a raw owning pointer and its destructor
// releases the tree, but no copy constructor or copy assignment is declared.
// Copying a DictTree therefore leaves two objects sharing one root; avoid
// copying instances.
class DictTree
{
public:
    // Creates a tree that contains only a root node.
    DictTree();
    // Creates a tree on top of the caller-supplied root node `rt`.
    DictTree(pTrieNode &rt);
    ~DictTree();
    // Inserts a word into the tree.
    bool InsertWord(string &word);
    // Deletes a word from the tree.
    bool DeleteWord(string &word);
    // Returns the frequency of the word, or 0 when the word is not present.
    // With a non-zero `isprefix` the argument is treated as a prefix instead
    // of a complete word.
    int SearchWord(string &word, int isprefix=false);
    // Counts the words that start with `prefix`.
    int PrefixCount(string &prefix);
    // Splits a document into words and inserts each one through InsertWord.
    void SplitWord(string &article);
private:
    // Root node of the tree.
    pTrieNode root;
    // Destroys the (sub)tree rooted at `tree`.
    void DestroyTree(pTrieNode &tree);
    // Returns a copy of `word` with upper-case letters converted to lower
    // case; `word` itself is left untouched.
    string StringToLower(const string &word)
    {
        string lowered;
        lowered.reserve(word.length());
        for(string::size_type i = 0; i < word.length(); ++i)
        {
            // tolower() is only defined for values representable as unsigned
            // char (or EOF); a plain char may be negative for non-ASCII
            // bytes, so convert before the call.
            const unsigned char ch = static_cast<unsigned char>(word[i]);
            lowered += static_cast<char>(tolower(ch));
        }
        return lowered;
    }
};
// Builds an empty tree: a lone root node with counter 0 and type ROOT_NODE.
//
// A failed allocation is reported by operator new throwing std::bad_alloc,
// never by a NULL return, so no null check is needed here (the former
// check was unreachable and would have exited with a success status).
DictTree::DictTree()
    : root(new TrieNode(0, ROOT_NODE))
{
}
// Builds a tree around an existing node: the pointer `rt` is stored as the
// root as-is (no nodes are duplicated) and a trace line is written to
// standard output.
DictTree::DictTree(pTrieNode &rt)
{
    root = rt;
    cout << "Copy Constructor DictTree()";
    cout << endl;
}
// Tears the whole tree down, starting at the root, and clears the root
// pointer afterwards so it no longer refers to released nodes.
DictTree::~DictTree()
{
    pTrieNode &top = this->root;
    DestroyTree(top);
    top = NULL;
}
// Inserts one occurrence of `ex_word` into the tree.
//
// The word is lower-cased first. Insertion stops at the first character
// outside 'a'..'z', so only the leading alphabetic part is stored (for
// example "you," is stored as "you"). Every node on the path has its counter
// incremented (a new node starts at 1), and the node reached last is marked
// LEAF_NODE. If the word has no leading letter at all, nothing is stored.
//
// @param ex_word word to insert; it is not modified.
// @return always true.
bool DictTree::InsertWord(string &ex_word)
{
    const string word = StringToLower(ex_word);
    pTrieNode p = root;
    for(string::size_type i = 0; i < word.length(); ++i)
    {
        const char c = word[i];
        // Explicit range check instead of isalpha(): isalpha() is undefined
        // for negative char values and locale-dependent, so it could accept
        // a character for which (c - 'a') falls outside next[MAX_BRANCH].
        if(c < 'a' || c > 'z')
            break;
        pTrieNode &child = p->next[c - 'a'];
        if(child == NULL)
        {
            // operator new throws on failure, so no null check is required.
            child = new TrieNode(1, NORMAL_NODE);
        }
        else
        {
            child->freq++;
        }
        p = child;
    }
    // Never relabel the root: an empty or non-alphabetic word stores nothing.
    if(p != root)
        p->type = LEAF_NODE;
    return true;
}
// Removes one occurrence of `ex_word` from the tree.
//
// The word is lower-cased and, as in InsertWord, only its leading run of
// 'a'..'z' characters is considered. The removal happens in two passes so
// that a word which is not stored leaves the tree untouched:
//   1. walk the path and check that it exists and ends on a LEAF_NODE;
//   2. walk it again, decrementing each node's counter; the first node whose
//      counter drops to zero is no longer used by any word, so its whole
//      subtree is released and unlinked from its parent.
//
// NOTE(review): a node's counter is shared between the word ending there and
// longer words passing through it, so a surviving end node keeps its
// LEAF_NODE flag — there is no way to tell from the counter alone whether
// this was the last occurrence of the word itself.
//
// @param ex_word word to delete; it is not modified.
// @return true if the word was found and one occurrence removed, false
//         otherwise.
bool DictTree::DeleteWord(string &ex_word)
{
    const string word = StringToLower(ex_word);

    // Pass 1: locate the word without modifying anything.
    pTrieNode p = root;
    string::size_type len = 0;
    for(; len < word.length(); ++len)
    {
        const char c = word[len];
        if(c < 'a' || c > 'z')
            break;
        p = p->next[c - 'a'];
        if(p == NULL)
            return false;
    }
    if(p->type != LEAF_NODE)
        return false;

    // Pass 2: the path is known to exist for the first `len` characters.
    p = root;
    for(string::size_type i = 0; i < len; ++i)
    {
        pTrieNode &child = p->next[word[i] - 'a'];
        child->freq--;
        if(child->freq <= 0)
        {
            // Nothing else uses this branch: release it and clear the
            // parent's link so no dangling pointer stays in the tree.
            DestroyTree(child);
            child = NULL;
            break;
        }
        p = child;
    }
    return true;
}
// Looks `ex_word` up and returns the counter stored on the node it leads to.
//
// The word is lower-cased and the walk stops at the first character outside
// 'a'..'z', so only the leading alphabetic part is matched.
//
// NOTE(review): InsertWord increments the counter of every node a word
// passes through, so the value returned for a complete word also includes
// longer stored words that share it as a prefix.
//
// @param ex_word  word (or prefix) to look up; it is not modified.
// @param isprefix non-zero: return the counter of the node reached even if
//                 it is not marked as the end of a word; zero: return it
//                 only when the node is a LEAF_NODE.
// @return the node's counter, or 0 when the path does not exist or (for a
//         complete-word lookup) does not end on a LEAF_NODE.
int DictTree::SearchWord(string &ex_word, int isprefix)
{
    const string word = StringToLower(ex_word);
    pTrieNode p = root;
    for(string::size_type i = 0; i < word.length(); ++i)
    {
        const char c = word[i];
        // Explicit range check instead of isalpha(): isalpha() is undefined
        // for negative char values and locale-dependent, so it could accept
        // a character for which (c - 'a') falls outside next[MAX_BRANCH].
        if(c < 'a' || c > 'z')
            break;
        p = p->next[c - 'a'];
        if(p == NULL)
            return 0;
    }
    if(isprefix || LEAF_NODE == p->type)
        return p->freq;
    return 0;
}
// Returns the counter of the node that `prefix` leads to, i.e. the result of
// SearchWord in prefix mode (0 when no such path exists).
int DictTree::PrefixCount(string &prefix)
{
    const int asPrefix = 1;
    const int count = SearchWord(prefix, asPrefix);
    return count;
}
// Reads the file whose path is given in `article`, splits its contents on
// whitespace and inserts every token through InsertWord.
//
// Tokens are streamed straight from the file, so the whole file is never
// held in memory at once. If the file cannot be opened nothing is inserted.
//
// @param article path of the text file to read (not the text itself).
void DictTree::SplitWord(string &article)
{
    ifstream essay(article.c_str());
    if(!essay)
        return;
    string word;
    while(essay >> word)
    {
        InsertWord(word);
    }
    // `essay` is closed by its destructor.
}
// Recursively releases the subtree rooted at `tree` and resets `tree` to
// NULL so the caller's pointer cannot be used after the nodes are gone.
//
// @param tree reference to the pointer that owns the subtree; may be NULL,
//             in which case nothing happens.
void DictTree::DestroyTree(pTrieNode &tree)
{
    if(tree == NULL)
        return;
    for(int i = 0; i < MAX_BRANCH; ++i)
    {
        DestroyTree(tree->next[i]);
    }
    // Nodes are created with `new`, so they must be released with `delete`;
    // pairing `new` with free() is undefined behaviour.
    delete tree;
    tree = NULL;
}
#endif//_TRIE_STRUCTURE_H
trieTest.cpp文件,写了一个小demo,没有经过充分测试。
#include "trie.h"
#define WORD_FILE "article.txt"
// Words looked up by the demo.
string chart[] = {
    "programming",
    "you",
    "potential",
    "Google",
    "major",
    "professional",
};
// Number of entries in `chart`, derived from the array so the two cannot get
// out of sync. The array bound deliberately does not mention `size`: with
// `using namespace std;` in effect, an unqualified `size` is ambiguous with
// std::size when compiling as C++17 or later.
const int size = sizeof(chart) / sizeof(chart[0]);
int main(int argc, char const *argv[])
{
DictTree container;
string file = WORD_FILE;
container.SplitWord(file);
cout << "start Search: " << endl;
for(int i = 0; i < size; i++)
{
cout << chart[i] << " --> " << container.SearchWord(chart[i]) << endl;
}
container.DeleteWord(chart[1]);
cout << "after DeleteWord: " << endl;
for(int i = 0; i < size; i++)
{
cout << chart[i] << " --> " << container.SearchWord(chart[i]) << endl;
}
string prefix = "pro";
cout << "prefix of 'pro' --> " << container.PrefixCount(prefix) << endl;
return 0;
}
单词文件中的内容如下:
programming you programming potential Google goolge google major potential programming You Major professional
下面是打印: