前缀树是一种用于快速检索的多叉树结构,利用字符串的公共前缀来降低查询时间,核心思想是空间换时间,经常被搜索引擎用于文本词频统计。
优点:最大限度地减少无谓的字符串比较,查询效率高;
缺点:内存消耗较大;
特性:
(1)不同字符串的相同前缀只保存一份;
(2)结点不存放数据,数据存储在树的边上,结点存放字符经过的次数和结束的次数;
例如:给出一组单词 adv、air、ait、cat,其中 adv、air、ait 共享前缀 a,air 和 ait 还共享前缀 ai,这些公共前缀在树中各只保存一份。
前缀树的类定义
我们需要储存每个结点的pass和end值(pass表示经过该结点的字符串数量,end表示以该结点结尾的字符串数量),再准备一个长度为26的指针数组来储存字符信息。至于字符信息怎么表示:任意小写字母减去'a'就能映射到数组中的对应下标,例如我们要储存字符a,那么'a'-'a'=0,所以0号位置就专门对应字符a,以此类推。注意这种映射只适用于小写字母a~z。
直接上代码,头文件"TrieTree.h"
#pragma once
#include <string>
#include <stack>
using namespace std;
// One node of the prefix tree.
//
// The character itself is not stored in the node: it is implied by the slot
// of the parent's `next` array that points here (slot = ch - 'a').
struct _Node{
    int pass;          // number of inserted strings whose path goes through this node
    int end;           // number of inserted strings that terminate at this node
    _Node* next[26];   // child links, one slot per letter 'a'..'z'; NULL when absent

    // Creates an empty node: both counters are zero and every child link is
    // null (`next()` value-initializes the array, i.e. zeroes all 26 pointers).
    _Node() : pass(0), end(0), next()
    {
    }
};
// Prefix tree (trie) over strings of lowercase letters, with occurrence counts.
//
// Every node has 26 child slots indexed by (ch - 'a'), so the strings passed
// to the member functions are expected to consist of 'a'..'z' only.
class TrieTree
{
public:
    TrieTree();
    ~TrieTree();

    // Insert a string (an empty string is ignored).
    void add(std::string str);

    // Delete a string.
    void erase(std::string str);

    // Number of stored strings that have `str` as a prefix;
    // -1 when `str` is empty.
    int preNum(std::string str);

    // Number of times `str` itself is stored; -1 when `str` is empty.
    int searchNum(std::string str);

private:
    _Node* root;   // root node, allocated in the constructor
};
cpp文件"TrieTree.cpp"
#include "stdafx.h"
#include "TrieTree.h"
// Builds an empty trie consisting of a single root node that represents the
// empty prefix.
TrieTree::TrieTree()
    : root(new _Node())
{
}
// Releases every node of the trie.
//
// The tree is walked iteratively with an explicit stack (instead of
// recursion) so that very long stored strings cannot overflow the call stack.
//
// NOTE(review): TrieTree still has the compiler-generated copy operations,
// which copy only the root pointer. Two copies would therefore share nodes and
// both destructors would free them; do not copy TrieTree objects.
TrieTree::~TrieTree()
{
    if (root == NULL)
        return;

    stack<_Node*> pending;
    pending.push(root);
    while (!pending.empty())
    {
        _Node* node = pending.top();
        pending.pop();
        // Queue the children before the node holding their pointers is freed.
        for (int i = 0; i < 26; i++)
        {
            if (node->next[i] != NULL)
                pending.push(node->next[i]);
        }
        delete node;
    }
    root = NULL;
}
// Inserts one occurrence of `str` into the trie.
//
// Each node has exactly 26 child slots indexed by (ch - 'a'), so only strings
// made entirely of 'a'..'z' can be stored. An empty string, or a string
// containing any other character, is ignored and leaves the trie unchanged.
void TrieTree::add(string str)
{
    if (str.empty())
        return;

    // Validate the whole string first: a bad character must neither index
    // outside next[] nor leave partially updated counters behind.
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int slot = str[i] - 'a';
        if (slot < 0 || slot >= 26)
            return;
    }

    _Node* cur = root;
    cur->pass++;   // every inserted string passes through the root
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int path = str[i] - 'a';
        if (cur->next[path] == NULL)
            cur->next[path] = new _Node();   // first string to take this edge
        cur = cur->next[path];
        cur->pass++;
    }
    cur->end++;    // one more string terminates at this node
}
// Returns how many stored strings (counting duplicates) start with the
// prefix `str`.
//
// Returns -1 when `str` is empty, and 0 when no stored string has this prefix.
// A prefix containing a character outside 'a'..'z' also yields 0, since no
// child slot exists for such a character.
int TrieTree::preNum(string str)
{
    if (str.empty())
        return -1;

    _Node* cur = root;
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int path = str[i] - 'a';
        // Reject characters that would index outside next[0..25].
        if (path < 0 || path >= 26)
            return 0;
        if (cur->next[path] == NULL)
            return 0;
        cur = cur->next[path];
    }
    return cur->pass;
}
// Returns how many times exactly `str` is stored in the trie.
//
// Returns -1 when `str` is empty, and 0 when `str` is not stored. A string
// containing a character outside 'a'..'z' also yields 0, since no child slot
// exists for such a character.
int TrieTree::searchNum(string str)
{
    if (str.empty())
        return -1;

    _Node* cur = root;
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int path = str[i] - 'a';
        // Reject characters that would index outside next[0..25].
        if (path < 0 || path >= 26)
            return 0;
        if (cur->next[path] == NULL)
            return 0;
        cur = cur->next[path];
    }
    return cur->end;
}
// Removes one occurrence of `str` from the trie.
//
// Does nothing when `str` is empty, contains a character outside 'a'..'z',
// or is not currently stored. Nodes that no remaining string passes through
// are freed and unlinked from their parent.
void TrieTree::erase(string str)
{
    if (str.empty())
        return;

    // Characters outside 'a'..'z' have no child slot; bail out before they
    // can be used as an index into next[].
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int slot = str[i] - 'a';
        if (slot < 0 || slot >= 26)
            return;
    }

    // searchNum() yields 0 for "not stored" (and -1 for empty input), so only
    // a positive count means there is an occurrence to remove.
    if (searchNum(str) <= 0)
        return;

    _Node* cur = root;
    cur->pass--;
    for (string::size_type i = 0; i < str.length(); i++)
    {
        const int path = str[i] - 'a';
        _Node* child = cur->next[path];
        if (--child->pass == 0)
        {
            // No other string uses this edge any more: unlink the subtree
            // first so the parent never keeps a dangling pointer, then free
            // every node below it.
            cur->next[path] = NULL;

            stack<_Node*> pending;
            pending.push(child);
            while (!pending.empty())
            {
                _Node* node = pending.top();
                pending.pop();
                for (int k = 0; k < 26; k++)
                {
                    if (node->next[k] != NULL)
                        pending.push(node->next[k]);
                }
                delete node;
            }
            // The rest of the path (including the terminal node) lived in the
            // subtree just freed, so there is nothing left to update.
            return;
        }
        cur = child;
    }
    cur->end--;
}