前缀树Trie

最新推荐文章于 2024-11-14 21:19:37 发布

海棠花不香

最新推荐文章于 2024-11-14 21:19:37 发布

阅读量91

点赞数

分类专栏： c++ 文章标签： c++

本文链接：https://blog.csdn.net/qunsorber/article/details/120766632

版权

c++ 专栏收录该内容

15 篇文章 0 订阅

订阅专栏

Trie树，即字典树，是一种树形结构，是一种哈希树的变种。主要用来统计和排序大量的字符串，所以经常被搜索引擎系统用于文本词频统计。它的优点是：最大限度地减少无谓的字符串比较。
Trie的核心思想是空间换时间。利用字符串的公共前缀来降低查询时间的开销以达到提高效率的目的。
这里提供前缀树的封装接口：增加，删除，查询有无指定字符串及模糊查询。

#include <iostream>
#include <cstring>
#include <string>
#include <vector>

using namespace std;

const int SIZE  = 26;

struct TrieNode {
    bool isEnd = false;
    int count = 0;
    string word = "";
    struct TrieNode* children[SIZE];
};

class Trie {
public:
    Trie();
    ~Trie();
    
    void destroyNode(TrieNode *& node);// only pass TrieNode* cannot make node = nullptr

    void addWord(const std::string& word);

    void removeWord(const std::string& word);

    bool hasWord(const std::string& word) const;

    void getWords(TrieNode *node, vector<std::string> &vecStr) const;

    std::vector<std::string> emumerateWords(const std::string& prefix) const;

private:

    TrieNode *root = nullptr;

    TrieNode *createTrieNode();
};

Trie::Trie(){
    root = new TrieNode();
}

Trie::~Trie(){
    destroyNode(root);
}

void Trie::destroyNode(TrieNode *& node){
    if(node == nullptr)
        return;
    for(int i = 0;i < SIZE; i++){
        destroyNode(node->children[i]);
    }
    delete node;
    node = nullptr;// if use TrieNode* this code cannot work well
}

TrieNode* Trie::createTrieNode(){
    TrieNode *pNode = new TrieNode();
    for(int i=0; i < SIZE; ++i)
        pNode->children[i] = nullptr;
    return pNode;
}

void Trie::addWord(const std::string& word){
    TrieNode *node = root;
    int index = 0;
    for(auto ch : word){
        index = ch - 'a'; 
        if(node->children[index] == nullptr){
            node->children[index] = createTrieNode();
        }
        node = node->children[index];
        node->count ++;
    }
    node->isEnd = true;
    node->word = word;
}

void Trie::removeWord(const std::string& word){
    if(!hasWord(word)){
        std::cout << word <<" not in data set, please check" << std::endl;
        return;
    }
    TrieNode *node = root;
    int index = 0;
    for(auto ch : word){
        index = ch - 'a';
        node = node->children[index];
        if(--node->count == 0){
            destroyNode(node);
            return;
        }
    }
    node->isEnd = false;
}

bool Trie::hasWord(const std::string& word) const{
    TrieNode *node = root;
    int index = 0;
    for(auto ch : word){
        index = ch - 'a'; 
        if(node->children[index] != nullptr)
            node = node->children[index];
        else
            return false;
    }
    return node->isEnd; 
}

void Trie::getWords(TrieNode* node, vector<string> &vecStr) const{
   if(node == nullptr) 
       return ;
   if(node->isEnd)
       vecStr.push_back(node->word);
   for(int i = 0; i < SIZE; i++){
       getWords(node->children[i],vecStr);
   }
}

std::vector<std::string> Trie::emumerateWords(const std::string& prefix) const{
    TrieNode *node = root;
    std::vector<std::string> res;
    int index = 0;
    for(auto ch : prefix){
        index = ch - 'a'; 
        if(node->children[index] != nullptr)
            node = node->children[index];
        else
            return res;
    }
    getWords(node,res);
    return res;
}