C++ trie实现拼写检查

trie:用键值的一部分来确定查找路径的树称为trie


节点是一个对象,包含以下成员:一个叶/非叶节点标志,一个单词结束标志,一个指向字符串的指针,以及一个指向指针数组的指针,数组中的每个指针都指向同样类型的结构

// Interior (non-leaf) node of the trie.
//
// `letters` is a NUL-terminated string and `ptrs` is an array of child
// pointers; Trie pairs them by index (the child for letters[i] is ptrs[i]).
// `leaf` must stay the first data member: Trie inspects it through a
// TrieNonLeafNode* that may really point at a TrieLeafNode.
class TrieNonLeafNode
{
public:
    // Creates a placeholder node: not a leaf, not an end of word, and with
    // no letter string or child array attached yet.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    // Creates a non-leaf node for the given letter (defined out of line).
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf / non-leaf flag, end-of-word flag
    char *letters;          // pointer to the string of letters
    TrieNonLeafNode **ptrs; // pointer to the array of child pointers
    friend class Trie;
};

通过读取字典中的单词,来判断输入的单词是否拼写错误(不区分大小写),测试用的字典如下

/*****************dictionary*******************/
a
ara
are
area
eerie
Eire
era
ere
Erie
ire
IPA
pear
peer
per
pier
rear
rep

完整代码如下:

/*****************trie.h*******************/
#pragma once
// Interior (non-leaf) node of the trie.
//
// `letters` is a NUL-terminated string and `ptrs` is an array of child
// pointers; Trie pairs them by index (the child for letters[i] is ptrs[i]).
// `leaf` must stay the first data member: Trie inspects it through a
// TrieNonLeafNode* that may really point at a TrieLeafNode.
class TrieNonLeafNode
{
public:
    // Creates a placeholder node: not a leaf, not an end of word, and with
    // no letter string or child array attached yet.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    // Creates a non-leaf node for the given letter (defined out of line).
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf / non-leaf flag, end-of-word flag
    char *letters;          // pointer to the string of letters
    TrieNonLeafNode **ptrs; // pointer to the array of child pointers
    friend class Trie;
};

// Leaf node of the trie: stores the not-yet-branched remainder of one word.
//
// `leaf` must stay the first data member: Trie reaches leaf nodes through
// TrieNonLeafNode* pointers and reads this flag to tell the two kinds apart.
class TrieLeafNode
{
public:
    // Creates a leaf that holds no word yet.
    TrieLeafNode() : leaf(true), word(0) { }
    // Creates a leaf from a suffix string (defined out of line).
    TrieLeafNode(char *);
    //~TrieLeafNode();
private:
    bool leaf;      // leaf / non-leaf flag
    char *word;     // suffix stored in this leaf
    friend class Trie;
};

class Trie
{
public:
    Trie():notFound(-1){ }
    Trie(char *);
    void printTrie(){
        *prefix = '\0';
        printTrie(0, root, prefix);
    }
    void insert(char *);
    bool wordFound(char *);
private:
    TrieNonLeafNode *root;
    const int notFound;
    char prefix[80];
    int position(TrieNonLeafNode *, char);
    void addCell(char, TrieNonLeafNode *, int);
    void createLeaf(char, char *, TrieNonLeafNode *);
    void printTrie(int, TrieNonLeafNode *, char *);
};
/*****************trie.cpp*******************/
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
#include "trie.h"
using namespace std;

// Creates a leaf node that owns a private copy of `suffix`.
//
// suffix: NUL-terminated string to store; it is copied, not retained.
// On allocation failure the program prints a message and exits.
TrieLeafNode::TrieLeafNode(char *suffix)
{
    leaf = true;
    const std::size_t bytes = strlen(suffix) + 1;   // text plus terminator
    // Non-throwing new so that the null check below is actually reachable.
    word = new (std::nothrow) char[bytes];
    if(word == 0){
        cerr << "out of memory2\n";
        exit(-1);
    }
    // memcpy with the known length replaces the non-portable strcpy_s.
    memcpy(word, suffix, bytes);
}

// Creates a non-leaf node with a single cell for letter `ch` and no child.
//
// ch: the letter of the only cell; its child pointer starts out null.
// On allocation failure the program prints a message and exits.
TrieNonLeafNode::TrieNonLeafNode(char ch)
{
    // Array form of new, matching the new[] used when the node later grows,
    // and non-throwing so that the null check below is reachable.
    ptrs = new (std::nothrow) TrieNonLeafNode *[1];
    letters = new (std::nothrow) char[2];
    if(ptrs == 0 || letters == 0){
        cerr << "out of memory3\n";
        exit(1);
    }
    leaf = false;
    endOfWord = false;
    ptrs[0] = 0;
    letters[0] = ch;
    letters[1] = '\0';
}

// Creates a trie that initially contains `word`.
//
// word: NUL-terminated first word. A null or empty word yields a root node
//       with no cells and no stored word, instead of reading past the
//       terminator through word+1.
Trie::Trie(char *word):notFound(-1)
{
    if(word == 0 || *word == '\0'){
        root = new TrieNonLeafNode('\0');   // letter string is empty
        return;
    }
    // Root gets one cell for the first letter; the rest goes into a leaf.
    root = new TrieNonLeafNode(*word);
    createLeaf(*word, word+1, root);
}

void Trie::printTrie(int depth, TrieNonLeafNode *p, char *prefix)
{
    int i;
    if(p->leaf){    //叶节点
        TrieLeafNode *lf = (TrieLeafNode *)p;
        for(int i = 1; i <= depth; i++)
            cout << " ";
        cout << " >>" << prefix << "|" << lf->word << endl;
    }
    else{   //非叶节点
        for(i = strlen(p->letters)-1; i >= 0; i--){
            if(p->ptrs[i]!= '\0'){
                prefix[depth] = p->letters[i];
                prefix[depth+1] = '\0';
                printTrie(depth + 1, p->ptrs[i], prefix);
            }
        }
        if(p->endOfWord){
            prefix[depth] = '\0';
            for(i = 1; i <= depth; i++)
                cout << " ";
            cout << ">>>" << prefix << endl;
        }
    }
}

// Finds the index of letter `ch` among the cells of node `p`.
//
// Returns the index of `ch` in p->letters, or notFound when it is absent.
// The terminator is never matched, so ch == '\0' yields notFound.
int Trie::position(TrieNonLeafNode *p, char ch)
{
    // Single pass up to the terminator; no repeated strlen calls.
    for(int i = 0; p->letters[i] != '\0'; i++){
        if(p->letters[i] == ch)
            return i;
    }
    return notFound;
}

bool Trie::wordFound(char *word)
{
    TrieNonLeafNode *p = root;
    TrieLeafNode *lf;
    int pos;
    while(true){
        if(p->leaf){
            lf = (TrieLeafNode *)p;
            if(strcmp(word, lf->word) == 0)
                return true;
            else
                return false;
        }
        else if(*word == '\0'){
            if(p->endOfWord)
                return true;
            else
                return false;
        }
        else if((pos = position(p, *word)) != notFound && p->ptrs[pos] != 0){
            p = p->ptrs[pos];
            word++;
        }
        else
            return false;
    }
}

void Trie::addCell(char ch, TrieNonLeafNode *p, int stop)   //申请非叶节点空间
{
    int i, len = strlen(p->letters);
    char *s = p->letters;
    TrieNonLeafNode **tmp = p->ptrs;
    p->letters = new char[len+2];
    p->ptrs = new TrieNonLeafNode*[len+1];
    if(p->letters == 0 || p->ptrs == 0){
        cerr << "out of memory1\n";
        exit(1);
    }
    for(i = 0; i < len + 1; i++)
        p->ptrs[i] = 0;
    if(stop < len){
        for(i = len; i >= stop + 1; i--){
            p->ptrs[i] = tmp[i-1];
            p->letters[i] = s[i-1];
        }
    }
    p->letters[stop] = ch;
    for(i = stop - 1; i >= 0; i--){
        p->ptrs[i] = tmp[i];
        p->letters[i] = s[i];
    }
    p->letters[len+1] = '\0';
    delete []s;
}

void Trie::createLeaf(char ch, char *suffix, TrieNonLeafNode *p)    //创建叶节点
{
    int pos = position(p, ch);
    if(pos == notFound){
        for(pos = 0; pos < strlen(p->letters) && p->letters[pos] < ch; pos++);
        addCell(ch, p, pos);
    }
    p->ptrs[pos] = (TrieNonLeafNode *) new TrieLeafNode(suffix);
}

void Trie::insert(char *word)
{
    TrieNonLeafNode *p = root;
    TrieLeafNode *lf;
    int offset, pos;
    char *hold = word;
    while(true){
        if(*word == '\0'){
            if(p->endOfWord)    
                cout << "Duplicate entry1 " << hold << endl;
            else
                p->endOfWord = true;
            return ;
        }
        pos = position(p, *word);
        if(pos == notFound){
            createLeaf(*word, word+1, p);
            return ;
        }
        else if(pos != notFound && p->ptrs[pos]->leaf){
            lf = (TrieLeafNode *)p->ptrs[pos];
            if(strcmp(lf->word, word + 1) == 0){
                cout << "Duplicate entry2 " << hold << endl;
                return ;
            }
            offset = 0;
            do{
                pos = position(p, word[offset]);
                if(strlen(word) == offset + 1){
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(lf->word[offset], lf->word + offset + 1, p->ptrs[pos]);
                    return ;
                }
                else if(strlen(lf->word) == offset){
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(word[offset+1], word + offset + 2, p->ptrs[pos]);
                    return ;
                }
                p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                p = p->ptrs[pos];
                offset++;
            }while(word[offset] == lf->word[offset-1]);
            offset--;
            char *s = "";
            if(strlen(word) > offset + 2)
                s = word + offset + 2;
            createLeaf(word[offset+1], s, p);
            if(strlen(lf->word) > offset + 1)
                s = lf->word + offset + 1;
            else
                s = "";
            createLeaf(lf->word[offset], s, p);
            delete [] lf->word;
            delete lf;
            return ;
        }
        else{
            p = p->ptrs[pos];
            word++;
        }
    }
}



/*****************spellcheck.cpp********************/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include "trie.h"
using namespace std;

// Converts the NUL-terminated string `s` to upper case in place.
//
// s: string to convert; a null pointer is returned unchanged.
// Returns `s`.
char *Strupr(char *s)
{
    if(s == 0)
        return s;
    for(char *p = s; *p != '\0'; ++p){
        // toupper requires a value representable as unsigned char; passing
        // a negative plain char would be undefined behaviour.
        *p = static_cast<char>(std::toupper(static_cast<unsigned char>(*p)));
    }
    return s;
}

// Spell checker driver.
//
// Loads the words of the file "dictionary" (upper-cased) into a trie and
// prints the trie. It then scans a text file, named by the single
// command-line argument or entered at the prompt, and prints every word
// missing from the trie together with its line number.
//
// Words are runs of alphabetic characters. Words longer than the buffer are
// truncated to its capacity instead of overflowing it.
int main(int argc, char *argv[])
{
    char fileName[25] = "", s[80], ch;
    const int maxWord = static_cast<int>(sizeof(s)) - 1;
    int lineNum = 1;
    ifstream dictionary("dictionary");
    if(dictionary.fail()){
        cerr << "cannot open 'dictionary'\n";
        exit(-1);
    }
    // width() bounds the next extraction to the buffer size; it resets after
    // every extraction, so it is set again before each read.
    dictionary.width(sizeof(s));
    if(!(dictionary >> s)){
        cerr << "'dictionary' contains no words\n";
        exit(-1);
    }
    Trie trie(Strupr(s));
    for(;;){
        dictionary.width(sizeof(s));
        if(!(dictionary >> s))
            break;
        trie.insert(Strupr(s));
    }
    trie.printTrie();

    // Use the argument in place; only a typed name needs the local buffer.
    const char *path = fileName;
    if(argc != 2){
        cout << "Enter a file name: ";
        cin.width(sizeof(fileName));
        cin >> fileName;
    }
    else
        path = argv[1];
    ifstream textFile(path);
    if(textFile.fail()){
        cout << "cannot open " << path << endl;
        exit(-1);
    }
    cout << "misspelled words:\n";
    while(textFile.get(ch)){
        if(!isalpha(static_cast<unsigned char>(ch))){
            if(ch == '\n')
                lineNum++;
            continue;
        }
        // ch is the first letter of a word: collect the whole run of letters.
        int i = 0;
        do{
            if(i < maxWord)
                s[i++] = static_cast<char>(toupper(static_cast<unsigned char>(ch)));
        }while(textFile.get(ch) && isalpha(static_cast<unsigned char>(ch)));
        s[i] = '\0';
        if(!trie.wordFound(s))
            cout << s << " on line " << lineNum << endl;
        // The character that ended the word is already consumed; count it
        // here if it was a newline (not when the read hit end of file).
        if(textFile && ch == '\n')
            lineNum++;
    }
    dictionary.close();
    textFile.close();

    return 0;
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是一个简单的 C 语言字典 Trie 树的代码实现:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_WORD_LEN 100

typedef struct TrieNode {
    char value;
    struct TrieNode *children[26];
    int is_end;
} TrieNode;

TrieNode *createNode(char value) {
    TrieNode *node = (TrieNode *) malloc(sizeof(TrieNode));
    node->value = value;
    for (int i = 0; i < 26; i++) {
        node->children[i] = NULL;
    }
    node->is_end = 0;
    return node;
}

void insert(TrieNode *root, char *word) {
    TrieNode *curr = root;
    int len = strlen(word);
    for (int i = 0; i < len; i++) {
        int index = word[i] - 'a';
        if (curr->children[index] == NULL) {
            curr->children[index] = createNode(word[i]);
        }
        curr = curr->children[index];
    }
    curr->is_end = 1;
}

int search(TrieNode *root, char *word) {
    TrieNode *curr = root;
    int len = strlen(word);
    for (int i = 0; i < len; i++) {
        int index = word[i] - 'a';
        if (curr->children[index] == NULL) {
            return 0;
        }
        curr = curr->children[index];
    }
    return curr->is_end;
}

int main() {
    TrieNode *root = createNode('\0');
    char word[MAX_WORD_LEN];
    int choice = 0;
    do {
        printf("1. Insert Word\n");
        printf("2. Search Word\n");
        printf("3. Exit\n");
        printf("Enter Choice: ");
        scanf("%d", &choice);
        switch (choice) {
            case 1:
                printf("Enter Word to Insert: ");
                scanf("%s", word);
                insert(root, word);
                break;
            case 2:
                printf("Enter Word to Search: ");
                scanf("%s", word);
                if (search(root, word)) {
                    printf("%s is present in the dictionary.\n", word);
                } else {
                    printf("%s is not present in the dictionary.\n", word);
                }
                break;
            case 3:
                printf("Exiting...\n");
                break;
            default:
                printf("Invalid Choice!\n");
                break;
        }
    } while (choice != 3);
    return 0;
}
```

该实现中使用了一个 TrieNode 结构体来表示 Trie 树中的每个节点,其中包含了节点的值,子节点指针数组和一个标志位,用于指示该节点是否为单词的结尾。

在插入单词时,从根节点开始遍历 Trie 树,如果当前节点的相应子节点为空,则新建一个节点并将其作为当前节点的相应子节点。最后将单词的结尾节点的标志位设置为 1。

在查找单词时,同样从根节点开始遍历 Trie 树,如果当前节点的相应子节点为空,则说明该单词不存在于 Trie 树中。如果单词的最后一个字符所在的节点的标志位为 1,则说明该单词存在于 Trie 树中。

该实现中还包含了一个简单的命令行界面,用于接收用户的输入并执行相应的操作。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值