C++ trie实现拼写检查

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/u012630961/article/details/78738491

trie:用键值的一部分来确定查找路径的树称为trie


节点是一个对象,包含以下成员:一个叶/非叶节点标志、一个单词结束标志、一个指向字符串的指针(保存该节点所含的字母),以及一个指向指针数组的指针,数组中的每个指针都指向同一类型的节点。

// Interior (non-leaf) node of the trie.
// All data members are private; class Trie is a friend and is the only code
// that reads or modifies them.
class TrieNonLeafNode
{
public:
    // Creates a node with no letters and no children. Every member is given a
    // definite value so a default-constructed node never holds garbage.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    // Creates a node from a single character (declared here, defined elsewhere).
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf/non-leaf flag, end-of-word flag
    char *letters;          // pointer to a character string
    TrieNonLeafNode **ptrs; // pointer to an array of node pointers
    friend class Trie;
};

程序先读取字典文件中的单词建立 trie,然后据此判断输入文本中的单词是否拼写错误(不区分大小写)。测试所用的字典如下:

/*****************dictionary*******************/
a
ara
are
area
eerie
Eire
era
ere
Erie
ire
IPA
pear
peer
per
pier
rear
rep

完整代码如下:

/*****************trie.h*******************/
#pragma once
// Interior (non-leaf) node of the trie.
// All data members are private; class Trie is a friend and is the only code
// that reads or modifies them.
class TrieNonLeafNode
{
public:
    // Creates a node with no letters and no children. Every member is given a
    // definite value so a default-constructed node never holds garbage.
    TrieNonLeafNode() : leaf(false), endOfWord(false), letters(0), ptrs(0) { }
    // Creates a node from a single character (declared here, defined elsewhere).
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;   // leaf/non-leaf flag, end-of-word flag
    char *letters;          // pointer to a character string
    TrieNonLeafNode **ptrs; // pointer to an array of node pointers
    friend class Trie;
};

// Leaf node of the trie: holds a character string.
// Both data members are private; class Trie is a friend and is the only code
// that reads or modifies them.
class TrieLeafNode
{
public:
    // Creates a leaf that holds no string. Both members are given definite
    // values so a default-constructed leaf never holds garbage.
    TrieLeafNode() : leaf(true), word(0) { }
    // Creates a leaf from a character string (declared here, defined elsewhere).
    TrieLeafNode(char *);
    //~TrieLeafNode();
private:
    bool leaf;      // leaf/non-leaf flag
    char *word;     // pointer to the string kept in this leaf
    friend class Trie;
};

class Trie
{
public:
    Trie():notFound(-1){ }
    Trie(char *);
    void printTrie(){
        *prefix = '\0';
        printTrie(0, root, prefix);
    }
    void insert(char *);
    bool wordFound(char *);
private:
    TrieNonLeafNode *root;
    const int notFound;
    char prefix[80];
    int position(TrieNonLeafNode *, char);
    void addCell(char, TrieNonLeafNode *, int);
    void createLeaf(char, char *, TrieNonLeafNode *);
    void printTrie(int, TrieNonLeafNode *, char *);
};
/*****************trie.cpp*******************/
#include "trie.h"

#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
using namespace std;

// Creates a leaf node holding its own copy of `suffix`.
// suffix: NUL-terminated string; must not be null.
// On allocation failure prints "out of memory2" to cerr and exits with -1.
TrieLeafNode::TrieLeafNode(char *suffix)
{
    leaf = true;
    const size_t bytes = strlen(suffix) + 1;    // room for the terminator too
    // nothrow form: plain new throws instead of returning null, which would
    // make the check below unreachable.
    word = new (std::nothrow) char[bytes];
    if(word == 0){
        cerr << "out of memory2\n";
        exit(-1);
    }
    // memcpy instead of strcpy_s: the latter is not available on every
    // toolchain, and the length is already known.
    memcpy(word, suffix, bytes);
}

// Creates a non-leaf node with a single letter `ch` and one empty child slot.
// Both flags start out false.
// On allocation failure prints "out of memory3" to cerr and exits with 1.
TrieNonLeafNode::TrieNonLeafNode(char ch)
{
    // Array forms are used for both buffers so that every letters/ptrs buffer
    // of a node is an array allocation. The nothrow form makes the null check
    // below reachable (plain new throws instead of returning null).
    ptrs = new (std::nothrow) TrieNonLeafNode *[1];
    letters = new (std::nothrow) char[2];
    if(ptrs == 0 || letters == 0){
        cerr << "out of memory3\n";
        exit(1);
    }
    leaf = false;
    endOfWord = false;
    ptrs[0] = 0;            // no child yet for this letter
    letters[0] = ch;
    letters[1] = '\0';
}

// Creates a trie containing `word` as its first entry.
// The root gets the word's first letter and a leaf with the remaining suffix.
// A null or empty word yields a root with no letters; for the empty string the
// root is additionally marked as end-of-word, which is what insert() does for
// an empty word.
Trie::Trie(char *word):notFound(-1)
{
    if(word == 0 || *word == '\0'){
        // word+1 would point past the terminator here, so build the root
        // without any child instead of calling createLeaf.
        root = new TrieNonLeafNode('\0');
        if(word != 0)
            root->endOfWord = true;
        return ;
    }
    root = new TrieNonLeafNode(*word);
    createLeaf(*word, word+1, root);
}

// Recursively prints the subtree rooted at `p`.
// depth:  number of letters already placed in `prefix`; also the indentation.
// p:      node to print (a leaf or a non-leaf, told apart by its leaf flag).
// prefix: scratch buffer holding the letters on the path to `p`; it is
//         overwritten from index `depth` onwards.
void Trie::printTrie(int depth, TrieNonLeafNode *p, char *prefix)
{
    if(p->leaf){    // leaf node: print the path and the stored suffix
        TrieLeafNode *lf = (TrieLeafNode *)p;
        for(int i = 1; i <= depth; i++)
            cout << " ";
        cout << " >>" << prefix << "|" << lf->word << endl;
        return ;
    }
    // Non-leaf node: visit the children from the last letter to the first.
    for(int i = static_cast<int>(strlen(p->letters)) - 1; i >= 0; i--){
        // Compare against the null pointer constant 0; comparing a pointer
        // with the character literal '\0' is rejected by C++11 and later.
        if(p->ptrs[i] != 0){
            prefix[depth] = p->letters[i];
            prefix[depth+1] = '\0';
            printTrie(depth + 1, p->ptrs[i], prefix);
        }
    }
    if(p->endOfWord){   // the path to this node is itself a word
        prefix[depth] = '\0';
        for(int i = 1; i <= depth; i++)
            cout << " ";
        cout << ">>>" << prefix << endl;
    }
}

// Returns the index of `ch` within the letter string of node `p`,
// or notFound when the node has no such letter.
int Trie::position(TrieNonLeafNode *p, char ch)
{
    const char *const first = p->letters;
    // Walk the string up to (not including) its terminator.
    for(const char *cur = first; *cur != '\0'; ++cur){
        if(*cur == ch)
            return static_cast<int>(cur - first);
    }
    return notFound;
}

bool Trie::wordFound(char *word)
{
    TrieNonLeafNode *p = root;
    TrieLeafNode *lf;
    int pos;
    while(true){
        if(p->leaf){
            lf = (TrieLeafNode *)p;
            if(strcmp(word, lf->word) == 0)
                return true;
            else
                return false;
        }
        else if(*word == '\0'){
            if(p->endOfWord)
                return true;
            else
                return false;
        }
        else if((pos = position(p, *word)) != notFound && p->ptrs[pos] != 0){
            p = p->ptrs[pos];
            word++;
        }
        else
            return false;
    }
}

void Trie::addCell(char ch, TrieNonLeafNode *p, int stop)   //申请非叶节点空间
{
    int i, len = strlen(p->letters);
    char *s = p->letters;
    TrieNonLeafNode **tmp = p->ptrs;
    p->letters = new char[len+2];
    p->ptrs = new TrieNonLeafNode*[len+1];
    if(p->letters == 0 || p->ptrs == 0){
        cerr << "out of memory1\n";
        exit(1);
    }
    for(i = 0; i < len + 1; i++)
        p->ptrs[i] = 0;
    if(stop < len){
        for(i = len; i >= stop + 1; i--){
            p->ptrs[i] = tmp[i-1];
            p->letters[i] = s[i-1];
        }
    }
    p->letters[stop] = ch;
    for(i = stop - 1; i >= 0; i--){
        p->ptrs[i] = tmp[i];
        p->letters[i] = s[i];
    }
    p->letters[len+1] = '\0';
    delete []s;
}

void Trie::createLeaf(char ch, char *suffix, TrieNonLeafNode *p)    //创建叶节点
{
    int pos = position(p, ch);
    if(pos == notFound){
        for(pos = 0; pos < strlen(p->letters) && p->letters[pos] < ch; pos++);
        addCell(ch, p, pos);
    }
    p->ptrs[pos] = (TrieNonLeafNode *) new TrieLeafNode(suffix);
}

void Trie::insert(char *word)
{
    TrieNonLeafNode *p = root;
    TrieLeafNode *lf;
    int offset, pos;
    char *hold = word;
    while(true){
        if(*word == '\0'){
            if(p->endOfWord)    
                cout << "Duplicate entry1 " << hold << endl;
            else
                p->endOfWord = true;
            return ;
        }
        pos = position(p, *word);
        if(pos == notFound){
            createLeaf(*word, word+1, p);
            return ;
        }
        else if(pos != notFound && p->ptrs[pos]->leaf){
            lf = (TrieLeafNode *)p->ptrs[pos];
            if(strcmp(lf->word, word + 1) == 0){
                cout << "Duplicate entry2 " << hold << endl;
                return ;
            }
            offset = 0;
            do{
                pos = position(p, word[offset]);
                if(strlen(word) == offset + 1){
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(lf->word[offset], lf->word + offset + 1, p->ptrs[pos]);
                    return ;
                }
                else if(strlen(lf->word) == offset){
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(word[offset+1], word + offset + 2, p->ptrs[pos]);
                    return ;
                }
                p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                p = p->ptrs[pos];
                offset++;
            }while(word[offset] == lf->word[offset-1]);
            offset--;
            char *s = "";
            if(strlen(word) > offset + 2)
                s = word + offset + 2;
            createLeaf(word[offset+1], s, p);
            if(strlen(lf->word) > offset + 1)
                s = lf->word + offset + 1;
            else
                s = "";
            createLeaf(lf->word[offset], s, p);
            delete [] lf->word;
            delete lf;
            return ;
        }
        else{
            p = p->ptrs[pos];
            word++;
        }
    }
}



/*****************spellcheck.cpp********************/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include "trie.h"
using namespace std;

// Converts the NUL-terminated string `s` to upper case in place.
// Returns `s` itself; a null pointer is returned unchanged.
char *Strupr(char *s)
{
    if(s == 0)
        return s;
    for(char *cur = s; *cur != '\0'; ++cur){
        // toupper requires a value representable as unsigned char (or EOF);
        // a plain char may be negative, so convert before the call.
        *cur = static_cast<char>(std::toupper(static_cast<unsigned char>(*cur)));
    }
    return s;
}

// Spell checker driver.
// Loads the words of the file "dictionary" into a trie (upper-cased), prints
// the trie, then scans a text file and reports every word that is not in the
// trie together with its line number.
// The text file name is taken from the single command-line argument when
// exactly one is given; otherwise it is read from standard input.
int main(int argc, char *argv[])
{
    char fileName[25] = "", s[80], ch;
    int lineNum = 1;
    ifstream dictionary("dictionary");
    if(dictionary.fail()){
        cerr << "cannot open 'dictionary'\n";
        exit(-1);
    }
    // width() bounds the extraction to the buffer size (terminator included);
    // it is reset by every extraction, so it is set before each one.
    dictionary.width(sizeof s);
    if(!(dictionary >> s)){
        // Without a first word there is nothing to build the trie from.
        cerr << "'dictionary' contains no words\n";
        exit(-1);
    }
    Trie trie(Strupr(s));
    while(true){
        dictionary.width(sizeof s);
        if(!(dictionary >> s))
            break;
        trie.insert(Strupr(s));
    }
    trie.printTrie();
    const char *path;
    if(argc != 2){
        cout << "Enter a file name: ";
        cin.width(sizeof fileName);     // never write past fileName
        cin >> fileName;
        path = fileName;
    }
    else
        path = argv[1];                 // used in place: no copy, no size limit
    ifstream textFile(path);
    if(textFile.fail()){
        cout << "cannot open " << path << endl;
        exit(-1);
    }
    cout << "misspelled words:\n";
    // `more` is false as soon as a read fails for any reason (end of file or
    // an I/O error), so the loops below always terminate.
    bool more = !textFile.get(ch).fail();
    while(more){
        // Skip everything that is not a letter, counting the lines.
        while(more && !isalpha(static_cast<unsigned char>(ch))){
            if(ch == '\n')
                lineNum++;
            more = !textFile.get(ch).fail();
        }
        if(!more)
            break;
        // Collect one word, upper-cased. Letters beyond the buffer capacity
        // are consumed but not stored.
        size_t len = 0;
        while(more && isalpha(static_cast<unsigned char>(ch))){
            if(len < sizeof(s) - 1)
                s[len++] = static_cast<char>(toupper(static_cast<unsigned char>(ch)));
            more = !textFile.get(ch).fail();
        }
        s[len] = '\0';
        if(!trie.wordFound(s))
            cout << s << " on line " << lineNum << endl;
    }
    dictionary.close();
    textFile.close();

    return 0;
}


展开阅读全文

没有更多推荐了,返回首页