trie:用键值的一部分来确定查找路径的树称为trie
节点是一个对象,包含以下成员:一个叶/非叶节点标志、一个单词结束标志、一个指向字符串的指针,以及一个指向指针数组的指针,数组中的每个指针都指向同类型的节点
// Interior (non-leaf) node of the trie.
//
// `letters` is a NUL-terminated string holding one character per cell and
// `ptrs[i]` is the child reached through `letters[i]`; a null `ptrs[i]`
// means the cell exists but nothing is stored under it yet.
//
// `leaf` must stay the first data member: Trie stores TrieLeafNode objects
// behind TrieNonLeafNode pointers and reads `leaf` through either type to
// tell them apart.
class TrieNonLeafNode
{
public:
    // Creates a node with no cells. Every member is given a defined value so
    // a default-constructed node never exposes indeterminate flags/pointers.
    TrieNonLeafNode():leaf(false), endOfWord(false), letters(0), ptrs(0){ }
    // Creates a node with a single cell for the given character.
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;    // leaf/non-leaf flag, end-of-word flag
    char *letters;           // characters labelling this node's cells
    TrieNonLeafNode **ptrs;  // array of child pointers, parallel to letters
    friend class Trie;
};
程序先读取字典中的单词建立trie,再据此判断输入文本中的单词是否拼写错误(不区分大小写)。测试用的字典如下:
/*****************dictionary*******************/
a
ara
are
area
eerie
Eire
era
ere
Erie
ire
IPA
pear
peer
per
pier
rear
rep
完整代码如下:
/*****************trie.h*******************/
#pragma once
// Interior (non-leaf) node of the trie.
//
// `letters` is a NUL-terminated string holding one character per cell and
// `ptrs[i]` is the child reached through `letters[i]`; a null `ptrs[i]`
// means the cell exists but nothing is stored under it yet.
//
// `leaf` must stay the first data member: Trie stores TrieLeafNode objects
// behind TrieNonLeafNode pointers and reads `leaf` through either type to
// tell them apart.
class TrieNonLeafNode
{
public:
    // Creates a node with no cells. Every member is given a defined value so
    // a default-constructed node never exposes indeterminate flags/pointers.
    TrieNonLeafNode():leaf(false), endOfWord(false), letters(0), ptrs(0){ }
    // Creates a node with a single cell for the given character.
    TrieNonLeafNode(char);
    //~TrieNonLeafNode();
private:
    bool leaf, endOfWord;    // leaf/non-leaf flag, end-of-word flag
    char *letters;           // characters labelling this node's cells
    TrieNonLeafNode **ptrs;  // array of child pointers, parallel to letters
    friend class Trie;
};
// Leaf node of the trie: stores the remaining suffix of a word.
//
// `leaf` must stay the first data member: Trie keeps leaves behind
// TrieNonLeafNode pointers and inspects `leaf` before casting back.
class TrieLeafNode
{
public:
    // Creates a leaf that holds no suffix yet. Members are initialised so the
    // node is recognisable as a leaf and `word` is never a wild pointer.
    TrieLeafNode():leaf(true), word(0){ }
    // Creates a leaf holding a copy of the given suffix.
    TrieLeafNode(char *);
    //~TrieLeafNode();
private:
    bool leaf;   // always true for this node type
    char *word;  // heap-allocated copy of the stored suffix
    friend class Trie;
};
class Trie
{
public:
Trie():notFound(-1){ }
Trie(char *);
void printTrie(){
*prefix = '\0';
printTrie(0, root, prefix);
}
void insert(char *);
bool wordFound(char *);
private:
TrieNonLeafNode *root;
const int notFound;
char prefix[80];
int position(TrieNonLeafNode *, char);
void addCell(char, TrieNonLeafNode *, int);
void createLeaf(char, char *, TrieNonLeafNode *);
void printTrie(int, TrieNonLeafNode *, char *);
};
/*****************trie.cpp*******************/
#include "trie.h"
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <new>
using namespace std;
// Creates a leaf node that owns a heap copy of `suffix`.
//
// suffix: NUL-terminated string to store; must not be null.
// On allocation failure prints "out of memory2" and terminates the program.
TrieLeafNode::TrieLeafNode(char *suffix)
{
    const size_t bytes = strlen(suffix) + 1;  // characters plus terminating NUL
    leaf = true;
    // Plain `new` throws instead of returning null, which made the check
    // below unreachable; the nothrow form keeps the diagnostic path alive.
    word = new (std::nothrow) char[bytes];
    if(word == 0){
        cerr << "out of memory2\n";
        exit(-1);
    }
    // memcpy replaces strcpy_s, which is not available on every toolchain;
    // the buffer was sized from the source, so the copy cannot overrun.
    memcpy(word, suffix, bytes);
}
// Creates a non-leaf node with a single cell labelled `ch` whose child
// pointer is null (nothing stored under it yet).
//
// Passing '\0' yields an empty `letters` string, i.e. a node with no cells.
// On allocation failure prints "out of memory3" and terminates the program.
TrieNonLeafNode::TrieNonLeafNode(char ch)
{
    // Non-throwing new so the null check below can actually fire. The pointer
    // storage uses the array form, matching the array allocation that
    // Trie::addCell() performs when it grows a node.
    ptrs = new (std::nothrow) TrieNonLeafNode *[1];
    letters = new (std::nothrow) char[2];
    if(ptrs == 0 || letters == 0){
        cerr << "out of memory3\n";
        exit(1);
    }
    leaf = false;
    endOfWord = false;
    ptrs[0] = 0;
    letters[0] = ch;
    letters[1] = '\0';
}
// Builds a trie whose first entry is `word`.
//
// The root gets a cell for the first character and the rest of the word is
// stored in a leaf under that cell.
//
// Edge cases: an empty string used to pass `word+1` (one past the
// terminator) to createLeaf(). Now an empty string produces a root without
// cells that is marked as end-of-word, which is the state insert() produces
// for an empty word; a null pointer produces a root without cells and
// without any stored word.
Trie::Trie(char *word):notFound(-1)
{
    const bool hasText = (word != 0 && *word != '\0');
    // A root built from '\0' has an empty letters string, i.e. no cells.
    root = new TrieNonLeafNode(hasText ? *word : '\0');
    if(hasText){
        createLeaf(*word, word+1, root);
        return;
    }
    if(word != 0)
        root->endOfWord = true;
}
// Recursively prints the subtree rooted at `p` to standard output.
//
// depth:  number of characters already placed in `prefix`; also the number
//         of indentation units printed before the entry.
// p:      node to print; may really be a TrieLeafNode (checked via `leaf`).
//         A null pointer prints nothing.
// prefix: caller-supplied scratch buffer holding the path so far; it is
//         overwritten at [depth] and [depth+1], so it needs room for them.
//
// A leaf prints as " >>prefix|suffix". For a non-leaf the cells are visited
// from the last one to the first, and afterwards, if a word ends at this
// node, ">>>prefix" is printed.
void Trie::printTrie(int depth, TrieNonLeafNode *p, char *prefix)
{
    if(p == 0)
        return;
    if(p->leaf){  // leaf node
        TrieLeafNode *lf = (TrieLeafNode *)p;
        for(int i = 1; i <= depth; i++)
            cout << " ";
        cout << " >>" << prefix << "|" << lf->word << endl;
        return;
    }
    // Non-leaf node. The index is signed on purpose so the countdown ends
    // cleanly even when `letters` is empty.
    for(int i = (int)strlen(p->letters) - 1; i >= 0; i--){
        // Compare against the null pointer constant 0: the original '\0' is a
        // character literal, and comparing a pointer with it is ill-formed
        // since C++11.
        if(p->ptrs[i] != 0){
            prefix[depth] = p->letters[i];
            prefix[depth+1] = '\0';
            printTrie(depth + 1, p->ptrs[i], prefix);
        }
    }
    if(p->endOfWord){
        prefix[depth] = '\0';  // drop whatever the children appended
        for(int i = 1; i <= depth; i++)
            cout << " ";
        cout << ">>>" << prefix << endl;
    }
}
// Returns the index of the cell labelled `ch` in node `p`, or notFound when
// the node has no such cell. The terminating NUL of `letters` is never
// treated as a cell, so searching for '\0' yields notFound.
int Trie::position(TrieNonLeafNode *p, char ch)
{
    const char *cells = p->letters;
    for(int idx = 0; cells[idx] != '\0'; ++idx){
        if(cells[idx] == ch)
            return idx;
    }
    return notFound;
}
bool Trie::wordFound(char *word)
{
TrieNonLeafNode *p = root;
TrieLeafNode *lf;
int pos;
while(true){
if(p->leaf){
lf = (TrieLeafNode *)p;
if(strcmp(word, lf->word) == 0)
return true;
else
return false;
}
else if(*word == '\0'){
if(p->endOfWord)
return true;
else
return false;
}
else if((pos = position(p, *word)) != notFound && p->ptrs[pos] != 0){
p = p->ptrs[pos];
word++;
}
else
return false;
}
}
void Trie::addCell(char ch, TrieNonLeafNode *p, int stop) //申请非叶节点空间
{
int i, len = strlen(p->letters);
char *s = p->letters;
TrieNonLeafNode **tmp = p->ptrs;
p->letters = new char[len+2];
p->ptrs = new TrieNonLeafNode*[len+1];
if(p->letters == 0 || p->ptrs == 0){
cerr << "out of memory1\n";
exit(1);
}
for(i = 0; i < len + 1; i++)
p->ptrs[i] = 0;
if(stop < len){
for(i = len; i >= stop + 1; i--){
p->ptrs[i] = tmp[i-1];
p->letters[i] = s[i-1];
}
}
p->letters[stop] = ch;
for(i = stop - 1; i >= 0; i--){
p->ptrs[i] = tmp[i];
p->letters[i] = s[i];
}
p->letters[len+1] = '\0';
delete []s;
}
// Stores `suffix` in a new leaf under the cell of node `p` labelled `ch`.
//
// When the node has no cell for `ch`, one is added at the first index whose
// letter is not less than `ch`. Whatever pointer the cell held before is
// overwritten.
void Trie::createLeaf(char ch, char *suffix, TrieNonLeafNode *p)
{
    int cell = position(p, ch);
    if(cell == notFound){
        cell = 0;
        while(p->letters[cell] != '\0' && p->letters[cell] < ch)
            ++cell;
        addCell(ch, p, cell);
    }
    p->ptrs[cell] = (TrieNonLeafNode *) new TrieLeafNode(suffix);
}
// Adds `word` to the trie; a word that is already present is reported on
// standard output ("Duplicate entry1"/"Duplicate entry2") and left as is.
//
// The walk consumes one character per non-leaf node:
//  - word exhausted: mark the current node as end-of-word;
//  - no cell for the character, or a cell with a null child: store the rest
//    of the word in a new leaf under that cell;
//  - cell occupied by a leaf: replace the leaf by a chain of non-leaf nodes,
//    one per character shared by the word and the leaf's suffix, then hang
//    the two differing remainders below the last node;
//  - otherwise descend into the child.
//
// Fixes over the previous version:
//  - a cell whose child pointer is null (the constructor of a non-leaf node
//    creates such a cell) was dereferenced; e.g. inserting "ABCDEF", "ABC"
//    and then "ABCCX" crashed;
//  - the replaced leaf was only released on one of the three exit paths;
//  - the empty suffix was a string literal bound to a non-const char *.
void Trie::insert(char *word)
{
    TrieNonLeafNode *p = root;
    TrieLeafNode *lf;
    int offset, pos;
    char *hold = word;      // full word, kept for the duplicate messages
    char noSuffix[] = "";   // writable empty suffix for createLeaf()
    while(true){
        if(*word == '\0'){
            if(p->endOfWord)
                cout << "Duplicate entry1 " << hold << endl;
            else
                p->endOfWord = true;
            return ;
        }
        pos = position(p, *word);
        if(pos == notFound || p->ptrs[pos] == 0){
            // createLeaf() adds the cell when it is missing and fills it
            // when it exists but is still empty.
            createLeaf(*word, word+1, p);
            return ;
        }
        else if(p->ptrs[pos]->leaf){
            lf = (TrieLeafNode *)p->ptrs[pos];
            if(strcmp(lf->word, word + 1) == 0){
                cout << "Duplicate entry2 " << hold << endl;
                return ;
            }
            // Neither string changes below, so measure them once.
            const int wordLen = (int)strlen(word);
            const int leafLen = (int)strlen(lf->word);
            offset = 0;
            do{
                pos = position(p, word[offset]);
                if(wordLen == offset + 1){
                    // word is a proper prefix of the stored entry: the new
                    // node ends the word and keeps the rest of the old
                    // suffix in a leaf.
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(lf->word[offset], lf->word + offset + 1, p->ptrs[pos]);
                    delete [] lf->word;  // createLeaf() copied what it needs
                    delete lf;
                    return ;
                }
                else if(leafLen == offset){
                    // the stored entry is a proper prefix of word: the new
                    // node ends the old entry and the rest of word goes
                    // into a leaf.
                    p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                    p->ptrs[pos]->endOfWord = true;
                    createLeaf(word[offset+1], word + offset + 2, p->ptrs[pos]);
                    delete [] lf->word;
                    delete lf;
                    return ;
                }
                // One more shared character: push the split one level down.
                p->ptrs[pos] = new TrieNonLeafNode(word[offset+1]);
                p = p->ptrs[pos];
                offset++;
            }while(word[offset] == lf->word[offset-1]);
            offset--;
            // p is now the node below the shared part; word[offset+1] and
            // lf->word[offset] are the first characters that differ.
            char *s = noSuffix;
            if(wordLen > offset + 2)
                s = word + offset + 2;
            createLeaf(word[offset+1], s, p);
            if(leafLen > offset + 1)
                s = lf->word + offset + 1;
            else
                s = noSuffix;
            createLeaf(lf->word[offset], s, p);
            delete [] lf->word;
            delete lf;
            return ;
        }
        else{
            p = p->ptrs[pos];
            word++;
        }
    }
}
/*****************spellcheck.cpp********************/
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <cstring>
#include <cctype>
#include "trie.h"
using namespace std;
// Converts the NUL-terminated string `s` to upper case in place.
//
// Returns `s` itself; a null pointer is returned unchanged. Characters that
// toupper() does not map are left as they are.
char *Strupr(char *s)
{
    if(s == 0)
        return s;
    for(char *cur = s; *cur != '\0'; ++cur){
        // toupper() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        *cur = static_cast<char>(std::toupper(static_cast<unsigned char>(*cur)));
    }
    return s;
}
// Spell checker driver.
//
// Loads the words of the file "dictionary" (upper-cased) into a trie, prints
// the trie, then scans a text file and prints every word that is not in the
// trie together with its line number. The text file name is taken from the
// single command-line argument, or asked for on standard input otherwise.
//
// Fixes over the previous version:
//  - the second parameter is `char *argv[]` (it was `char *argv`, so
//    `&argv[1]` did not address the first argument);
//  - all reads into the fixed-size buffers are bounded;
//  - an empty dictionary is reported instead of building a trie from an
//    uninitialised buffer;
//  - isalpha()/toupper() receive unsigned char values;
//  - the scan stops on any read failure, not only on end-of-file.
int main(int argc, char *argv[])
{
    char fileName[25] = "", s[80], ch;
    int i, lineNum = 1;
    ifstream dictionary("dictionary");
    if(dictionary.fail()){
        cerr << "cannot open 'dictionary'\n";
        exit(-1);
    }
    // width() limits the next >> into a char buffer to sizeof-1 characters;
    // it is reset by every extraction, so it is set again before each read.
    dictionary.width(sizeof s);
    if(!(dictionary >> s)){
        cerr << "'dictionary' contains no words\n";
        exit(-1);
    }
    Trie trie(Strupr(s));
    for(;;){
        dictionary.width(sizeof s);
        if(!(dictionary >> s))
            break;
        trie.insert(Strupr(s));
    }
    trie.printTrie();
    if(argc != 2){
        cout << "Enter a file name: ";
        cin.width(sizeof fileName);
        cin >> fileName;  // on failure fileName stays "" and the open below fails
    }
    else{
        strncpy(fileName, argv[1], sizeof fileName - 1);
        fileName[sizeof fileName - 1] = '\0';
    }
    ifstream textFile(fileName);
    if(textFile.fail()){
        cout << "cannot open " << fileName << endl;
        exit(-1);
    }
    cout << "misspelled words:\n";
    bool more = !textFile.get(ch).fail();
    while(more){
        // Skip everything that is not a letter, counting line breaks.
        while(more && !isalpha((unsigned char)ch)){
            if(ch == '\n')
                lineNum++;
            more = !textFile.get(ch).fail();
        }
        if(!more)
            break;
        // Collect one word in upper case. Letters beyond the buffer capacity
        // are consumed but not stored, so an over-long word is checked (and
        // reported) in truncated form.
        i = 0;
        while(more && isalpha((unsigned char)ch)){
            if(i < (int)sizeof s - 1)
                s[i++] = (char)toupper((unsigned char)ch);
            more = !textFile.get(ch).fail();
        }
        s[i] = '\0';
        if(!trie.wordFound(s))
            cout << s << " on line " << lineNum << endl;
    }
    dictionary.close();
    textFile.close();
    return 0;
}