//myfunc.h-----------------------------------------------------
#ifndef DICTIONNARY_HEAD
#define DICTIONNARY_HEAD 1
#include<cstring>
#include<fstream>
#include<iostream>
#include<string>
#include<vector>
using namespace::std;
#define MeanKinds 1000
#define MaxWordLen 100
// Reports whether `a` is an ASCII letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
bool isLetter(char a)
{
    const bool lower = (a >= 'a' && a <= 'z');
    const bool upper = (a >= 'A' && a <= 'Z');
    return lower || upper;
}
// Converts an ASCII letter to lower case.
// For a character that isLetter() rejects, a diagnostic is printed to
// standard output and '?' is returned instead.
char lowercase(char a)
{
    if (isLetter(a))
    {
        // Letters at or below 'Z' are upper case; shift them into 'a'..'z'.
        return (a <= 'Z') ? a + 'a' - 'A' : a;
    }
    std::cout << "error in function lowercase,because " << a << " is not a Letter!" << std::endl;
    return '?';
}
// Returns the offset of `a` from 'a' ('a' -> 0, 'z' -> 25).
// No range check is performed: other characters yield values outside 0..25.
int index(char a)
{
    const int offset = a - 'a';
    return offset;
}
// Hashes the NUL-terminated string `key` into a bucket number in
// 0..MeanKinds-1 using a shift-and-fold scheme (the classic ELF/PJW pattern):
// each character is added after shifting the accumulator left by four bits,
// and bits 28..31 are folded back into the low bits and then cleared.
int hash(char * key)
{
    unsigned long acc = 0;
    for (const char * p = key; *p != '\0'; ++p)
    {
        acc = (acc << 4) + *p;
        const unsigned long top = acc & 0xF0000000L;
        if (top != 0)
        {
            acc ^= top >> 24;
        }
        acc &= ~top;
    }
    return acc % MeanKinds;
}
// A location in the input text, stored as a line number and a column number.
class Position
{
public:
    int line;
    int column;

    // Both coordinates default to 0 when omitted.
    Position(int l=0,int c=0) : line(l), column(c)
    {
    }
};
class HashNode
{
public:
char * word;
HashNode * next;
vector<Position> gps;
HashNode(char * _w=NULL)
{
next=NULL;
if(_w!=NULL)
{
int len=strlen(_w);
word=new char[len];
strcpy(word,_w);
}
}
};
class HashList
{
HashNode head;
public:
void add(char * _w,int line,int column)
{
HashNode * trade=head.next;
while(trade!=NULL)
{
if(strcmp(trade->word,_w)==0)
{
(trade->gps).push_back(*(new Position(line,column)));
return;
}
trade=trade->next;
}
trade=head.next;
if(_w!=NULL)
{
head.next=new HashNode();
head.next->word=new char[strlen(_w)];
strcpy(head.next->word,_w);
(head.next->gps).push_back(*(new Position(line,column)));
head.next->next=trade;
}
}
void travel(ofstream & fout)
{
HashNode * trade=head.next;
while(trade!=NULL)
{
fout<<trade->word<<"/t";
trade=trade->next;
}
fout<<"/n";
}
vector<Position> find(char * w)
{
HashNode * trade=head.next;
while(trade!=NULL)
{
if(strcmp(trade->word,w)==0)return trade->gps;
trade=trade->next;
}
cout<<"没有找到"<<endl;
return *new vector<Position>;
}
};
class HashTable
{
HashList h[MeanKinds];
ofstream fout;
public:
HashTable()
{
fout.open("hashout.txt",ios::ate);
}
void addWord(char * _w,int line,int column)
{
unsigned int i=hash(_w);
h[i].add(_w,line,column);
}
void travel()
{
for(int i=0;i<MeanKinds;i++)
{
fout<<i<<": ";
h[i].travel(fout);
}
fout.close();
}
vector<Position> find(char * w)
{
unsigned int i=hash(w);
return h[i].find(w);
}
};
#endif
//---------------------dictionary.cpp-----------
/*
author:zhanghuichao
*/
#include "myfun.h"
/*
Key techniques: partition tree (a 26-way prefix tree), hashing, dictionary ordering
*/
// Global word index: Splitor fills it via addWord() and dumps it via travel();
// userService() queries it via find().
HashTable hashtable;
// Filter for fragments that must not be treated as words.
// The rejected fragments are "t", "s" and "ll" (what is left after an
// apostrophe when a line is split on non-letters, e.g. the "s" of "mary's").
class Filtor
{
public:
    // Rejected fragments. They point at string literals, hence const.
    const char* t[3];

    Filtor()
    {
        t[0]="t";
        t[1]="s";
        t[2]="ll";
    }

    // Returns false when `w` equals one of the rejected fragments (or is
    // NULL), true otherwise.
    bool isWord(char * w)
    {
        if(w==NULL)return false;
        // Element count must be derived from the element type: dividing by
        // sizeof(int) overruns the array wherever pointers are wider than int.
        const int count=sizeof(t)/sizeof(t[0]);
        for(int i=0;i<count;i++)
        {
            if(strcmp(t[i],w)==0)return false;
        }
        return true;
    }
};
// One node of the 26-way letter tree.
// For letter number i (0 = 'a' ... 25 = 'z'):
//   suc[i]  is the child node reached through that letter, or NULL;
//   freq[i] is a counter attached to that letter at this node.
class SubWord
{
public:
    SubWord * suc[26];
    int freq[26];

    // Value-initialization leaves every child pointer null and every counter 0.
    SubWord() : suc(), freq()
    {
    }
};
class Splitor
{
public:
SubWord head;//partition树
Filtor filtor;
ifstream fin;//("in.txt",ios::in);
ofstream fout;//("out.txt",ios::out);
int line,column;
Splitor()
{
fin.open("in.txt",ios::in);
fout.open("out.txt",ios::out);
}
void startSplit()
{
char s[1001];//保存一行
line=0;
while( !fin.eof() )
{
fin.getline(s,1000,'/n');
splitSentence(line,s);
line++;
}
dict2file(); //把分词结果输出到文件中
hashtable.travel(); //输出到文件中
}
void splitSentence(int line,char * s) //把一行 切割成单词 并存表
{
char w[1001];//保存从行s中分割出的一个单词
for(int i=0;i<strlen(s);i++)
{
int j=0;
while(i<strlen(s) && !isLetter(s[i]))i++;
while(i<strlen(s) && isLetter(s[i]))w[j++]=lowercase(s[i++]);
w[j]=0;
hashtable.addWord(w,line,column=i);
//-------------此处再加过滤模块 如mary's brother 中会分割出s来 而s不能作为一个单词
partition(w);
//cout<<w<<endl;
}
}
void partition(char *w)
{
if(!filtor.isWord(w))
{
return;
}
int len=strlen(w);
if(len>MaxWordLen)
{
cout<<"warning:单词"<<w<<"长度过大过大!"<<endl;
return;
}
SubWord * trade=&head;
for(int i=0;i<len-1;i++)
{
if(trade->suc[index(w[i])]==NULL)
{
trade->suc[index(w[i])]=new SubWord();
}
trade=trade->suc[index(w[i])];
}
trade->freq[index(w[len-1])]+=1;
}
void travelPtree(SubWord * p,char * _w,bool write2file)
{
if(p==NULL)return;
int len=strlen(_w);
_w[len+1]=0;
for(int i=0;i<26;i++)
{
_w[len]=i+'a';
if(p->freq[i]!=0)
{
if(write2file==true)
{
fout<<"/t"<<_w;
for(int t=0;t<3-(len+1)/8;t++)
fout<<"/t";
fout<<"-------->"<<p->freq[i]<<"/n";
}
}
if(p->suc[i]!=NULL)
{
travelPtree(p->suc[i],_w,write2file);
}
}//输出已经在本层截至的词缀
_w[len]=0;
}
void dict2file()
{
char w[1001];
w[0]=0;
travelPtree(&head,w,true);
}
};
// Interactive word lookup for the user. Possible extension: extract keywords automatically.
// Interactive lookup loop on standard input/output.
// Repeatedly asks whether to search ('y' / 'n'); on 'y' it reads one word
// and prints every (line, column) recorded for it in the global `hashtable`.
// Returns when the user answers 'n' or when standard input ends or fails.
void userService()
{
    char c;
    char _word[100];
    while(true)
    {
        do
        {
            cout<<"检索吗(y:n)?\n";
            cout<<"请选择:";
            // Stop on end of input instead of prompting forever.
            if(!(cin>>c))return;
        }while(c!='y'&&c!='n');
        if(c=='n')return ;
        cout<<"请输入检索单词:";
        // Limit the extraction to the buffer size (terminator included).
        cin.width(sizeof(_word));
        if(!(cin>>_word))return;
        vector<Position> vivid =hashtable.find(_word);
        for(vector<Position>::iterator dagger=vivid.begin();dagger!=vivid.end();++dagger)
        {
            // Use the iterator directly; it is not convertible to Position*.
            cout<<"( line="<<dagger->line<<" , "<<"column="<<dagger->column<<" ) \n";
        }
        cout<<"\n";
    }
}
//----------------------------------------------------------
int main()
{
Splitor s;
s.startSplit();
//添加了检索功能
userService();
return 0;
}