// Not extensively tested; this code may still contain bugs.
#ifndef HASHLIST_H
#define HASHLIST_H
// One entry of a HashList chain.
struct Node
{
char *data;   // NUL-terminated key; HashList::Insert stores a malloc'ed copy here
int Index;    // index handed out by HashList::Insert for this key
Node *next;   // following entry, or NULL at the end of the chain
};
// Singly linked list of strings that starts with a dummy head node.
// NOTE(review): no destructor is declared, so nodes and their strings are
// never released.
class HashList
{
public:
HashList();              // creates the dummy head; the list starts empty
int Insert(char *str);   // appends a copy of str and returns the index assigned to it
int Search(char *str);   // returns the index stored with str, or -1 when str is absent
private:
Node *first;             // dummy head; real entries start at first->next
};
#endif
#include "HashList.h"
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
using namespace std;
// Next index that HashList::Insert hands out. It lives at file scope rather
// than inside the class, so every HashList object draws from the same sequence.
static int COUNT = 0;
// Builds an empty list: only the dummy head exists and nothing follows it.
HashList::HashList()
    : first(new Node)
{
    first->next = NULL;
}
int HashList::Insert(char *str)
{
Node *p =first;
while(p->next)
{
p = p->next;
}
Node *s = new Node;
s->data = (char *)malloc(strlen(str)+1);
strcpy(s->data,str);
s->Index = COUNT++;
p->next = s;
s->next = NULL;
return COUNT-1;
}
int HashList::Search(char *str)
{
Node *p = first;
if(p->next ==NULL)
{
return -1;
}
p = p->next;
while(p)
{
if(strcmp(p->data,str) == 0)
{
return p->Index;
}
p = p->next;
}
if(p == NULL)
return -1;
}
#ifndef MWM_H
#define MWM_H
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Number of hash buckets; ELFHash reduces its result modulo this value.
int number = 1001;

/**
 * ELF-style string hash reduced to a bucket number.
 *
 * The arithmetic is done in unsigned int so that the 4-bit left shift can
 * never overflow a signed value and the top nibble is folded back with a
 * logical (not arithmetic) right shift. Each character is read as an
 * unsigned char, so bytes >= 0x80 add a positive value whether or not plain
 * char is signed on the platform.
 *
 * @param str NUL-terminated string to hash; must not be NULL.
 * @return a value in [0, number).
 */
int ELFHash(char *str){
    unsigned int hash = 0;
    while (*str){
        hash = (hash << 4) + static_cast<unsigned char>(*str++);
        // Fold the top four bits back into the low byte, then clear them so
        // the next shift has room.
        const unsigned int high = hash & 0xF0000000u;
        if (high != 0){
            hash ^= high >> 24;
            hash &= ~high;
        }
    }
    return static_cast<int>((hash & 0x7FFFFFFFu) % static_cast<unsigned int>(number));
}
// Multi-pattern text matcher driven by a shift table that is indexed by
// two-character blocks.
class WM{
public:
WM (){};
~WM(){};
// Builds the shift table and the candidate lists from the n patterns in str.
void CreateShift(char *str[],int n);
// Declared only; no definition appears in the visible source.
void CreatePrefix();
// Declared only; no definition appears in the visible source.
void CreateHash();
// Scans text and prints every pattern occurrence it finds.
void Search(char *text);
private:
// Shift distance per block index; CreateShift resets every entry to -1
// before filling the table.
int ArrShift[1001];
};
#endif
#include "mwm.h"
#include "HashList.h"
// Bucket array for two-character blocks: ELFHash(block) selects the list that
// stores the block together with its index.
HashList Hash[1001];
// One prefix group: a stored prefix plus the patterns that begin with it.
struct preNode{
char *str[50];   // patterns of the group, filled front to back; NULL marks the first free slot
char *strpre;    // malloc'ed prefix (at most 3 characters plus NUL); NULL marks an unused group
};
// Candidate patterns for one block, grouped by their leading characters.
// The class has no constructor: its methods treat NULL pointers as "unused",
// which holds for the namespace-scope ArrPre array because objects with static
// storage are zero-initialised. An instance created elsewhere would start with
// indeterminate pointers.
class PreNode{
private:
preNode pre[200];   // prefix groups, filled front to back
public:
// Disabled constructor kept from the original source.
//PreNode(){ memset(str,0,4000);}
// Prints every stored pattern that text `str` starts with; `n` is the 1-based first group to scan.
int searchstr(char *str,int n);
// Stores pattern `str` under the group of its first three characters.
void insertpre(char *str);
// Returns the 0-based group whose prefix equals `prestr`, or -1.
int searchpre(char *prestr);
};
// Length of the shortest pattern; written by WM::CreateShift, read by WM::Search.
int len = 0;
// ArrPre[k] collects the patterns for which block index k received shift 0 in
// WM::CreateShift.
PreNode ArrPre[1001];
/*********************************************
Look up the stored patterns that match a text position
*********************************************/
// Prints (one per line) every stored pattern that the text at `str` starts
// with.
//
// str: position in the text where a match would have to begin.
// n:   1-based number of the first prefix group to examine; that group and all
//      groups after it are scanned until the first unused group.
// Returns 0 in all cases.
//
// Patterns are compared in place with strncmp. Earlier code duplicated every
// candidate with malloc and never released the copy.
int PreNode::searchstr(char *str,int n)
{
    if (str == NULL || n < 1)
        return 0;

    const int groupCap = static_cast<int>(sizeof(pre) / sizeof(pre[0]));
    const int wordCap = static_cast<int>(sizeof(pre[0].str) / sizeof(pre[0].str[0]));

    for (int g = n - 1; g < groupCap && pre[g].strpre != NULL; g++) {
        for (int w = 0; w < wordCap && pre[g].str[w] != NULL; w++) {
            const char *word = pre[g].str[w];
            // A text that ends early fails here because its NUL differs from
            // the pattern character at that position.
            if (strncmp(str, word, strlen(word)) == 0)
                printf("%s\n", word);
        }
    }
    return 0;
}
/*****************************************
Look up a group by its prefix
*****************************************/
// Finds the prefix group whose stored prefix equals `prestr`.
//
// prestr: NUL-terminated prefix to look for.
// Returns the 0-based position of the group, or -1 when the first unused group
// is reached (or all 200 groups were checked) without a match.
int PreNode::searchpre(char *prestr)
{
    for (int slot = 0; slot < 200; ++slot) {
        const char *stored = pre[slot].strpre;
        if (stored == NULL)
            return -1;   // groups are filled front to back, nothing follows
        if (strcmp(stored, prestr) == 0)
            return slot;
    }
    return -1;
}
/******************************************
Insert a prefix and a pattern
******************************************/
// Stores a copy of pattern `str` in the group keyed by its first three
// characters, creating that group when it does not exist yet.
//
// str: NUL-terminated pattern; shorter patterns use their whole text as key.
//
// The key is built in a stack buffer and the group's prefix is allocated only
// when the group is new, so nothing is leaked. When all groups, or all word
// slots of the group, are taken the pattern is dropped and a message is
// written to stderr instead of writing past the arrays.
void PreNode::insertpre(char *str)
{
    if (str == NULL)
        return;

    const int groupCap = static_cast<int>(sizeof(pre) / sizeof(pre[0]));
    const int wordCap = static_cast<int>(sizeof(pre[0].str) / sizeof(pre[0].str[0]));

    char prefix[4];
    strncpy(prefix, str, 3);   // pads with NUL when str is shorter than 3
    prefix[3] = '\0';

    // First group with an equal prefix, or else the first unused group.
    int g = 0;
    while (g < groupCap && pre[g].strpre != NULL && strcmp(pre[g].strpre, prefix) != 0)
        g++;
    if (g == groupCap) {
        fprintf(stderr, "insertpre: no free prefix group for \"%s\"\n", str);
        return;
    }

    // First free word slot inside that group.
    int w = 0;
    while (w < wordCap && pre[g].str[w] != NULL)
        w++;
    if (w == wordCap) {
        fprintf(stderr, "insertpre: prefix group is full, dropping \"%s\"\n", str);
        return;
    }

    char *copy = static_cast<char *>(malloc(strlen(str) + 1));
    if (copy == NULL) {
        fprintf(stderr, "insertpre: out of memory\n");
        return;
    }
    strcpy(copy, str);

    if (pre[g].strpre == NULL) {
        pre[g].strpre = static_cast<char *>(malloc(4));
        if (pre[g].strpre == NULL) {
            free(copy);
            fprintf(stderr, "insertpre: out of memory\n");
            return;
        }
        strncpy(pre[g].strpre, prefix, 4);
    }
    pre[g].str[w] = copy;
}
/*****************************************
Build the shift table, the prefix groups and the pattern lists
******************************************/
// Builds the matcher's tables from `n` patterns.
//
// str: array of n NUL-terminated patterns.
// n:   number of patterns; nothing is built when n <= 0, n exceeds the table
//      size, or a pattern pointer is NULL.
//
// Only the first `len` characters of each pattern are used, where `len` (a
// global) becomes the length of the shortest pattern. Every two-character
// block inside that window is registered in Hash; ArrShift keeps, per block,
// the smallest distance from the block to the end of the window. A block with
// distance 0 ends a window, so its pattern is added to the matching ArrPre
// entry for later verification.
// The progress messages printed to stdout are kept as they were.
void WM::CreateShift(char *str[],int n)
{
    // ArrPre is declared with the same number of entries as ArrShift.
    const int tableSize = static_cast<int>(sizeof(ArrShift) / sizeof(ArrShift[0]));
    // All-ones bytes make every int -1, meaning "no shift recorded".
    memset(ArrShift, -1, sizeof(ArrShift));
    if (str == NULL || n <= 0 || n > tableSize)
        return;

    // Shortest pattern length; validated completely before `len` is touched.
    size_t shortest = 0;
    for (int i = 0; i < n; i++) {
        if (str[i] == NULL)
            return;
        const size_t current = strlen(str[i]);
        if (i == 0 || current < shortest)
            shortest = current;
    }
    len = static_cast<int>(shortest);

    int cct = 0;   // number of blocks processed, printed at the end
    for (int ii = 0; ii < n; ii++) {
        for (int start = 0; start + 1 < len; start++) {
            char Block[3] = { str[ii][start], str[ii][start + 1], '\0' };
            const int shift = len - start - 2;   // distance from block to window end
            cct++;

            const int index = ELFHash(Block);
            const int jud = Hash[index].Search(Block);
            if (jud != -1) {
                printf("%s\n","this block is already in the list! ");
                if (jud < 0 || jud >= tableSize)
                    continue;   // index does not fit the tables
                // Take the smaller shift. A negative entry means the block was
                // left in Hash by an earlier build and has no shift yet. A
                // zero shift always registers the pattern as a candidate.
                if (ArrShift[jud] < 0 || ArrShift[jud] > shift || shift == 0) {
                    ArrShift[jud] = shift;
                    if (shift == 0)
                        ArrPre[jud].insertpre(str[ii]);
                } else {
                    printf("%s\n","else has been called");
                }
                continue;
            }

            // First time this block is seen.
            const int m = Hash[index].Insert(Block);
            if (m < 0 || m >= tableSize) {
                fprintf(stderr, "CreateShift: block index %d does not fit the tables\n", m);
                continue;
            }
            ArrShift[m] = shift;
            printf("%s\t%d\t%d\n","add num to shift",m,shift);
            if (shift == 0) {
                printf("%s\n","Add to pre has been called");
                ArrPre[m].insertpre(str[ii]);
            }
        }
    }
    printf("%d\n",cct);
}
/*************************************************
Search the text for the patterns
*************************************************/
// Scans `text` for the patterns registered by CreateShift and prints each
// occurrence (printing is done by PreNode::searchstr).
//
// text: NUL-terminated text to scan. Nothing happens when it is NULL or when
//       the shortest pattern length `len` is below 2, because blocks are two
//       characters wide.
//
// A window of `len` characters slides over the text. The two-character block
// that ends the window decides the next move:
//   - block unknown (or without a recorded shift): advance by len - 1;
//   - recorded shift > 0: advance by that shift;
//   - recorded shift 0: verify the candidate patterns at the window start,
//     then advance by one.
// Positions are tracked as indexes bounded by strlen(text), so the scan never
// reads beyond the terminating NUL.
void WM::Search(char *text)
{
    if (text == NULL || len < 2)
        return;

    // ArrPre is declared with the same number of entries as ArrShift.
    const int tableSize = static_cast<int>(sizeof(ArrShift) / sizeof(ArrShift[0]));
    const size_t textLen = strlen(text);
    const size_t window = static_cast<size_t>(len);

    // pos is the index of the first character of the block ending the window.
    size_t pos = window - 2;
    while (pos + 1 < textLen) {
        char Block[3] = { text[pos], text[pos + 1], '\0' };
        const int h = Hash[ELFHash(Block)].Search(Block);
        const int step = (h >= 0 && h < tableSize) ? ArrShift[h] : -1;

        if (step < 0) {        // block occurs in no pattern window
            pos += window - 1;
            continue;
        }
        if (step > 0) {
            pos += static_cast<size_t>(step);
            continue;
        }

        // step == 0: some pattern window ends with this block.
        char *start = text + (pos + 2 - window);
        int firstGroup = 1;    // 1-based group number expected by searchstr
        if (len >= 3) {
            // Every pattern has at least three characters, so the stored
            // three-character prefixes can be used to skip non-matching
            // groups. With shorter patterns the filter would be unreliable,
            // so all groups are verified instead.
            char prefix[4];
            strncpy(prefix, start, 3);
            prefix[3] = '\0';
            const int group = ArrPre[h].searchpre(prefix);
            firstGroup = (group >= 0) ? group + 1 : 0;
        }
        if (firstGroup > 0)
            ArrPre[h].searchstr(start, firstGroup);

        // Move on by a single position so overlapping occurrences are seen.
        pos += 1;
    }
}
// Demo driver: registers ten patterns, scans a sample sentence and prints the
// matches.
int main()
{
    // The patterns live in writable arrays because CreateShift takes char*;
    // binding string literals to char* is not valid in current C++.
    char patterns[][12] = {"youth","state","freshness","fresh","staes","imagination","aaaes","matter","knees","cheeks"};
    const int patternCount = static_cast<int>(sizeof(patterns) / sizeof(patterns[0]));
    char *str[sizeof(patterns) / sizeof(patterns[0])];
    for (int i = 0; i < patternCount; i++)
        str[i] = patterns[i];

    WM MySearch;
    MySearch.CreateShift(str, patternCount);

    char test[] = "Youth youth is not staesa time of life; aaaes it is a state of mind; it is not a matter of rosy cheeks, red lips and supple knees; it is a matter of the will, a quality of the imagination, a vigor of the emotions; it is the freshness of the deep springs of life.";
    MySearch.Search(test);

    // Keeps the console window open when started from an IDE on Windows.
    system("pause");
    return 0;
}