多模式字符串匹配

3 篇文章 0 订阅

// Not extensively tested; bugs may remain.

 

#ifndef HASHLIST_H
#define HASHLIST_H


// One link of a HashList chain.
struct Node
{
 char *data;   // text held by this node; Insert() fills it with a malloc'ed copy
 int Index;    // number Insert() assigned to this entry from its running counter
 Node *next;   // following node, or NULL at the end of the chain

};

// Singly linked list of strings. `first` is a head node that carries no
// string of its own; stored entries start at first->next.
class HashList
{
public:
 HashList();              // creates the empty list (head node only)
 int Insert(char *str);   // appends a copy of str and returns the index given to it
 int Search(char *str);   // returns the index stored with str, or -1 when str is absent
private:

 Node *first;             // head node allocated by the constructor
 
};


#endif

 

 

#include "HashList.h"
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <string.h>
using namespace std;
// File-wide running counter; Insert() hands out its successive values as
// node indices.
static int COUNT = 0;

// Builds an empty list: just the head node, with nothing chained after it.
HashList::HashList()
    : first(new Node)
{
    first->next = NULL;
}

int HashList::Insert(char *str)
{
 Node *p =first;
 while(p->next)
 {
  p = p->next; 
 }

 Node *s = new Node;
 s->data = (char *)malloc(strlen(str)+1);
 strcpy(s->data,str);
 s->Index = COUNT++;
 p->next = s;
 s->next = NULL;

 return COUNT-1;

}
int HashList::Search(char *str)
{
 Node *p = first;
 if(p->next ==NULL)
 {
  return -1;
 }
 p = p->next;
 while(p)
 {
  if(strcmp(p->data,str) == 0)
   
  {
    return p->Index; 
    
  }
  p = p->next;
 }

 if(p == NULL)
      return -1;

 

}

 

#ifndef MWM_H
#define MWM_H
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Modulus applied to the hash; ELFHash() returns values in [0, number).
// NOTE(review): this object and ELFHash() are defined (not just declared) in
// a header, so including the header from more than one source file would
// produce duplicate definitions — confirm it is only ever included once.
int number = 1001;

// ELF-style string hash reduced modulo `number`.
//
// str:     NUL-terminated text; NULL is treated like the empty string.
// returns: a bucket number in [0, number).
//
// The arithmetic is done on unsigned values so that shifting into the top
// bit is well defined and the result does not depend on the width of `long`,
// and each character is read as an unsigned byte so bytes >= 0x80 are not
// sign-extended into the hash.
int ELFHash(char *str){
    if (!str)
        return 0;

    unsigned int hash = 0;
    while (*str) {
        hash = (hash << 4) + (unsigned char)*str++;
        const unsigned int high = hash & 0xF0000000u;
        if (high != 0) {
            // Fold the top nibble back into the low bits, then clear it.
            hash ^= high >> 24;
            hash &= ~high;
        }
    }
    return (int)(hash & 0x7FFFFFFFu) % number;
}

// Multi-pattern matcher: CreateShift() builds the lookup tables from a list
// of patterns and Search() scans a text with them.
class WM{
public:
 WM (){};
 ~WM(){};
 // Builds the shift table and the per-block word lists from the n patterns in str.
 void CreateShift(char *str[],int n);
 // NOTE(review): no definition of CreatePrefix() or CreateHash() is visible
 // in this file — confirm they exist before calling them.
 void CreatePrefix();
 void CreateHash();
 // Scans text and prints the patterns it finds.
 void Search(char *text);
 
private:

 // Shift distance per two-character block, indexed by the block's HashList
 // index; CreateShift() resets every slot to -1 before filling the table.
 int ArrShift[1001];
 

};

 

 


#endif

 

#include "mwm.h"
#include "HashList.h"

// Buckets for two-character blocks; a block is filed under Hash[ELFHash(block)].
HashList Hash[1001];
// One prefix group: a prefix string plus the words filed under it.
struct preNode{
char *str[50];   // words of this group; the first NULL entry ends the list
char *strpre;    // the group's prefix (insertpre stores the word's first three characters); NULL = unused group
};
// Table of prefix groups. The member functions treat a NULL strpre / str[]
// entry as "unused" and nothing in the class sets those pointers up, so an
// instance needs zero-initialised storage (the global ArrPre below has it).
class PreNode{
private:
 
 preNode pre[200];   // groups in insertion order; the first NULL strpre ends the used part
public:
 //PreNode(){ memset(str,0,4000);}
 // Prints every stored word that the text at str starts with, scanning the
 // groups from number n onwards. n is 1-based: the scan starts at pre[n-1].
 int searchstr(char *str,int n); 
 // Files str under the group named by its first three characters.
 void insertpre(char *str);
 // Returns the 0-based position of the group whose prefix equals prestr, or -1.
 int searchpre(char *prestr);
 
};
// Length of the shortest pattern; written by WM::CreateShift, read by WM::Search.
int len = 0;
// Word lists per block, indexed by the block's HashList index.
PreNode ArrPre[1001];
/*********************************************
  查找词词
*********************************************/
int PreNode::searchstr(char *str,int n)

 int i = 0;
 int strlength = 0;
 char *p = str;
 while(pre[n-1].strpre != NULL){
  while(pre[n-1].str[i]!= NULL){
  
   strlength = strlen(pre[n-1].str[i]);
   char *word =(char *)malloc(strlength +1);
   strcpy(word,pre[n-1].str[i]);
   word[strlength] = '\0';
   int j = 0;
   while(j++ < strlength){
    if(*p++ != *word++) break;

   }
  
   if(j == strlength+1)
    printf("%s\n",pre[n-1].str[i]);
   p =str;
   i++;
  }
  n++;
  i = 0;
 }
 return 0;
};
/*****************************************
  按前缀查找
*****************************************/
int PreNode::searchpre(char *prestr)

 int i = 0;
 int tag = 0;
 
 while(i < 200){
  if(pre[i].strpre  == NULL)
   break;
  
  if(strcmp(pre[i].strpre ,prestr) == 0)
  { tag = 1; break;}
  i++;
 }
 if(tag == 1)
  return i;
 else
  return -1;
};
/******************************************
  Insert a word under its prefix.

  Files a copy of `str` in the group whose prefix equals the first three
  characters of `str`, creating that group if it does not exist yet.
  The word is silently dropped when `str` is NULL, when the group table or
  the group's word list is full, or when memory cannot be allocated.
******************************************/
void PreNode::insertpre(char *str)
{
    const int groupCount = sizeof(pre) / sizeof(pre[0]);
    const int wordCount = sizeof(pre[0].str) / sizeof(pre[0].str[0]);

    if (str == NULL)
        return;

    // strncpy zero-pads when str has fewer than three characters.
    char prefix[4];
    strncpy(prefix, str, 3);
    prefix[3] = '\0';

    // Find the matching group, or the first unused one.
    int g = 0;
    while (g < groupCount && pre[g].strpre != NULL
           && strcmp(pre[g].strpre, prefix) != 0)
        ++g;
    if (g == groupCount)
        return;

    // Find the first free word slot of that group.
    int slot = 0;
    while (slot < wordCount && pre[g].str[slot] != NULL)
        ++slot;
    if (slot == wordCount)
        return;

    char *word = (char *)malloc(strlen(str) + 1);
    if (word == NULL)
        return;
    strcpy(word, str);

    // Only a new group needs a prefix string; an existing one keeps its own.
    if (pre[g].strpre == NULL) {
        char *key = (char *)malloc(sizeof(prefix));
        if (key == NULL) {
            free(word);
            return;
        }
        memcpy(key, prefix, sizeof(prefix));
        pre[g].strpre = key;
    }
    pre[g].str[slot] = word;
}

/*****************************************
 创建shift ,prefix和词表 
******************************************/
void WM::CreateShift(char *str[],int n)

 memset(ArrShift,-1,1001*4); //初始化为-1
 char Block[3] = {0};
 if(n > 1001) return ;
  len = strlen(str[0]);
 for(int i = 1 ; i < n; i++){
  if(len > strlen(str[i]))
   len = strlen(str[i]);     //获取最短模式长度
 }
 int cct = 0;
 for(int ii = 0;ii < n ; ii++){         //对这n个模式建立三个表
  int count = 0,ct = 0,index = 0;
  char *p = str[ii];
  while(count++ < len-1){
   //"youth","state","freshness","fresh","imagination","matter"
   cct++;
    Block[ct++] = *p++;   
    Block[ct++] = *p;
    Block[ct] = '\0';          //获取Block
    
   index = ELFHash(Block);
   int jud = Hash[index].Search(Block); //查找Block是否已在表中
   if( jud != -1){
    printf("%s\n","this block is already in the list! ");
    if(ArrShift[jud] > len - count -1 || (len - count -1)==0){ //如果表中Block值大于现值或为0
     ArrShift[jud] = len - count -1;
     if(ArrShift[jud] == 0){   //如果为0则插入此Block
      
      ArrPre[jud].insertpre(str[ii]);
     }
     ct = 0;
     continue;
    }
    else{
     printf("%s\n","else has been called");
     ct = 0 ;
     continue;
    }
   }
   //当表中不存在此Block
   int m = Hash[index].Insert(Block);  
   ArrShift[m] = len-count-1; 
   printf("%s\t%d\t%d\n","add num to shift",m,len-count-1);
   if(ArrShift[m] == 0){
    printf("%s\n","Add to pre has been called");
    ArrPre[m].insertpre(str[ii]);
    
   }
   ct = 0;
    
  }
 
  
 }

 printf("%d\n",cct);
}
/*************************************************
    在text中查找模式
*************************************************/
void WM::Search(char *text)

 int length = len;
 int step = -1;
 int lengthtemp = len;
 int index =-1;
 int h = -1;
 char *pp = text;
 char Block[3] = {0};
 while(length-- > 1)
  pp++;
  pp--;
 while(*pp){ //此处有BUG
  
  Block[0] = *pp++;
  Block[1] = *pp++;
  Block[2] = '\0';
  //"Youth  youth is not a time of life; it is a state of mind; it is not a matter of rosy cheeks, red lips and supple knees; it is a matter of the will, a quality of the imagination, a vigor of the emotions; it is the freshness of the deep springs of life."
  char *p = pp;
  index = ELFHash(Block);
  h = Hash[index].Search(Block);
  if(h < 0){   //如果不在shift 中,则把指针向前移动,把此Block移出模式
   lengthtemp = len - 2;
   while(lengthtemp-- >0 )
    pp++;
    pp--;
   continue;
  }
  step = ArrShift[h];
  if(step > 0){  //如果在shift 中,则把指针向前移动step距离
   while(step-- >0)  
    pp++;
   pp--;
   pp--;
   continue;
  }
  if(step == 0){     //如果在shift中,且值为0,去匹配前缀
   pp++;
   pp++;
   char *ptemp = p;
   
   int lentemp = len-1;
   while(lentemp-- > 0)
    ptemp--;
    ptemp--;
   char *ptemp2 = ptemp;
   char *pretemp = (char *)malloc(4);
   strncpy(pretemp,ptemp,3);
   pretemp[3] = '\0';
   int strindex = -1;
   if(strindex = ArrPre[h].searchpre(pretemp) >= 0){  //如果前缀匹配成功
    ArrPre[h].searchstr(ptemp2,strindex);  //用原模式和text匹配
   }
  
  }
   //printf("this is the while end !\n");

 }
    
 
}
void main()
{
 char *str[] = {"youth","state","freshness","fresh","staes","imagination","aaaes","matter","knees","cheeks"};

 WM MySearch;
 MySearch.CreateShift(str,10);
 char *test ="Youth  youth is not staesa time of life; aaaes it is a state of mind; it is not a matter of rosy cheeks, red lips and supple knees; it is a matter of the will, a quality of the imagination, a vigor of the emotions; it is the freshness of the deep springs of life.";
 MySearch.Search(test);
 //printf("%s\n","this is the main end");
 system("pause");
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值