字符串的查找算法

最新推荐文章于 2022-07-27 17:30:16 发布

Jack_Lpz

最新推荐文章于 2022-07-27 17:30:16 发布

阅读量612

点赞数

分类专栏：算法

本文链接：https://blog.csdn.net/u012016940/article/details/39237109

版权

算法专栏收录该内容

8 篇文章 0 订阅

订阅专栏

//包括BF算法 KMP算法 Hash算法
#include "stdafx.h"
#include <iostream>
#include <string>
using namespace std;

/**
 * Brute-force (BF) substring search.
 *
 * Looks for the first occurrence of the NUL-terminated pattern T inside the
 * NUL-terminated text S, starting the scan at index pos.
 *
 * @param S    text to search in
 * @param T    pattern to search for
 * @param pos  index in S at which the scan starts (0 <= pos <= strlen(S))
 * @return     index in S of the first match at or after pos, or -1 if the
 *             pattern does not occur; an empty pattern matches at pos
 * @throws std::out_of_range if pos is negative or greater than strlen(S)
 *
 * Worst-case running time is O(sLen * tLen) because the text index is
 * rewound after every mismatch.
 */
static int find(const char S[], const char T[], int pos)
{
    const int sLen = static_cast<int>(std::strlen(S));
    const int tLen = static_cast<int>(std::strlen(T));
    if (pos < 0 || pos > sLen)
        throw std::out_of_range("越界错误!");

    int i = pos;   // current index in the text
    int j = 0;     // current index in the pattern
    while (i < sLen && j < tLen)
    {
        if (S[i] == T[j])
        {
            // Characters agree: advance both cursors.
            ++i;
            ++j;
        }
        else
        {
            // Mismatch: rewind the text to one past the start of this attempt
            // and restart the pattern from its first character.
            i = i - j + 1;
            j = 0;
        }
    }

    // The whole pattern was consumed, so the match began j characters back.
    if (j >= tLen)
        return i - j;
    return -1;
}

/*
我们会发现这个算法存在效率问题：它的最坏时间复杂度为 O(sLen*tLen)，当存在大量回溯现象的时候
便显得有些吃力。
例如 sStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbb";
       tStr = "aaaaaaaaaaaaaaaaaaaaaaaaaaabbbbb";
    的时候，每次模式串在主串中遍历的时候，总会在模式串出现b的时候，不匹配，但是这时每次都要遍历
    大量的a字符，使得模式串每次都会从j=0----->j=27 j=28后j又会变为0
   现在咱们看看KMP算法的优势（代码如下）
*/

/**
 * Knuth-Morris-Pratt (KMP) substring search.
 *
 * Looks for the first occurrence of the NUL-terminated pattern T inside the
 * NUL-terminated text S, starting the scan at index pos. Unlike the
 * brute-force search, the text index never moves backwards.
 *
 * @param S    text to search in
 * @param T    pattern to search for
 * @param pos  index in S at which the scan starts (0 <= pos <= strlen(S))
 * @return     index in S of the first match at or after pos, or -1 if the
 *             pattern does not occur; an empty pattern matches at pos
 * @throws std::out_of_range if pos is negative or greater than strlen(S)
 */
static int kmp_find(const char S[], const char T[], int pos)
{
    const int sLen = static_cast<int>(std::strlen(S));
    const int tLen = static_cast<int>(std::strlen(T));
    if (pos < 0 || pos > sLen)
        throw std::out_of_range("越界错误!");

    // An empty pattern matches immediately. Returning here also avoids
    // writing next[0] into a zero-length allocation.
    if (tLen == 0)
        return pos;

    // next[k] is the index of the last character of the longest proper prefix
    // of T[0..k] that is also a suffix of T[0..k], or -1 if there is none.
    int *next = new int[tLen];
    next[0] = -1;
    for (int k = 1; k < tLen; ++k)
    {
        int val = next[k - 1];
        // Fall back through shorter borders until one can be extended by T[k].
        while (val >= 0 && T[k] != T[val + 1])
            val = next[val];
        if (T[k] == T[val + 1])
            next[k] = val + 1;
        else
            next[k] = -1;
    }

    int i = pos;   // current index in the text (never decreases)
    int j = 0;     // current index in the pattern
    while (i < sLen && j < tLen)
    {
        if (S[i] == T[j])
        {
            ++i;
            ++j;
        }
        else if (j == 0)
        {
            // Nothing matched yet: just move on in the text.
            ++i;
        }
        else
        {
            // Reuse the longest border of the part already matched.
            j = next[j - 1] + 1;
        }
    }
    delete[] next;

    if (j >= tLen)
        return i - j;
    return -1;
}

/*
KMP 算法能够有效地防止回溯现象：指针 i 是不会向后回退的；而在上面的 BF 算法中有 i = i-j+1，
此时指针的回溯则比较大。
关于 KMP 的理解，这里有一篇博客讲得比较详细：
http://blog.chinaunix.net/uid-26548237-id-3367953.html
主要是"部分匹配"有点难以理解，呵呵。
"部分匹配"的实质是，有时候，字符串头部和尾部会有重复。
比如，"ABCDAB"之中有两个"AB"，那么它的"部分匹配值"就是2（"AB"的长度）。
搜索词移动的时候，第一个"AB"向后移动4位（字符串长度-部分匹配值），
就可以来到第二个"AB"的位置。
*/

// Demo driver: runs kmp_find on a fixed text/pattern pair and prints the
// index it returns, then reads one character from stdin before exiting.
int _tmain(int argc, _TCHAR* argv[])
{
    char text[] = "qqqqqqqqqqqqqwwwwww";
    char pattern[] = "qqqqwww";

    // Search from the very beginning of the text.
    const int matchIndex = kmp_find(text, pattern, 0);
    cout << matchIndex << endl;

    // Blocks until a character is available on stdin.
    getchar();
    return 0;
}

/*
//还有一些字符串查找算法例如 hash字符串查找（以下是摘自互联网的算法）
unsigned long cryptTable[0x500];
//以下的函数生成一个长度为0x500（合10进制数：1280）的cryptTable[0x500]
void prepareCryptTable()
{
  unsigned long seed = 0x00100001, index1 = 0, index2 = 0, i;
  for( index1 = 0; index1 < 0x100; index1++ )
  {
   for( index2 = index1, i = 0; i < 5; i++, index2 += 0x100 )
   {
    unsigned long temp1, temp2;

    seed = (seed * 125 + 3) % 0x2AAAAB;
    temp1 = (seed & 0xFFFF) << 0x10;

    seed = (seed * 125 + 3) % 0x2AAAAB;
    temp2 = (seed & 0xFFFF);

    cryptTable[index2] = ( temp1 | temp2 );
   }
  }
}
//以下函数计算lpszFileName 字符串的hash值，其中dwHashType 为hash的类型，
//在下面GetHashTablePos函数里面调用本函数，其可以取的值为0、1、2；该函数
//返回lpszFileName 字符串的hash值；
unsigned long HashString( char *lpszFileName, unsigned long dwHashType )
{
  unsigned char *key = (unsigned char *)lpszFileName;
  unsigned long seed1 = 0x7FED7FED;
  unsigned long seed2 = 0xEEEEEEEE;
  int ch;
  while( *key != 0 )
  {
   ch = toupper(*key++);

   seed1 = cryptTable[(dwHashType << 8) + ch] ^ (seed1 + seed2);
   seed2 = ch + seed1 + seed2 + (seed2 << 5) + 3;
  }
  return seed1;
}
//在main中测试argv[1]的三个hash值：
int main( int argc, char **argv )
{
  unsigned long ulHashValue;
  int i = 0;
  if ( argc != 2 )
  {
   printf("please input two arguments\n");
   return -1;
  }
     //初始化数组：crytTable[0x500]
  prepareCryptTable();
    //打印数组crytTable[0x500]里面的值
  for ( ; i < 0x500; i++ )
  {
         if ( i % 10 == 0 )
      {
    printf("\n");
    }
   printf("%-12lX", cryptTable[i] );
     }
  ulHashValue = HashString( argv[1], 0 );
  printf("\n----%lX ----\n", ulHashValue );
  ulHashValue = HashString( argv[1], 1 );
  printf("----%lX ----\n", ulHashValue );
  ulHashValue = HashString( argv[1], 2 );
  printf("----%lX ----\n", ulHashValue );
  return 0;
}
*/

Jack_Lpz

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
字符串的查找算法

//包括BF算法 KMP算法 Hash算法#include "stdafx.h"#include #include using namespace std;static int find(char S[],char T[], int pos){ int i = pos; int j = 0; int sLen = strlen(S); int tLen =
复制链接

扫一扫

专栏目录