kmp总结

最新推荐文章于 2022-06-01 13:36:23 发布

czysjr

最新推荐文章于 2022-06-01 13:36:23 发布

阅读量475

点赞数

分类专栏：字符串算法

本文链接：https://blog.csdn.net/czysjr/article/details/40510459

版权

字符串算法专栏收录该内容

4 篇文章 0 订阅

订阅专栏

因为kmp学的时间有点早，所以这也是我一直有些含糊的算法之一吧

matrix67这篇很经典的kmp的讲解也不知道看过多少遍了。。。对了今天看到了这个关于matrix67和他老婆的故事真的觉得很感动。。（一下子浪费了半个小时T T）简直是让人相信爱情了T T。。还有超有爱的这个

呜呜呜太感人了

言归正传，先上几道裸题吧

poj 3461

纯裸题。kmp 有一个易错点：两次循环的初始值是不一样的！子串（模式串）是要和它自己的前缀匹配，所以从 1 开始扫；而主串是要和子串进行匹配，所以从 0 开始！

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <cstring>
using namespace std;
// POJ 3461: count the (possibly overlapping) occurrences of the pattern `a`
// inside the text `b` with KMP.
char a[10005], b[1000005];
// fail[i] = index of the last character of the longest proper border of
// a[0..i], or -1 when that prefix has no border.
// Named `fail` instead of `next`: together with `using namespace std;` a
// global called `next` is ambiguous with std::next on C++11 and later.
int fail[10005];
int main()
{
    int t;
    if (scanf("%d", &t) != 1) return 0;
    while (t-- > 0) {
        // Field widths keep over-long tokens from overrunning the buffers.
        if (scanf("%10004s%1000004s", a, b) != 2) break;
        const int lena = static_cast<int>(strlen(a));
        const int lenb = static_cast<int>(strlen(b));

        // Failure table: the pattern is matched against its own prefix, so
        // the scan starts at index 1.
        fail[0] = -1;
        for (int i = 1, j = -1; i < lena; ++i) {
            while (j >= 0 && a[j + 1] != a[i]) j = fail[j];
            if (a[j + 1] == a[i]) ++j;
            fail[i] = j;
        }

        // Matching: the text is scanned from index 0.
        int ans = 0;
        for (int i = 0, j = -1; i < lenb; ++i) {
            while (j >= 0 && a[j + 1] != b[i]) j = fail[j];
            if (a[j + 1] == b[i]) ++j;
            if (j == lena - 1) {
                ++ans;
                j = fail[j];  // fall back so overlapping matches are counted
            }
        }
        printf("%d\n", ans);
    }
    return 0;
}

poj 2406

这看起来真的是蛮神奇的一件事：

一个有循环节 k 的串它从第k + 1 位到第 n 位的next数组是单调的！

其实仔细想想这还是挺显然的，但是如果只是让我自己想的话肯定不会把这个和kmp联系起来。。。

简单的证明：

假设我们有一个字符串ababab,那么next[6]=4对吧，由于next的性质是，匹配失败后，下一个能继续进行匹配的位置，也就是说，把字符串的前四个字母，abab,平移2个单位，这个abab一定与原串的abab重合（否则就不满足失败函数的性质），这说明了什么呢，由于字符串进行了整体平移，而平移后又要重叠，那么必有
s[1]=s[3],s[2]=s[4],s[3]=s[5],s[4]=s[6]，说明长度为2的字符串在原串中一定重复出现。一般地，n - next[n] 就是最小循环节的长度：若 n 能被 n - next[n] 整除，则最多重复 n / (n - next[n]) 次，否则答案为 1。

代码太短了。。

poj 2752

是真的每一题都太机智了还是我太傻了啊。。。

论next 数组的100种用途。。

因为我们知道 lens 一定是一个可行解，而其余的解都必须既是整个串的前缀又是它的后缀，其中最大的就是 next[lens]，再沿着 next 不断往前跳，就能求出全部的解了。

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <vector>
#define N 1000005
using namespace std;
int next[N];
char s[N];
void work(int x){
    if(x == -1)return;
    work(next[x]);
    printf("%d ", x + 1);    
}
int main()
{
    while(scanf("%s", s) != EOF ){
        int j = -1; next[0] = -1;
        int lens = strlen(s);
        for(int i = 1; i < lens; i ++){
            while(j >= 0 && s[j + 1] != s[i])j = next[j];
            if(s[j + 1] == s[i])j ++;
            next[i] = j;    
        }   
     //   for(int i = 0; i < lens; i ++)printf("%d ", next[i]);cout<<endl;
        work(lens - 1);cout<<endl;
    }//system("pause");
    return 0;
}

hdu 4763

很简单的题，还是找出next后然后找规律，把中间的数扫一遍就行了

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <cstring>
#define N 1000005
using namespace std;
char s[N];
int next[N], lens;
bool pd(int x){
    for(int i = x * 2 - 1; i < lens - x; i ++)
        if(next[i] >= x - 1)return 1; return 0;    
}
int main()
{
    int t; scanf("%d", &t); while(t --){
        scanf("%s", s);
        int j = -1; next[0] = -1; lens = strlen(s);
        for(int i = 1; i < lens; i ++){
            while(j >= 0 && s[j + 1] != s[i])j = next[j];
            if(s[j + 1] == s[i])j ++;
            next[i] = j;    
        }    
        int ans = 0;
        for(int i = next[lens - 1] + 1; i > 0; i --)
            if(pd(i)){ans = i; break;}  
        cout<<ans<<endl;
    }
   // system("pause");
    return 0;
}

poj 2185

就是把每一行每一列长和宽分别求出来循环的序列数，然后分别算出来长和宽需要的数量乘起来就好了。

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <cstring>
#define si s[i]
using namespace std;
int r, c, next[10005];
char s[10005][80];
// Greatest common divisor by the Euclidean algorithm; gcd(0, b) == b.
int gcd(int a, int b){
    while (a) {
        const int rest = b % a;
        b = a;
        a = rest;
    }
    return b;
}
// Least common multiple of a and b.  Divides before multiplying so the
// intermediate value does not overflow when the result itself fits in an
// int, and returns 0 when both arguments are 0 instead of dividing by zero.
int xgb(int a, int b){
    const int g = gcd(a, b);
    if (g == 0) return 0;
    return a / g * b;
}
int main()
{
    scanf("%d%d", &r, &c);
    int tmpx = 1, tmpy = 1;
    for(int i = 0; i < r; i ++){
        scanf("%s", s[i]);
        int j = -1; next[0] = -1;
        for(int k = 1; k < c; k ++){
            while(j >= 0 && si[j + 1] != si[k])j = next[j];
            if(si[j + 1] == si[k])j ++;
            next[k] = j;    
        }
        tmpx = xgb(tmpx, c - next[c - 1] - 1); 
    }
    if(tmpx > c) tmpx = c;
    for(int i = 0; i < c; i ++){
        int j = -1;
        for(int k = 1; k < r; k ++){
            while(j >= 0 && s[k][i] != s[j + 1][i])j = next[j];
            if(s[j + 1][i] == s[k][i])j ++;
            next[k] = j;
        }
        tmpy = xgb(tmpy, r - next[r - 1] - 1);
    }
    if(tmpy > r)tmpy = r;
    cout<<tmpx * tmpy<<endl;
   // system("pause");
    return 0;
}

这里算是一个kmp 的应用吧，单看这个结论真的是太神奇了，但是想到证明的原因就会觉得很显然了。

连续抛一枚硬币抛出长为L的不同排列的几率是不同的，而不是我们以为的 1/ (2 ^ L)。每次失配的时候重新匹配的起始节点应该是这个点的取反（也就是这次掷出来的点）与需要匹配出来的串的前缀。

基础的kmp就到这里吧，下面是拓展kmp

扩展kmp 讲得很好了，字符串这种东西果然是要用图才能讲明白的

现在这个代码是有错的

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <cstring>
using namespace std;
// Extended KMP demo.  Reads a pattern `a` and a text `b` and prints
//   line 1: nxt[i] = length of the longest common prefix of a[i..] and a,
//           for i in [0, lena);
//   line 2: ext[i] = length of the longest common prefix of b[i..] and a,
//           for i in [0, lenb).
// NOTE(review): the roles "a = pattern, b = text" are inferred from the
// table being built over `a`; confirm against the intended input order.
char a[55], b[55];
// Named `nxt` instead of `next`: with `using namespace std;` a global called
// `next` is ambiguous with std::next on C++11 and later.
int nxt[55], ext[55];
int main()
{
    // Field widths keep over-long tokens from overrunning the buffers.
    if (scanf("%54s%54s", a, b) != 2) return 0;
    const int lena = static_cast<int>(strlen(a));
    const int lenb = static_cast<int>(strlen(b));

    // [k, p] is the match window that reaches furthest to the right so far:
    // a[k..p] == a[0..p-k].  p must be the absolute end index i + len - 1.
    nxt[0] = lena;                         // a matches itself completely
    for (int i = 1, k = 0, p = -1; i < lena; ++i) {
        int len = 0;
        if (i <= p) {
            // Inside the window a[i..p] mirrors a[i-k..p-k]; reuse what is
            // known there, but never beyond the window end.
            len = nxt[i - k];
            if (len > p - i + 1) len = p - i + 1;
        }
        while (i + len < lena && a[i + len] == a[len]) ++len;
        nxt[i] = len;
        if (i + len - 1 > p) { k = i; p = i + len - 1; }
    }
    for (int i = 0; i < lena; ++i) printf("%d ", nxt[i]);
    printf("\n");

    // Same sweep over the text; here the window means b[k..p] == a[0..p-k].
    for (int i = 0, k = 0, p = -1; i < lenb; ++i) {
        int len = 0;
        if (i <= p) {
            len = nxt[i - k];
            if (len > p - i + 1) len = p - i + 1;
        }
        while (i + len < lenb && len < lena && b[i + len] == a[len]) ++len;
        ext[i] = len;
        if (i + len - 1 > p) { k = i; p = i + len - 1; }
    }
    for (int i = 0; i < lenb; ++i) printf("%d ", ext[i]);
    printf("\n");
    return 0;
}

vj 1866 动物园

我写的这种方法代码乍一看简直就是一个纯kmp，事实上也差不多，就是求出next数组后，下一遍还是扫它自身，但是这时的j 一直保证比 i / 2 小就可以了。^_^ > _ <

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cmath>
#include <cstring>
#define MAXN 1100005
#define mod 1000000007
using namespace std;
char s[MAXN];
int next[MAXN], lala[MAXN];
long long ans = 1;
int main()
{
    int t; scanf("%d", &t); while(t --){
        scanf("%s", s + 1); int j = 1, lens = strlen(s + 1); ans = 1;
        next[1] = 0; j = 0; lala[1] = 1;
        for(int i = 2; i <= lens; i ++){
            while(j && s[i] != s[j + 1])j = next[j];
            if(s[i] == s[j + 1])j ++;
            next[i] = j; 
            lala[i] = lala[j] + 1;
        }    j = 0;
        for(int i = 2; i <= lens; i ++){
            while(j && s[i] != s[j + 1])j = next[j];
            if(s[i] == s[j + 1])j ++;    
            if(j * 2 > i)j = next[j];
            ans = (ans * (lala[j] + 1)) % mod;
        }
        cout<<ans<<endl;
    }
  //  system("pause");
    return 0;
}

hdu 1841

找出最小的同时包含两个串的串。

显然，最后的结果只包含这几种情况： 1的后缀接2的前缀， 2的后缀接1的前缀， 1包含在2中， 2包含在1中， 1和2首尾相连。

第一次忘记了包含这种情况~~~~(>_<)~~~~

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <cmath>
#include <vector>
#include <cstdlib>
#include <time.h>
#include <queue>
#define MAXN 1000005
using namespace std;
char s1[MAXN], s2[MAXN];
int next[MAXN], ans;
inline void getnext(char s[]){
	next[0] = -1;
	int j = -1, len = strlen(s);
	for(int i = 1; i < len; i ++){
		while(j >= 0 && s[j + 1] != s[i])j = next[j];
		if(s[j + 1] == s[i]) j ++;
		next[i] = j;
	}
}
inline int pei(char s[], char s2[]){
	int j = -1, len = strlen(s), len2 = strlen(s2);
	for(int i = 0; i < len; i ++){
		while(j >= 0 && s2[j + 1] != s[i])j = next[j];
		if(s2[j + 1] == s[i]) j ++;
		if(j  + 1 == len2)return -2;
	}return j;
}
// HDU 1841: for each pair of strings print the length of the shortest string
// that contains both of them.
int main()
{
	int t;
	scanf("%d", &t);
	for(; t--; ){
		scanf("%s%s", s1, s2);
		ans = -1;
		const int len_first = strlen(s1);
		const int len_second = strlen(s2);

		// s1 inside s2: s2 alone is the answer.
		getnext(s1);
		const int tail2_head1 = pei(s2, s1);
		if(tail2_head1 == -2){
			std::cout << len_second << std::endl;
			continue;
		}

		// s2 inside s1: s1 alone is the answer.
		getnext(s2);
		const int tail1_head2 = pei(s1, s2);
		if(tail1_head2 == -2){
			std::cout << len_first << std::endl;
			continue;
		}

		// Otherwise glue them together on the longer of the two overlaps;
		// pei returns overlap - 1, so -1 from both means plain concatenation.
		const int best = std::max(tail2_head1, tail1_head2);
		std::cout << len_first + len_second - best - 1 << std::endl;
	}
	return 0;
}