19.KMP算法

最新推荐文章于 2024-07-18 17:11:08 发布

manng •ᴗ•

最新推荐文章于 2024-07-18 17:11:08 发布

阅读量176

点赞数

分类专栏：暑期训练

本文链接：https://blog.csdn.net/qq_52008247/article/details/119741653

版权

暑期训练专栏收录该内容

16 篇文章 0 订阅

订阅专栏

KMP算法

模板
练习

模板

#include <iostream>
#include <cstring>
#include <cmath>
#include <cstdio>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1010;
// a: the text (main string); b: the pattern. Both are read from stdin in main().
char a[N],b[N];
// KMP failure table for the pattern; filled by getnext() and read by kmp().
int nextt[N];

// Builds the optimized KMP failure table for the NUL-terminated pattern p.
//
// next[j] tells the matcher which pattern index to resume from when p[j]
// fails to match the current text character; -1 means "nothing in the
// pattern can match here, move on to the next text character".
// Without the optimization, the value is the length of the longest proper
// prefix of p[0..j-1] that is also a suffix of it (the whole string does not
// count as its own prefix/suffix).
//
// Entries next[0] .. next[strlen(p)] are written, so the caller must supply
// room for strlen(p)+1 ints.
void getnext(char p[],int next[])
{
	const int len=strlen(p);
	int border=-1;	// index into the prefix side; -1 = no candidate prefix
	int pos=0;	// index into the suffix side
	next[0]=-1;	// a mismatch on the first character has no fallback

	while(pos<len)
	{
		// Candidate prefix does not extend: fall back to a shorter one.
		if(border!=-1 && p[pos]!=p[border])
		{
			border=next[border];
			continue;
		}

		++pos;
		++border;
		// Optimization: if the fallback character equals the one that just
		// failed, it would fail again, so reuse the fallback's own fallback.
		next[pos]=(p[pos]==p[border]) ? next[border] : border;
	}
}

// Returns the 0-based index of the first occurrence of pattern p in text s,
// or -1 if p does not occur.
//
// Precondition: the global table nextt must already hold the failure table
// of p (call getnext(p,nextt) first).
int kmp(char s[],char p[])
{	
	int i=0,j=0;	// i walks the text, j walks the pattern
	// Fix: measure the text that was passed in. The original used strlen(a),
	// i.e. the global buffer, which is only right when s happens to be a.
	int sl=strlen(s);
	int pl=strlen(p);
	
	while(i<sl&&j<pl)
	{
		// j==-1 means no pattern prefix fits at s[i]; otherwise the current
		// characters match. Either way both cursors advance.
		if(j==-1 || s[i]==p[j])
		{
			i++;
			j++;
		}
		else	// mismatch: i stays put
		{
			// Equivalent to sliding p to the right by j-nextt[j] positions.
			j=nextt[j];
		}
	}
	// The whole pattern was consumed, so the match starts j characters back.
	if(j==pl)	return i-j;
	return -1;
}

// Reads the text and then the pattern from stdin and prints the 0-based
// position of the first match, or -1 when there is none.
int main()
{
	cin>>a;
	cin>>b;

	// The failure table must be ready before kmp() consults it.
	getnext(b,nextt);

	const int pos=kmp(a,b);
	cout<<pos<<endl;
	return 0;
}

练习

A - Number Sequence（KMP裸题）

Given two sequences of numbers: a[1], a[2], …, a[N], and b[1], b[2], …, b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which makes a[K] = b[1], a[K + 1] = b[2], …, a[K + M - 1] = b[M]. If more than one such K exists, output the smallest one.
Input
The first line of input is a number T which indicates the number of cases. Each case contains three lines. The first line contains two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which represent a[1], a[2], …, a[N]. The third line contains M integers which represent b[1], b[2], …, b[M]. All integers are in the range of [-1000000, 1000000].
Output
For each test case, you should output one line which contains only the K described above. If no such K exists, output -1 instead.
Sample Input
2
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 1 3
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 2 1
Sample Output
6
-1
题目大意：
求模式串在待匹配串中的出现位置。
AC代码：

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1e6+10;
// t: number of test cases.
// m: length of the text a; n: length of the pattern b. main() reads them in
// the order "m n", so the letters are swapped relative to the problem
// statement's N (text) and M (pattern).
int t,n,m;
// a: text sequence; b: pattern sequence.
int a[N],b[N];	// the author first stored these as char and got Wrong Answer
// Result of the current test case (1-based match position or -1).
int k;
// KMP failure table for b; filled by getnext() and read by kmp().
int nextt[N];

// Builds the optimized KMP failure table for the integer pattern p.
// The pattern length is taken from the global n; entries next[0..n-1] are
// written. next[j] is the pattern index to resume from after a mismatch at
// p[j], with -1 meaning "advance the text instead".
void getnext(int p[],int next[])
{
	const int last=n-1;	// highest index that receives a table entry
	int pre=-1;	// prefix-side cursor; -1 = no candidate prefix
	int cur=0;	// suffix-side cursor
	next[0]=-1;

	while(cur<last)
	{
		// Candidate prefix cannot be extended: try the next shorter one.
		if(pre!=-1&&p[pre]!=p[cur])
		{
			pre=next[pre];
			continue;
		}

		++cur;
		++pre;
		// If the fallback element equals the one that failed, falling back
		// there would fail again, so inherit its fallback instead.
		next[cur]=(p[pre]==p[cur]) ? next[pre] : pre;
	}
}

int kmp(int s[],int p[])
{
	int i=0,j=0;
	int ls=m,lp=n;
	
	while(i<ls&&j<lp)
	{
		if(j==-1||s[i]==p[j])
		{
			i++;
			j++;
		}
		else
		{
			j=nextt[j];
		}
	}

	if(j==lp)	return i-j+1;
	return -1;
}

// For each of t test cases: reads the two lengths, the text a and the
// pattern b, then prints the 1-based position of the first match or -1.
int main()
{
	scanf("%d",&t);
	while(t--!=0)
	{
		// First number is the text length (m), second the pattern length (n).
		scanf("%d%d",&m,&n);
		for(int idx=0;idx<m;++idx)
			scanf("%d",a+idx);
		for(int idx=0;idx<n;++idx)
			scanf("%d",b+idx);

		memset(nextt,0,sizeof nextt);
		getnext(b,nextt);

		k=kmp(a,b);
		printf("%d\n",k);
	}
	return 0;
}

B - Oulipo（出现次数1.0）

求模式串在待匹配串中的出现次数。
Input
第一行是一个数字T，表明测试数据组数。之后每组数据都有两行：
第一行为模式串，长度不大于10,000；
第二行为待匹配串，长度不大于1,000,000。（所有字符串只由大写字母组成）
Output
每组数据输出一行结果。
Sample Input
4
ABCD
ABCD
SOS
SOSOSOS
CDCDCDC
CDC
KMP
SOEASY
Sample Output
1
3
0
0
AC代码：

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1e6+10;
// Number of test cases.
int t;
// a: the pattern (read first); b: the text (read second).
char a[N],b[N];
// KMP failure table for the pattern; filled by getnext() and read by kmp().
int nextt[N];
// Not referenced by the code shown here: kmp() declares its own local `ans`,
// which shadows this global.
int ans;

// Builds the optimized KMP failure table for the NUL-terminated pattern p.
// Writes next[0] .. next[strlen(p)]. The final entry, next[strlen(p)], is the
// fallback used after a complete match.
void getnext(char p[],int next[])
{
	const int len=strlen(p);
	int prefix=-1;	// prefix-side cursor; -1 = no candidate prefix
	next[0]=-1;

	for(int suffix=0;suffix<len;)
	{
		// Shrink the candidate prefix until it can be extended by p[suffix].
		while(prefix!=-1&&p[prefix]!=p[suffix])
			prefix=next[prefix];

		++prefix;
		++suffix;
		// Skip fallbacks that would compare the same character again.
		if(p[prefix]==p[suffix])
			next[suffix]=next[prefix];
		else
			next[suffix]=prefix;
	}
}

// Counts the occurrences of pattern p in text s; occurrences may overlap.
// The global nextt must already hold the failure table of p, including the
// entry at index strlen(p).
int kmp(char s[],char p[])
{
	const int textLen=strlen(s);
	const int patLen=strlen(p);
	int hits=0;
	int ti=0;	// text cursor
	int pi=0;	// pattern cursor

	while(ti<textLen&&pi<patLen)
	{
		// Real mismatch: keep the text cursor and fall back in the pattern.
		if(pi!=-1&&s[ti]!=p[pi])
		{
			pi=nextt[pi];
			continue;
		}

		++ti;
		++pi;

		// Difference from the plain template: on a full match, count it and
		// fall back through the table so an overlapping match can follow.
		if(pi==patLen)
		{
			++hits;
			pi=nextt[pi];
		}
	}
	return hits;
}

// For each of t test cases: reads the pattern (a) and then the text (b) and
// prints how many times the pattern occurs in the text.
int main()
{
	scanf("%d",&t);
	while(t--!=0)
	{
		scanf("%s",a);
		scanf("%s",b);

		memset(nextt,0,sizeof nextt);
		getnext(a,nextt);

		// Note the argument order: text first, pattern second.
		const int occurrences=kmp(b,a);
		printf("%d\n",occurrences);
	}
	return 0;
}

C - 剪花布条（出现次数2.0）

一块花布条，里面有些图案，另有一块直接可用的小饰条，里面也有一些图案。对于给定的花布条和小饰条，计算一下能从花布条中尽可能剪出几块小饰条来呢？
Input
输入中含有一些数据，分别是成对出现的花布条和小饰条，其布条都是用可见ASCII字符表示的，可见的ASCII字符有多少个，布条的花纹也有多少种花样。花纹条和小饰条不会超过1000个字符长。如果遇见#字符，则不再进行工作。
Output
输出能从花纹布中剪出的最多小饰条个数，如果一块都没有，那就老老实实输出0，每个结果之间应换行。
Sample Input
abcde a3
aaaaaa aa

Sample Output
0
3
AC代码：
C题和B题很像，区别在于：C题中各次匹配不能重叠（匹配成功后j从0重新开始），而B题允许各次匹配重叠（匹配成功后j跳到next[j]）。

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1010;
// KMP failure table for the pattern; filled by getnext() and read by kmp().
int nextt[N];
// a: the cloth strip (text); b: the small ornament strip (pattern).
char a[N],b[N];

// Builds the optimized KMP failure table for the NUL-terminated pattern p.
// Writes next[0] .. next[strlen(p)]; next[j] is the pattern index to resume
// from after a mismatch at p[j], with -1 meaning "advance the text".
void getnext(char p[],int next[])
{
	const int len=strlen(p);
	int head=-1;	// prefix-side cursor; -1 = no candidate prefix
	int tail=0;	// suffix-side cursor
	next[0]=-1;

	while(tail<len)
	{
		// Candidate prefix cannot be extended: fall back to a shorter one.
		if(head!=-1&&p[head]!=p[tail])
		{
			head=next[head];
			continue;
		}

		++head;
		++tail;
		// Inherit the fallback's fallback when both characters are equal.
		next[tail]=(p[head]==p[tail]) ? next[head] : head;
	}
}

// Counts non-overlapping occurrences of pattern p in text s, scanning left
// to right. The global nextt must already hold the failure table of p.
int kmp(char s[],char p[])
{
	const int textLen=strlen(s);
	const int patLen=strlen(p);
	int pieces=0;
	int ti=0;	// text cursor
	int pi=0;	// pattern cursor

	while(ti<textLen&&pi<patLen)
	{
		// Real mismatch: keep the text cursor and fall back in the pattern.
		if(pi!=-1&&s[ti]!=p[pi])
		{
			pi=nextt[pi];
			continue;
		}

		++ti;
		++pi;

		// Key point of this problem: after a full match the pattern restarts
		// from index 0, so matched text is never reused by a later match.
		if(pi==patLen)
		{
			++pieces;
			pi=0;
		}
	}
	return pieces;
}

// Reads (cloth, ornament) pairs until end of input or a token starting with
// '#', printing for each pair how many ornaments can be cut from the cloth.
int main()
{
	for(;;)
	{
		if(scanf("%s",a)==EOF)	break;
		// Compare the first character; a=="#" would compare pointers.
		if(a[0]=='#')	break;
		scanf("%s",b);

		memset(nextt,0,sizeof nextt);
		getnext(b,nextt);

		const int pieces=kmp(a,b);
		printf("%d\n",pieces);
	}
	return 0;
}

D - Cyclic Nacklace（最小循环节）⭐⭐

给定一个字符串，求：在该字符串末尾最少补充多少个字符，可以使得这个字符串获得周期性。
周期性指存在一个子串，使得该字符串可以正好分解成若干个这个子串（数量要大于1）。
Input
第一行是一个整数 T ( 0<T<=100 ) 代表测试数据的组数。
之后T行每行一个字符串，由小写字母组成，字符串的长度3<=L<=100000。
Output
每组数据输出一行结果。
Sample Input
3
AAA
ABCA
ABCDE
Sample Output
0
2
5
AC代码：

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1e5+10;
// Number of test cases.
int t;
// KMP failure table for a; filled by getnext() and read by main().
int nextt[N];
// a: the input string. b is not referenced by the code shown here.
char a[N],b[N];

// Builds the optimized KMP failure table for the NUL-terminated string p.
// Writes next[0] .. next[strlen(p)].
//
// Either the optimized or the plain table works for this problem. The plain
// form would store `next[j]=i`, the longest equal prefix/suffix length of
// p[0..j-1]. The optimization changes intermediate entries but not
// next[strlen(p)]: at that index p[j] is the terminator, which never equals
// p[i], so the plain value i is stored.
void getnext(char p[],int next[])
{
	const int len=strlen(p);
	int prefix=-1;	// prefix-side cursor; -1 = no candidate prefix
	next[0]=-1;

	for(int suffix=0;suffix<len;)
	{
		// Shrink the candidate prefix until it can be extended by p[suffix].
		while(prefix!=-1&&p[prefix]!=p[suffix])
			prefix=next[prefix];

		++prefix;
		++suffix;
		if(p[prefix]==p[suffix])
			next[suffix]=next[prefix];	// optimized: skip a doomed fallback
		else
			next[suffix]=prefix;
	}
}

// For each of t test cases: reads a string and prints the minimum number of
// characters that must be appended so the string becomes at least two whole
// repetitions of some substring.
int main()
{
	scanf("%d",&t);
	while(t--)
	{
		// The original author noted that clearing the buffer with
		// memset(a,0,sizeof(a)) here led to a Wrong Answer verdict. It is not
		// needed: scanf("%s") writes the terminating NUL itself.
		//
		// Fix: pass `a` (which decays to char*) rather than `&a`, whose type
		// is char(*)[N] and does not match the %s conversion.
		scanf("%s",a);
		int la=strlen(a);
		
		memset(nextt,0,sizeof(nextt));
		getnext(a,nextt);
		
		// Key step: nextt[la] is the length of the longest proper prefix of a
		// that is also a suffix, so la-nextt[la] is the shortest period.
		// Example: "abdefab" has la=7 and nextt[la]=2, giving len=5
		// ("abdef" followed by "ab"); appending "def" completes the period.
		int len=la-nextt[la];
		
		int	ans;
		if(len==la)	ans=la;		// no border: the whole string must be repeated
		else if(la%len==0)		// length is a multiple of the period
			ans=0;		// already periodic, nothing to append
		else	ans=len-la%len;		// complete the last, partial period
		printf("%d\n",ans);
	}
	return 0;
}

E - Period（循环节出现次数）

给定一个字符串，ascii码在97到126之间（字母）如果前i个字符组成的字符串是由子字符串循环一定次数组成的，则该字符串为周期串，子字符串为循环节。比如aabaabaabaab，在前两个字符中a出现了两次，循环节为a。在前6个字符中，循环节出现了2次，循环节为aab，在前9个字符中，循环节出现了3次，循环节为aab；在12个字符中循环节出现了4次，循环节为aab。请编程写出给定字符串的n长度的周期串以及循环节出现的次数
Input
循环输入，每次测试输入两行，第一行为字符串的长度，第二行为给定的字符串。输入0结束
Output
每次测试先输出"Test case #"加上测试的次数，换行输出全部周期串的长度以及循环节的循环次数（次数>1），用空格隔开
Sample Input
3
aaa
12
aabaabaabaab
0
Sample Output
Test case #1
2 2
3 3

Test case #2
2 2
6 2
9 3
12 4
AC代码：

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1e6+10;
// The input string of the current test case.
char a[N];
// KMP failure table for a; filled by getnext().
int nextt[N];
// l: length of the current string as read from input (0 ends the input);
// cnt: number of test cases printed so far.
int l,cnt=0;

// Builds the plain (non-optimized) KMP failure table for the first l
// characters of p, where l is the global length. While doing so, prints
// "<prefix length> <repeat count>" for every prefix that consists of more
// than one repetition of its shortest period.
//
// The optimized table must not be used here because the intermediate
// entries are read as longest-border lengths.
void getnext(char p[],int next[])
{
	int border=-1;	// prefix-side cursor; -1 = no candidate prefix
	next[0]=-1;

	for(int len=0;len<l;)
	{
		// Shrink the candidate border until it can be extended by p[len].
		while(border!=-1&&p[border]!=p[len])
			border=next[border];

		++border;
		++len;
		next[len]=border;

		// Shortest period of the prefix of length len.
		const int period=len-border;
		// Report the prefix only if it is a whole number (>1) of periods.
		if(len%period==0&&len/period>1)
			printf("%d %d\n",len,len/period);	// prefix length, repeat count
	}
}

// Reads (length, string) pairs until end of input or a length of 0. For each
// pair it prints a "Test case #k" header, the periodic-prefix lines produced
// by getnext(), and a blank line.
int main()
{
	for(;;)
	{
		if(scanf("%d",&l)==EOF||l==0)	break;
		scanf("%s",a);

		++cnt;
		printf("Test case #%d\n",cnt);
		// getnext() prints the result lines as a side effect.
		getnext(a,nextt);
		printf("\n");
	}
	return 0;
}

F - Power Strings

假设s可以由t重复k次拼成，即s=tttt……tt，我们称为s=t^k。先给定一个字符串s，求最大的n使得存在t满足s=t^n。

Input
多组数据，每行一个字符串(仅包含可打印字符且长度不超过1000000)，以单独一行.作为终结标志

Output
每组数据一行答案

Sample Input
abcd
aaaa
ababab
.

Sample Output
1
4
3

AC代码：
与E思路差不多。

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=1e6+10;
// The input string of the current test case.
char a[N];
// KMP failure table for a; filled by getnext() and read by main().
int nextt[N];

// Builds the optimized KMP failure table for the NUL-terminated string p.
// Writes next[0] .. next[strlen(p)]. The last entry is the length of the
// longest proper prefix of p that is also a suffix: the terminator at
// p[strlen(p)] never equals a real character, so the optimization branch is
// not taken for it.
void getnext(char p[],int next[])
{
	int head=-1;	// prefix-side cursor; -1 = no candidate prefix
	int tail=0;	// suffix-side cursor
	next[0]=-1;
	const int len=strlen(p);

	while(tail<len)
	{
		// Candidate prefix cannot be extended: fall back to a shorter one.
		if(head!=-1&&p[head]!=p[tail])
		{
			head=next[head];
			continue;
		}

		++head;
		++tail;
		next[tail]=(p[head]==p[tail]) ? next[head] : head;
	}
}

// Reads strings until end of input or a token that is exactly ".". For each
// string s it prints the largest n such that s equals some string t repeated
// n times.
int main()
{
	while(scanf("%s",a)!=EOF)
	{
		// Fix: only a lone "." terminates the input. Testing just a[0]=='.'
		// also stopped on data strings that merely begin with a period
		// (e.g. ".." should answer 2), which the statement allows.
		if(strcmp(a,".")==0)	break;
		getnext(a,nextt);
		
		const int la=strlen(a);
		// nextt[la] is the longest border of the whole string, so
		// la-nextt[la] is its shortest period.
		const int len=la-nextt[la];
		// Only a period that divides the length tiles the string exactly;
		// otherwise the string is just itself once.
		const int ans=(la%len==0) ? la/len : 1;
		printf("%d\n",ans);
	}
	return 0;
}

G - Seek the Name, Seek the Fame（求出所有既是前缀又是后缀的子串长度）⭐⭐⭐

题目描述
给定若干只含小写字母的字符串（这些字符串总长≤400000），在每个字符串中求出所有既是前缀又是后缀的子串长度。

例如：ababcababababcabab，既是前缀又是后缀的子串：ab，abab，ababcabab，ababcababababcabab。

输入格式
输入若干行，每行一个字符串。

输出格式
对于每个字符串，输出一行，包含若干个递增的整数，表示所有既是前缀又是后缀的子串长度。

样例输入
ababcababababcabab
aaaaa
alala
nucacm
样例输出
2 4 9 18
1 2 3 4 5
1 3 5
6
AC代码：
先求整个字符串的最长相等前后缀长度（也就是next[l]），再对这个前缀（a[0..next[l]-1]）求它的最长相等前后缀长度（也就是next[next[l]]）……如此反复，直到长度变为0为止。

#include <iostream>
#include <cstring>
#include <cstdio>
#include <cmath>
#include <algorithm>
using namespace std;
// Capacity (in elements) of every fixed-size buffer in this program.
const int N=4e5+10;
int nextt[N];	// nextt[j]: longest equal prefix/suffix length of the j characters before index j
// The input string of the current test case.
char a[N];
// Border lengths collected by main(), longest first.
int ans[N];

// Builds the plain (non-optimized) KMP failure table for the NUL-terminated
// string p. For j>=1, next[j] is the length of the longest proper prefix of
// p[0..j-1] that is also its suffix; next[0] is -1.
// Writes next[0] .. next[strlen(p)].
//
// The plain form is required because the caller walks the intermediate
// entries as border lengths.
void getnext(char p[],int next[])
{
	int border=-1;	// prefix-side cursor; -1 = no candidate prefix
	next[0]=-1;
	const int len=strlen(p);

	for(int pos=0;pos<len;)
	{
		// Shrink the candidate border until it can be extended by p[pos].
		while(border!=-1&&p[border]!=p[pos])
			border=next[border];

		++border;
		++pos;
		next[pos]=border;
	}
}

// For every input string, prints in increasing order the lengths of all
// substrings that are both a prefix and a suffix of it; the full length is
// always printed last.
int main()
{
	while(scanf("%s",a)!=EOF)
	{
		getnext(a,nextt);
		const int total=strlen(a);

		// Core step: follow the failure links from the full length down to 0.
		// Each hop yields the next shorter border, so ans[] is filled from
		// longest to shortest and its last entry is 0.
		int count=0;
		int cur=total;
		while(cur!=0)
		{
			ans[count++]=nextt[cur];
			cur=nextt[cur];
		}

		// Start at count-2 to skip the trailing 0, which is not a valid length.
		for(int idx=count-2;idx>=0;--idx)
			printf("%d ",ans[idx]);
		printf("%d\n",total);
	}
	return 0;
}

manng •ᴗ•

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
19.KMP算法

KMP算法模板练习A - Number Sequence（KMP裸题）B - Oulipo（出现次数1.0）C - 剪花布条（出现次数2.0）D - Cyclic Nacklace（最小循环节）⭐⭐E - Period（循环节出现次数）F - Power StringsG - Seek the Name, Seek the Fame（求出所有既是前缀又是后缀的子串长度）⭐⭐⭐模板#include <iostream>#include <cstring>#include &lt
复制链接

扫一扫

专栏目录