KMP训练五题

最新推荐文章于 2022-11-27 09:22:59 发布

theArcticOcean

最新推荐文章于 2022-11-27 09:22:59 发布

阅读量802

点赞数

分类专栏： algorithm_字符串文章标签： kmp

本文链接：https://blog.csdn.net/thearcticocean/article/details/50984183

版权

algorithm_字符串专栏收录该内容

22 篇文章 0 订阅

订阅专栏

再次学习KMP，关于字符串的许多东西快忘光了，惭愧。这次训练5道题，分别是：
hdu 3336 Count the string （理解）
hdu 4763 Theme Section （理解）
hdu 2594 Simpsons’ Hidden Talents （合并串）
hdu 3746 Cyclic Nacklace （最小循环节）
zoj 3587 Marlon’s String （模式串T在主串S上的移动）

hdu 3336 Count the string

http://acm.hdu.edu.cn/showproblem.php?pid=3336
大意：求解一个字符串前缀在字符串里出现次数的和。
分析：由KMP算法得到bnext数组，它记录了前缀和后缀相同的长度。（这里的后缀是广义的后缀，不仅仅是字符串末尾，也可以是中间，关键看指针的移动）
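因此最直观的做法是：bnext数组（下标1——n）上的值大于0的加2，等于0的加1。注意这条规则只在每个位置的相同前后缀链至多一层时成立（例如样例abab）；对一般的串，应令cnt[i]=cnt[bnext[i]]+1（cnt[0]=0），再对cnt[1..n]求和，比如aaa的答案是3+2+1=6，而按“加2/加1”只能得到5。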

code:

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
const int N=2e5+10,mod=10007;
char str[N];   // input string, NUL-terminated
int n;         // length of str as announced by the input
int bnext[N];  // KMP failure table of str, filled by get_next()

// Builds the KMP failure table of str[0..n-1] into bnext[0..n].
//   bnext[0] is the sentinel -1;
//   bnext[i] (1 <= i <= n) is the length of the longest proper prefix of
//   str[0..i-1] that is also a suffix of it.
// Entries past bnext[n] are left untouched.
void get_next(){
    int i=0,j=-1;
    bnext[0]=-1;
    // Stop at i<n. Looping while i<=n would compare the terminating NUL
    // against the pattern, walk the whole fallback chain for nothing and
    // store a meaningless bnext[n+1] one slot past the table.
    while(i<n){
        if(j==-1 || str[i]==str[j]){
            bnext[++i]=++j;   // extend the current border by one character
        }
        else j=bnext[j];      // mismatch: retry with the next shorter border
    }
}
int main()
{
    int t;
    cin>>t;
    while(t--){
        scanf("%d%s",&n,str);
        memset(bnext,0,sizeof(bnext));
        get_next();

        int ans=0;
        for(int i=1;i<=n;i++){
           if(bnext[i]>0) ans=(ans+2)%mod;
           else ans=(ans+1)%mod;
        }
        printf("%d\n",ans);
    }
    return 0;
}

hdu 4763 Theme Section

http://acm.hdu.edu.cn/showproblem.php?pid=4763
求解：符合结构EAEBE字符串的E的最大长度。A和B的长度任意。
分析：理解next数组的意义容易解出。

字符串
串	aaaaa
位置	-1 0 1 2 3 4
next	0 1 2 3 4

字符串
串	a b a a c
位置	-1 0 1 2 3 4
next	0 0 1 1 0

code:

#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;
const int N=1e6+10;
char s[N];      // input string, NUL-terminated
int bnext[N];   // KMP failure table of s, filled by getnext()

// Builds the KMP failure table of s[0..len-1] into bnext[0..len].
//   bnext[0] is the sentinel -1;
//   bnext[i] (1 <= i <= len) is the length of the longest proper prefix of
//   s[0..i-1] that is also a suffix of it.
// len: number of characters of s to process. Nothing past bnext[len] is written.
void getnext(int len){
    int i=0,j=-1;
    bnext[0]=-1;
    // i<len, not i<=len: the extra round would only compare the NUL
    // terminator and store an unused bnext[len+1].
    while(i<len){
        if(j==-1 || s[i]==s[j]){
            bnext[++i]=++j;   // border grows by one character
        }
        else j=bnext[j];      // fall back to the next shorter border
    }
}

int main(){
    //freopen("cin.txt","r",stdin);
    int t;
    cin>>t;
    while(t--){
        scanf("%s",s);
        int len=strlen(s);
        memset(bnext,0,sizeof(bnext));
        getnext(len);

        int ans=0;
        for(int i=bnext[len];i>=1;i--){
            if(ans) break;
            for(int j=len-i;j>=i;j--){
                if(bnext[j]>=i) {
                    ans=i;  
                    break;
                }
            }
         }
         printf("%d\n",ans);
    }
    return 0;
}

hdu 2594 Simpsons’ Hidden Talents

http://acm.hdu.edu.cn/showproblem.php?pid=2594
大意：求解A、B两个字符串A的前缀和B的后缀一样的最大长度。
分析：
思路1——将A和B连接起来，用KMP算法求得匹配数组bnext，然后得到p=bnext[length]，即新串的前后重叠长度。如果它大于原来任意一个串的长度，则往回继续找p=bnext[p]，直到p<=A.length && p<=B.length
（不断缩短重叠的长度）

code:

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
const int N=5e4+10;
int bnext[N<<1];          // KMP failure table of s1, filled by getnext()
char s1[N<<1],s2[N];      // s1 is twice as large because s2 gets appended to it

// Builds the KMP failure table of s1[0..len-1] into bnext[0..len].
//   bnext[0] is the sentinel -1;
//   bnext[i] (1 <= i <= len) is the length of the longest proper prefix of
//   s1[0..i-1] that is also a suffix of it.
// len: number of characters of s1 to process. Nothing past bnext[len] is written.
void getnext(int len){
    int i=0,j=-1;
    bnext[0]=-1;
    // Stop at i<len. One more round would only match the NUL terminator
    // against the pattern and write bnext[len+1], which nobody reads.
    while(i<len){
        if(j==-1 || s1[j] == s1[i]){
            bnext[++i]=++j;   // extend the current border
        }
        else j=bnext[j];      // shorten the border and retry
    }
}

int main(int argc, char *argv[]) {
    //freopen("cin.txt","r",stdin);
    while(gets(s1)){
        gets(s2);
        int l1=strlen(s1),l2=strlen(s2);
        strcat(s1,s2);
        int L=strlen(s1);  
        memset(bnext,0,sizeof(bnext));
        getnext(L);
        int p=bnext[L];
        while(p>l1 || p>l2) p=bnext[p];

        if(p>0) printf("%s %d\n",s1+L-p,p);
        else printf("0\n");     
    }    
    return 0;
}

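思路2——直接用KMP算法计算两个不同串的bnext匹配记录数组，最后直接输出bnext[L2]（L2为第二个串的长度）。
但是这样形如asd、asd的例子不能求出正确答案，因此，我把第二个字符串asd变长成为Aasd，然后计算bnext数组。（但是这个解决方案不正确，我当时没能找到bug。原因在于：失配时回退用的j=bnext[j]必须取模式串s1自身的next值，而这里的bnext记录的是s1与s2之间的匹配长度，两者并不相同，所以回退位置可能出错。正确做法是先求s1自身的next数组，再用它在s2上做匹配，匹配结束时的j即为答案。）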

WA code:

#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;
// NOTE: this program is kept exactly as published; the article presents it
// as a Wrong Answer attempt.
const int N=5e4+10;
int bnext[N];      // cross-match table written by getnext(), see below
char s1[N],s2[N];  // s1: first input line; s2: 'A' followed by the second input line
// Fills bnext while comparing s1 (indexed by j) against s2 (indexed by i):
// bnext[i+1] receives j+1 whenever s1[j]==s2[i] or j is the sentinel -1.
//   l1: length of s1, l2: length of s2.
// NOTE(review): on a mismatch the fallback j=bnext[j] reads this same
// s1-versus-s2 table. KMP needs the fallback to come from the failure table
// of the pattern s1 on its own; mixing the two is the most likely reason the
// judge rejects this version.
void getnext(int l1,int l2){
    int i=0,j=-1;
    bnext[0]=-1;
    while(j<=l1&&i<=l2){ // both bounds are inclusive, so the NUL terminators are compared as well
        if(j==-1 || s1[j] == s2[i]){
             bnext[++i]=++j;
        }
        else j=bnext[j];
    }
}

// Driver of the Wrong Answer attempt: reads pairs of lines until EOF, builds
// the cross-match table and prints the last bnext[L2] characters of s2
// together with that number, or "0" when it is not positive.
// NOTE(review): gets() performs no bounds check and is no longer part of
// C++14; it is left untouched because this listing is shown as-is.
int main(int argc, char *argv[]) {
    //freopen("cin2.txt","r",stdin);
    while(gets(s1)){
        gets(s2+1);
        s2[0]='A';  // e.g. for "werwer"/"wer": prefix s2 with a letter that cannot occur (input is all lowercase) so it becomes "Awer"

        int L1=strlen(s1),L2=strlen(s2);
        memset(bnext,0,sizeof(bnext));
        getnext(L1,L2);

        // bnext[L2] is taken as the overlap length
        int ans=bnext[L2];
        if(ans>0) printf("%s %d\n",s2+L2-ans,ans);
        else printf("0\n");
     }
     return 0;
}

hdu 3746 Cyclic Nacklace

http://acm.hdu.edu.cn/showproblem.php?pid=3746
大意：使用最少的珠子让项链变成顺时针对称环形。
分析：想要补充的珠子最小，即补充完整最小的循环节。
最小循环节：cir=L-bnext[L]; 这就是关键点

code:

#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;
const int N=1e5+10;
char s[N];      // input string, NUL-terminated
int bnext[N];   // KMP failure table of s, filled by getnext()

// Builds the KMP failure table of s[0..len-1] into bnext[0..len].
//   bnext[0] is the sentinel -1;
//   bnext[i] (1 <= i <= len) is the length of the longest proper prefix of
//   s[0..i-1] that is also a suffix of it.
// len: number of characters of s to process. Nothing past bnext[len] is written.
void getnext(int len){
    int i=0,j=-1;
    bnext[0]=-1;
    // i<len rather than i<=len: the additional iteration only compared the
    // NUL terminator and produced an unused bnext[len+1].
    while(i<len){
        if(j==-1 || s[i]==s[j]){
            bnext[++i]=++j;   // extend the current border
        }
        else j=bnext[j];      // fall back to the next shorter border
    }
}

// HDU 3746: for every test case print how many characters must be appended
// so that the string consists of at least two full copies of its smallest
// period.
int main(){
    int cases;
    cin>>cases;
    while(cases--){
        scanf("%s",s);
        const int total=strlen(s);
        memset(bnext,0,sizeof(bnext));
        getnext(total);

        // smallest period = length minus longest border
        const int period=total-bnext[total];
        const int leftover=total%period;

        if(leftover==0 && period<total){
            // already a whole number (>= 2) of periods
            puts("0");
        }
        else if(period==total){
            // no border at all: the string itself has to be repeated once
            printf("%d\n",total);
        }
        else{
            // complete the last, partial period
            printf("%d\n",period-leftover);
        }
    }
    return 0;
}

zoj 3587 Marlon’s String （模式串T在主串S上的移动）

http://acm.zju.edu.cn/onlinejudge/showProblem.do?problemCode=3587
题目：
Given two strings S and T. Return the amount of tetrad (a,b,c,d) which satisfy $S_{a..b} + S_{c..d} = T$, a≤b and c≤d.
The operator + means concate the two strings into one.

分析：本题可以说是这五道题的大BOSS，写了将近两天。有了C - Simpsons’ Hidden Talents的启发，我开始的思路就是把T和S拼接在一起，然后一次KMP；把T和S逆序反转再来一次KMP，得到两个next数组
对于两个next数组做同样的处理：
初始化num数组0，用于记录两个字符串相同前缀的个数，设L1是第一个字符数组s1的长度。那么如果next[i]>0且i>L1 代表这有个前缀，我num[i]++. 最后for i=L1–>1 : num[next[i]]+=num[i]; 恩，动态规划的思想（卡到这里，学习了别人的思路）
num1和num2，设L是S2的长度，我觉得最后的答案就是 $\sum_{i=1}^{L-1} num_1(i)\,num_2(L-i)$
然而，too young too simple。
使用代码

// Builds the KMP failure table of s[0..len-1] into bnext[0..len].
//   bnext[0] is the sentinel -1;
//   bnext[i] (1 <= i <= len) is the length of the longest proper prefix of
//   s[0..i-1] that is also a suffix of it.
void getnext(int len){
    int i=0,j=-1;
    bnext[0]=-1;
    // i<len is enough to fill bnext[0..len]; with i<=len the loop also
    // compared the NUL terminator and stored an extra bnext[len+1].
    while(i<len){
        if(j==-1 || s[i]==s[j]){
            bnext[++i]=++j;   // extend the current border
        }
        else j=bnext[j];      // fall back to the next shorter border
    }
}

求得的next数组长的是这个样子：

type
数组	a a b a a
index	0 1 2 3 4 5
next	-1 0 1 0 1 2

那么对于样例:
aaaa
aaa

type
数组	a a a a a a a
index	0 1 2 3 4 5 6 7
next	-1 0 1 2 3 4 5 6
num	0 0 0 1 1 1 1
num(after change)	4 4 4 4 3 2 1

计算结果：32
抱着一种侥幸的心理想要直接用next[i]去重: 4 4 4–>4 3 2 这个例子倒是解决了，但是对于更普通的例子
aaaddd
add
又不行了（又变少了）。
研究发现，先连接两个字符串然后处理会产生重复。
/———————————————————————————————————–
正确的解法是，求出模式串T的next数组，通过其引导，在主串上不断移动，找到所有的长度的相同前缀。相同的字符一定是经过比较的，不理解这句话可以参考博文：
http://kb.cnblogs.com/page/176818/
不过，想要让其顺利实现，因为涉及到字符比较，所以需要重新改改那个求解next的方法，不让其有-1的存在（数组的下标不会是-1）。
最后再用之前的DP思想和求和得到答案。

code:

#include <cstdio>
#include <iostream>
#include <cstring>
using namespace std;
// Capacity of every buffer below: maximum string length plus some slack.
const int N=100000+10;
typedef long long LL;

char s1[N];    // text that is scanned
char s2[N];    // pattern whose prefixes are counted
int bnext[N];  // failure table of s2, rebuilt by getnext()
int num1[N];   // per-prefix occurrence counts, forward pass
int num2[N];   // per-prefix occurrence counts, pass over the reversed strings

// Reverses the NUL-terminated string s in place.
void rever(char *s){
    if(*s=='\0') return;            // nothing to swap in an empty string
    char *head=s;
    char *tail=s+strlen(s)-1;       // last character before the terminator
    // swap the outermost pair and move both ends towards the middle
    while(head<tail){
        const char held=*head;
        *head++=*tail;
        *tail--=held;
    }
}
// Counts, for every prefix length k of s2, how many times that prefix occurs
// in s1, and adds the counts into m[k].
//   m: counter array indexed by prefix length; the caller clears it first.
// Works in three passes:
//   1. rebuild bnext as the failure table of s2 without the -1 sentinel
//      (bnext[k] = longest border of the first k characters, 0 if none);
//   2. slide s2 over s1 and count, per position of s1, the longest prefix of
//      s2 that ends there;
//   3. push every count down the failure links, since a prefix ending at a
//      position implies all of its borders end there too.
void getnext(int m[]){
    const int patLen=strlen(s2);
    memset(bnext,0,sizeof(bnext));
    bnext[0]=0;

    // pass 1: failure table, e.g. "aaba" -> lengths 1..4 map to 0 1 0 1
    int pos=1;
    int border=0;
    while(pos<=patLen){
        if(s2[pos]==s2[border]){
            ++pos;
            ++border;
            bnext[pos]=border;
        }
        else if(border>0){
            border=bnext[border];
        }
        else{
            ++pos;                 // no border at all; entry stays 0
        }
    }

    // pass 2: match the pattern against the text
    int matched=0;
    for(const char *text=s1;*text;++text){
        while(matched>0 && *text!=s2[matched]) matched=bnext[matched];
        if(*text==s2[matched]){
            ++matched;
            m[matched]++;          // one more occurrence of the prefix of this length
        }
    }

    // pass 3: longer prefixes hand their occurrences to their borders
    for(int k=patLen;k>=1;--k){
        m[bnext[k]]+=m[k];
    }
}

int main(){
    int t;
    cin>>t;
    while(t--){
        scanf("%s%s",s1,s2);  //substring of s2 make s1, s1 is short
        memset(num1,0,sizeof(num1));
        memset(num2,0,sizeof(num2));
        getnext(num1);
        rever(s1);
        rever(s2);
        getnext(num2);
        LL ans=0;
        int L=strlen(s2);
        for(int i=1;i<L;i++){
            ans=ans+1LL*num1[i]*num2[L-i];
        }
        printf("%lld\n",ans);
    }
    return 0;
}

theArcticOcean

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
KMP训练五题

再次学习KMP，关于字符串的许多东西快忘光了，惭愧。这次训练5道题，分别是： hdu 3336 Count the string （理解） hdu 4763 Theme Section （理解） hdu 2594 Simpson’s Hidden Talents （合并串） hdu 3746 Cyclic Nacklace （最小循环节） zoj 3587 Marlon’s String
复制链接

扫一扫