2018-08-10 kmp

最新推荐文章于 2023-02-11 01:15:00 发布

zm_zsy

最新推荐文章于 2023-02-11 01:15:00 发布

阅读量244

点赞数

分类专栏：数据结构

本文链接：https://blog.csdn.net/zm_zsy/article/details/81771698

版权

数据结构专栏收录该内容

36 篇文章 0 订阅

订阅专栏

A -- Number Sequence
Description

Given two sequences of numbers : a[1], a[2], ...... , a[N], and b[1], b[2], ...... , b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which make a[K] = b[1], a[K + 1] = b[2], ...... , a[K + M - 1] = b[M]. If there are more than one K exist, output the smallest one

Input

The first line of input is a number T which indicate the number of cases. Each case contains three lines. The first line is two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which indicate a[1], a[2], ...... , a[N]. The third line contains M integers which indicate b[1], b[2], ...... , b[M]. All integers are in the range of [-1000000, 1000000]

Output

For each test case, you should output one line which only contain K described above. If no such K exists, output -1 instead

Sample Input

13 5

1 2 1 2 3 1 2 3 1 3 2 1 2

1 2 3 1 3

13 5

1 2 1 2 3 1 2 3 1 3 2 1 2

1 2 3 2 1

Sample Output

-1

题目理解

$\small kmp$ 模板题，将 $\small char$ 数组变成 $\small int$ 数组就好

#include<cstdio>
// Capacity of the text array; the statement bounds N by 1000000.
const int maxn=1000005;
// Capacity of the pattern array and its failure table; M is at most 10000.
const int maxm=10005;
// ca: text sequence, b: pattern sequence, next: KMP failure table of b.
int ca[maxn],b[maxm],next[maxm];

// Fills next[0..len] for the first `len` elements of the global pattern b.
// next[0] is the sentinel -1; for i >= 1, next[i] is the length of the longest
// proper prefix of b[0..i-1] that is also a suffix of it.
void getNext(int len)
{
    next[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len; ++pos)
    {
        // Shrink the candidate border until it can be extended by b[pos]
        // (or until the sentinel says there is nothing left to try).
        while (border >= 0 && b[pos] != b[border])
            border = next[border];
        ++border;
        next[pos + 1] = border;
    }
}
// Driver for problem A: for every test case reads the text ca[0..n) and the
// pattern b[0..m), then prints the 1-based index of the first occurrence of
// the pattern in the text, or -1 when there is none.
int main()
{
    int cases,n,m;
    scanf("%d",&cases);
    while(cases--)
    {
        scanf("%d%d",&n,&m);
        for(int idx=0;idx<n;++idx)
            scanf("%d",&ca[idx]);
        for(int idx=0;idx<m;++idx)
            scanf("%d",&b[idx]);
        getNext(m);

        int pos=0;      // next text element to examine
        int matched=0;  // number of pattern elements currently matched
        while(pos<n)
        {
            // Fall back along the failure table until the current text
            // element can extend the match (matched == -1 means "restart").
            while(matched!=-1&&ca[pos]!=b[matched])
                matched=next[matched];
            ++pos;
            ++matched;
            if(matched==m)
                break;  // first full occurrence found
        }

        if(matched!=m)
            printf("-1\n");
        else
            printf("%d\n",pos-m+1);  // convert end position to 1-based start
    }
    return 0;
}

B -- Clock Pictures
Description

http://acm.csu.edu.cn/csuoj/problemset/problem?pid=1581

题目理解

这道题因为给出的是相对位置，我们不知道起始位置的偏移量是多少，所以没有办法直接排序后逐项比较；但是两个序列分别排序后，相邻两项之间的角度差（包括最后一项绕回第一项的差值）所构成的环形序列应该是一致的。所以得出角度差值序列以后，将其中一个序列扩展为两倍，然后用 $\small kmp$ 在其中查找另一个序列即可

#include <cstdio>
#include <cstring>
#include <algorithm>
using namespace std;
// Capacity for up to 200000 clock hands plus slack.
const int maxn=200000+5;
// a, b: the two lists of hand angles; a1, b1: their circular gap sequences
// (b1 is twice as long because it stores the gap sequence doubled).
int a[maxn],b[maxn],a1[maxn],b1[2*maxn];
// KMP failure table of the pattern handed to getNext.
int _next[maxn];
// Number of hands on each clock; also the pattern length used by getNext.
int n;

// Builds the failure table of the first n elements of `a` into _next[0..n].
// The whole table is reset to -1 first, so _next[0] keeps the sentinel -1.
void getNext(int* a)
{
    memset(_next,-1,sizeof(_next));
    int border=-1;
    for(int pos=0;pos<n;++pos)
    {
        // Drop to shorter borders until one can be extended by a[pos].
        while(border!=-1&&a[pos]!=a[border])
            border=_next[border];
        ++border;
        _next[pos+1]=border;
    }
}
// Searches for the n-element pattern `b` inside the 2*n-element text `a`.
// Returns 1 when the pattern occurs somewhere in the text, 0 otherwise.
// Rebuilds the global failure table for `b` on every call.
int kmp(int a[],int b[])
{
    getNext(b);

    const int textLen=2*n;
    int textPos=0;
    int patPos=0;
    while(patPos<n&&textPos<textLen)
    {
        if(a[textPos]!=b[patPos])
        {
            // Mismatch: with nothing matched just move on in the text,
            // otherwise reuse the longest border of what was matched.
            if(patPos==0)
                ++textPos;
            else
                patPos=_next[patPos];
            continue;
        }
        ++textPos;
        ++patPos;
    }
    return patPos==n?1:0;
}
// Driver for problem B: reads two sets of n hand angles, converts each sorted
// set into its circular sequence of gaps, and reports whether one gap
// sequence is a rotation of the other (searched for in the doubled sequence).
int main()
{
    scanf("%d",&n);
    for(int idx=0;idx<n;idx++)
        scanf("%d",&a[idx]);
    for(int idx=0;idx<n;idx++)
        scanf("%d",&b[idx]);

    sort(a,a+n);
    sort(b,b+n);

    // Gaps between neighbouring hands of both clocks.
    for(int idx=0;idx+1<n;idx++)
    {
        a1[idx]=a[idx+1]-a[idx];
        b1[idx]=b[idx+1]-b[idx];
    }
    // Wrap-around gap from the last hand back to the first one
    // (a full turn is 360000 angle units).
    a1[n-1]=a[0]-a[n-1]+360000;
    b1[n-1]=b[0]-b[n-1]+360000;

    // Append a second copy of b1 so every rotation appears as a substring.
    for(int idx=0;idx<n;idx++)
        b1[n+idx]=b1[idx];

    if(!kmp(b1,a1))
        printf("impossible");
    else
        printf("possible");
    return 0;
}

C -- 最长公共前缀
Description

给定两个字符串s和t，现有一个扫描器，从s的最左边开始向右扫描，每次扫描到一个t就把这一段删除，输出能发现t的个数

Input

第一行包含一个整数T（T<=50），表示数据组数
每组数据第一行包含一个字符串s，第二行一个字符串t，字符串长度不超过1000000

Output

对于每组数据，输出答案

Sample Input

ababab

Sample Output

题目理解

找到子串在母串中出现的个数，不重合计数。这里只要在每次子串走到串尾的时候将子串指针重置为0即可，母串指针已经后移可以不用理睬

#include<iostream>
#include<cstdio>
#include<cstring>
#define maxn 1000005
using namespace std;
// str: text to scan, s: pattern to look for.
char str[maxn],s[maxn];
// KMP failure table of s.
int _next[maxn];

// Builds the failure table of the global pattern s into _next[0..strlen(s)].
// _next[0] is the sentinel -1: at the first character there is no shorter
// border to fall back to, so -1 marks the end of every fallback chain.
void getNext()
{
    const int len = strlen(s);
    _next[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len; ++pos)
    {
        // The sentinel test comes first so that short-circuit evaluation
        // never indexes s with -1. Each step moves to the next shorter
        // border (a border may itself contain a smaller border) until one
        // can be extended by s[pos] or the chain ends at -1.
        while (border != -1 && s[pos] != s[border])
            border = _next[border];
        // Here either the chain ended (border == -1, new value 0) or
        // s[pos] == s[border], so the border grows by one character.
        ++border;
        _next[pos + 1] = border;
    }
}
// Driver for problem C: for every test case reads the text str and the
// pattern s, and prints how many non-overlapping occurrences of s a
// left-to-right scan of str finds.
int main()
{
    int cases;
    scanf("%d",&cases);
    while(cases--)
    {
        scanf("%s",str);
        scanf("%s",s);
        getNext();

        const int patLen = strlen(s);
        const int textLen = strlen(str);
        int occurrences = 0;
        int matched = 0;  // pattern characters matched so far
        for (int pos = 0; pos < textLen; ++pos)
        {
            // Fall back until str[pos] extends the match or the chain ends.
            while (matched != -1 && str[pos] != s[matched])
                matched = _next[matched];
            ++matched;
            if (matched == patLen)
            {
                // A full occurrence was consumed: restart from scratch so
                // the next occurrence cannot overlap this one.
                ++occurrences;
                matched = 0;
            }
        }
        printf("%d\n",occurrences);
    }
    return 0;
}

D -- 剪花布条
Description

一块花布条，里面有些图案，另有一块直接可用的小饰条，里面也有一些图案。对于给定的花布条和小饰条，计算一下能从花布条中尽可能剪出几块小饰条来呢

Input

输入中含有一些数据，分别是成对出现的花布条和小饰条，其布条都是用可见ASCII字符表示的，可见的ASCII字符有多少个，布条的花纹也有多少种花样。花纹条和小饰条不会超过1000个字符长。如果遇见#字符，则不再进行工作

Output

输出能从花纹布中剪出的最多小饰条个数，如果一块都没有，那就老老实实输出0，每个结果之间应换行

Sample Input

abcde

aaaaaa

aa #

Sample Output

题目理解

由于剪下来的部分不能重复利用所以分析如同C题

#include<iostream>
#include<cstdio>
#include<cstring>
#define maxn 1000005
using namespace std;
// str: the cloth strip (text), s: the small ornament strip (pattern).
char str[maxn],s[maxn];
// KMP failure table of s.
int _next[maxn];

// Builds the failure table of the global pattern s into _next[0..strlen(s)].
// _next[0] is the sentinel -1, which ends every fallback chain because the
// first character has nothing shorter to fall back to.
void getNext()
{
    const int len = strlen(s);
    _next[0] = -1;
    int pos = 0;
    int border = -1;
    while (pos < len)
    {
        // Check the sentinel first: short-circuit evaluation then guarantees
        // s is never indexed with -1.
        if (border != -1 && s[pos] != s[border])
        {
            // Try the next shorter border of the current one.
            border = _next[border];
            continue;
        }
        // Either the chain ended (border == -1) or s[pos] == s[border]:
        // the border of the prefix ending at pos is one character longer.
        ++pos;
        ++border;
        _next[pos] = border;
    }
}
// Driver for problem D: repeatedly reads a cloth strip and an ornament strip
// and prints how many non-overlapping ornaments can be cut from the cloth.
// Input ends at end-of-file or when the cloth token starts with '#'.
int main()
{
    for(;;)
    {
        if(!~scanf("%s",str))
            break;  // scanf returned -1
        if(str[0]=='#')
            break;  // terminator token
        scanf("%s",s);
        getNext();

        const int patLen = strlen(s);
        const int textLen = strlen(str);
        int pieces = 0;
        int matched = 0;  // pattern characters matched so far
        for(int pos = 0; pos < textLen; ++pos)
        {
            // Fall back until str[pos] extends the match or the chain ends.
            while(matched != -1 && str[pos] != s[matched])
                matched = _next[matched];
            ++matched;
            if(matched == patLen)
            {
                // A cut-out piece cannot be reused, so start over.
                ++pieces;
                matched = 0;
            }
        }
        printf("%d\n",pieces);
    }
    return 0;
}

E -- Blue Jeans
Description

The Genographic Project is a research partnership between IBM and The National Geographic Society that is analyzing DNA from hundreds of thousands of contributors to map how the Earth was populated.
As an IBM researcher, you have been tasked with writing a program that will find commonalities amongst given snippets of DNA that can be correlated with individual survey information to identify new genetic markers.
A DNA base sequence is noted by listing the nitrogen bases in the order in which they are found in the molecule. There are four bases: adenine (A), thymine (T), guanine (G), and cytosine (C). A 6-base DNA sequence could be represented as TAGACC.
Given a set of DNA base sequences, determine the longest series of bases that occurs in all of the sequences.

Input

Input to this problem will begin with a line containing a single integer n indicating the number of datasets. Each dataset consists of the following components:

A single positive integer m (2 <= m <= 10) indicating the number of base sequences in this dataset.
m lines each containing a single base sequence consisting of 60 bases.

Output

For each dataset in the input, output the longest base subsequence common to all of the given base sequences. If the longest common subsequence is less than three bases in length, display the string "no significant commonalities" instead. If multiple subsequences of the same longest length exist, output only the subsequence that comes first in alphabetical order

Sample Input

GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

GATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATACCAGATA GATACTAGATACTAGATACTAGATACTAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA GATACCAGATACCAGATACCAGATACCAAAGGAAAGGGAAAAGGGGAAAAAGGGGGAAAA

CATCATCATCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC ACATCATCATAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AACATCATCATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT

Sample Output

no significant commonalities

AGATAC

CATCATCAT

题目理解

这道题一串多配，多次 $\small kmp$ 即可。这道题如果调用函数就会省事很多，因为在很多退出的边界条件如果直接在主程序里面转移需要许多个 $\small flag$ 标志，逻辑判断过于繁琐。在调用 $\small kmp$ 函数的时候如果只是将截取的首尾下标传进去也是能够实现的，而我这里是采用一个单独的字符数组然后将里面的内容变换之后当成一个新串进行匹配，注意的是，由于调用函数的时候使用了 $\small strlen$ 所以一定要记住赋值串尾 '\0'。注意题目要求输出相同长度下的字典序较小的串。

#include<iostream>
#include<cstdio>
#include<cstring>
#define maxn 65
using namespace std;
// KMP failure table of the pattern most recently passed to getNext.
int _next[maxn];

// Builds the failure table of the NUL-terminated pattern s into
// _next[0..strlen(s)]; _next[0] is the sentinel -1.
void getNext(char* s)
{
    const int len = strlen(s);
    _next[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len; ++pos)
    {
        // Shrink the border until it can be extended by s[pos].
        while (border != -1 && s[pos] != s[border])
            border = _next[border];
        ++border;
        _next[pos + 1] = border;
    }
}
// Returns 1 if the pattern s occurs in the text str, 0 otherwise.
// Relies on the global failure table _next having been built for s.
int kmp(char* str,char* s){
    const int patLen = strlen(s);
    const int textLen = strlen(str);
    int matched = 0;  // pattern characters matched so far
    for (int pos = 0; pos < textLen; ++pos)
    {
        // Fall back until str[pos] extends the match or the chain ends.
        while (matched != -1 && str[pos] != s[matched])
            matched = _next[matched];
        ++matched;
        if (matched == patLen)
            return 1;
    }
    return 0;
}
// Compares s1[0..len] with s2[0..len] (both ends inclusive) character by
// character. Returns 1 if s1 is greater at the first difference, 2 if s1 is
// smaller, and 0 if the two ranges are equal.
int comp(char* s1,char* s2,int len){
    int idx = 0;
    // Skip the common prefix.
    while (idx <= len && s1[idx] == s2[idx])
        ++idx;
    if (idx > len)
        return 0;
    return s1[idx] > s2[idx] ? 1 : 2;
}
// Driver for problem E: for every dataset reads n sequences and prints the
// longest substring of the first sequence that occurs in all the others
// (alphabetically smallest among equal lengths), or the fixed message when
// no common substring of at least three bases exists.
int main()
{
    int t,n;
    char str[10][maxn],s[maxn],ans[maxn];
    scanf("%d",&t);
    while(t--)
    {
        scanf("%d",&n);
        for(int row=0;row<n;++row)
            scanf("%s",str[row]);

        bool found=false;
        ans[0]=-1;  // sentinel: no answer stored yet
        // `last` is the index of the final character of the candidate, so
        // the candidate length is last+1 (60 down to 3). Longer candidates
        // are tried first; once a length succeeds no shorter one is tried.
        for(int last=59;last>1&&!found;--last)
        {
            for(int start=0;start+last<60;++start)
            {
                // Copy the candidate out of the first sequence and terminate
                // it, because getNext/kmp measure it with strlen.
                memset(s,0,sizeof(s));
                memcpy(s,str[0]+start,last+1);
                s[last+1]='\0';
                getNext(s);

                // Count how many of the sequences 1..n-1 contain it, stopping
                // at the first one that does not.
                int checked=1;
                while(checked<n&&kmp(str[checked],s))
                    ++checked;
                if(checked!=n)
                    continue;  // some sequence lacks this candidate

                found=true;
                // Keep the first hit, or any later hit that sorts before the
                // stored answer.
                if(ans[0]==-1||comp(ans,s,last)==1)
                    memcpy(ans,s,last+2);  // includes the terminator
            }
        }

        if(!found)
            printf("no significant commonalities\n");
        else
            printf("%s\n",ans);
    }
    return 0;
}

F -- Period
Description

For each prefix of a given string S with N characters (each character has an ASCII code between 97 and 126, inclusive), we want to know whether the prefix is a periodic string. That is, for each i (2 <= i <= N) we want to know the largest K > 1 (if there is one) such that the prefix of S with length i can be written as $A^K$, that is A concatenated K times, for some string A. Of course, we also want to know the period K

Input

The input file consists of several test cases. Each test case consists of two lines. The first one contains N (2 <= N <= 1 000 000) – the size of the string S. The second line contains the string S. The input file ends with a line, having the number zero on it

Output

For each test case, output “Test case #” and the consecutive test case number on a single line; then, for each prefix with length i that has a period K > 1, output the prefix size i and the period K separated by a single space; the prefix sizes must be in increasing order. Print a blank line after each test case

Sample Input

aaa

aabaabaabaab

Sample Output

Test case #1

2 2

3 3

Test case #2

2 2

6 2

9 3

12 4

题目理解

如果存在循环节，循环节的长度就应该是 $\small i-next(i)$，而这里的 $\small i$ 是串尾的下一个位置（也就是前缀的长度），根据 $\small next(i)$ 求取时候的递增原理可以推出其中两段、三段、四段...相同即循环节；如果不存在循环节，其得出的长度就应该是串长。所以我们对长度不小于2的前缀分别求取循环节的长度，然后取模，如果能够整除并且循环节长度不等于前缀长度，说明是一个循环串。输出相应的前缀长度和循环次数即可

#include<cstdio>
#include<cstring>
#define maxn 1000007
// KMP failure table of the string most recently passed to getNext.
int _next[maxn];

// Builds the failure table of the NUL-terminated string s into
// _next[0..strlen(s)]; _next[0] is the sentinel -1 and _next[i] is the
// length of the longest proper border of the prefix of length i.
void getNext(char* s)
{
    const int len = strlen(s);
    _next[0] = -1;
    int pos = 0;
    int border = -1;
    while (pos != len)
    {
        if (border != -1 && s[pos] != s[border])
        {
            // Mismatch: retry with the next shorter border.
            border = _next[border];
            continue;
        }
        ++pos;
        ++border;
        _next[pos] = border;
    }
}
// Driver for problem F: for every test case reads the declared length and the
// string, then prints each prefix length i >= 2 whose prefix is a repetition
// of a shorter block, together with the number of repetitions. Input ends
// with a length of zero (or when no further number can be read).
int main()
{
    int len,cas=0;
    // Static storage: the buffer is about 1 MB, which as an automatic
    // variable can exhaust a default-sized thread stack on some platforms.
    static char s[maxn];
    // Require a successful conversion; a bare ~scanf() would also accept a
    // return value of 0 and then loop on a stale `len`.
    while(scanf("%d",&len)==1&&len)
    {
        if(scanf("%s",s)!=1)
            break;  // truncated input: no string for this case
        getNext(s);
        // getNext only fills _next[0..strlen(s)], so never look past the
        // real string length even if the declared length is larger.
        const int actual=(int)strlen(s);
        const int limit=len<actual?len:actual;
        printf("Test case #%d\n",++cas);
        for(int i=2;i<=limit;++i){
            // i - _next[i] is the smallest period of the prefix of length i;
            // the prefix is a true repetition when that period is shorter
            // than i and divides it.
            const int period=i-_next[i];
            if(period!=i&&i%period==0){
                printf("%d %d\n",i,i/period);
            }
        }
        printf("\n");
    }
    return 0;
}

G -- Oulipo
Description

The French author Georges Perec (1936–1982) once wrote a book, La disparition, without the letter 'e'. He was a member of the Oulipo group. A quote from the book:
Tout avait Pair normal, mais tout s’affirmait faux. Tout avait Fair normal, d’abord, puis surgissait l’inhumain, l’affolant. Il aurait voulu savoir où s’articulait l’association qui l’unissait au roman : stir son tapis, assaillant à tout instant son imagination, l’intuition d’un tabou, la vision d’un mal obscur, d’un quoi vacant, d’un non-dit : la vision, l’avision d’un oubli commandant tout, où s’abolissait la raison : tout avait l’air normal mais…
Perec would probably have scored high (or rather, low) in the following contest. People are asked to write a perhaps even meaningful text on some subject with as few occurrences of a given “word” as possible. Our task is to provide the jury with a program that counts these occurrences, in order to obtain a ranking of the competitors. These competitors often write very long texts with nonsense meaning; a sequence of 500,000 consecutive 'T's is not unusual. And they never use spaces.
So we want to quickly find out how often a word, i.e., a given string, occurs in a text. More formally: given the alphabet {'A', 'B', 'C', …, 'Z'} and two finite strings over that alphabet, a word W and a text T, count the number of occurrences of W in T. All the consecutive characters of W must exactly match consecutive characters of T. Occurrences may overlap.

Input

The first line of the input file contains a single number: the number of test cases to follow. Each test case has the following format:
One line with the word W, a string over {'A', 'B', 'C', …, 'Z'}, with 1 ≤ |W| ≤ 10,000 (here |W| denotes the length of the string W).
One line with the text T, a string over {'A', 'B', 'C', …, 'Z'}, with |W| ≤ |T| ≤ 1,000,000

Output

For every test case in the input file, the output should contain a single number, on a single line: the number of occurrences of the word W in the text T

Sample Input

BAPC BAPC

AZA AZAZAZA

VERDI AVERDXIVYERDIAN

Sample Output

题目理解

这一题和C题的区别就是可以重复，这也很容易就可以处理，直接采用 $\small next(i)$ 的失配向前跳就能够得到最多的匹配情况

#include<iostream>
#include<cstdio>
#include<cstring>
#define maxn 1000005
using namespace std;
// KMP failure table of the word most recently passed to getNext.
int _next[maxn];

// Builds the failure table of the NUL-terminated word s into
// _next[0..strlen(s)]; _next[0] is the sentinel -1. The entry at index
// strlen(s) is filled as well; kmp uses it to continue after a full match.
void getNext(char* s)
{
    const int len = strlen(s);
    _next[0] = -1;
    int border = -1;
    for (int pos = 0; pos < len; ++pos)
    {
        // Shrink the border until it can be extended by s[pos].
        while (border != -1 && s[pos] != s[border])
            border = _next[border];
        ++border;
        _next[pos + 1] = border;
    }
}
// Counts the occurrences of the word s in the text str; occurrences may
// overlap. Relies on the global failure table _next having been built for s.
int kmp(char* str,char* s){
    const int patLen = strlen(s);
    const int textLen = strlen(str);
    int occurrences = 0;
    int matched = 0;  // word characters matched so far
    for (int pos = 0; pos < textLen; ++pos)
    {
        // Fall back until str[pos] extends the match or the chain ends.
        while (matched != -1 && str[pos] != s[matched])
            matched = _next[matched];
        ++matched;
        if (matched == patLen)
        {
            ++occurrences;
            // Continue from the longest border of the whole word so that
            // overlapping occurrences are still found.
            matched = _next[matched];
        }
    }
    return occurrences;
}
// Driver for problem G: for every test case reads the word W and the text T
// and prints the number of (possibly overlapping) occurrences of W in T.
int main()
{
    int t;
    // Static storage: together the two buffers take about 2 MB, which as
    // automatic variables can exhaust a default-sized thread stack.
    static char str[maxn],s[maxn];
    // Without a successful read `t` would be used uninitialized.
    if(scanf("%d",&t)!=1)
        return 0;
    // `t-- > 0` also stops immediately on a non-positive case count.
    while(t-->0)
    {
        // Stop on truncated input instead of reusing stale buffers.
        if(scanf("%s",s)!=1||scanf("%s",str)!=1)
            break;
        getNext(s);
        int cnt=kmp(str,s);
        printf("%d\n",cnt);
    }
    return 0;
}

H -- Cyclic Nacklace
Description

CC always becomes very depressed at the end of this month, he has checked his credit card yesterday, without any surprise, there are only 99.9 yuan left. he is too distressed and thinking about how to tide over the last days. Being inspired by the entrepreneurial spirit of "HDU CakeMan", he wants to sell some little things to make money. Of course, this is not an easy task.

As Christmas is around the corner, Boys are busy in choosing christmas presents to send to their girlfriends. It is believed that chain bracelet is a good choice. However, Things are not always so simple, as is known to everyone, girl's fond of the colorful decoration to make bracelet appears vivid and lively, meanwhile they want to display their mature side as college students. after CC understands the girls demands, he intends to sell the chain bracelet called CharmBracelet. The CharmBracelet is made up with colorful pearls to show girls' lively, and the most important thing is that it must be connected by a cyclic chain which means the color of pearls are cyclic connected from the left to right. And the cyclic count must be more than one. If you connect the leftmost pearl and the rightmost pearl of such chain, you can make a CharmBracelet. Just like the pictrue below, this CharmBracelet's cycle is 9 and its cyclic count is 2:

Now CC has brought in some ordinary bracelet chains, he wants to buy minimum number of pearls to make CharmBracelets so that he can save more money. but when remaking the bracelet, he can only add color pearls to the left end and right end of the chain, that is to say, adding to the middle is forbidden.
CC is satisfied with his ideas and ask you for help.

Input

The first line of the input is a single integer T ( 0 < T <= 100 ) which means the number of test cases
Each test case contains only one line describe the original ordinary chain to be remade. Each character in the string stands for one pearl and there are 26 kinds of pearls being described by 'a' ~'z' characters. The length of the string Len: ( 3 <= Len <= 100000 )

Output

For each case, you are required to output the minimum count of pearls added to make a CharmBracelet

Sample Input

aaa

abca

abcde

Sample Output

题目理解

理解F题的循环节以后直接计算就可以了，注意的是剩余部分需要分类讨论不能直接减，因为得到0的时候需要输出0直接减就输出串长了

#include<cstdio>
#include<cstring>
#define maxn 100007
// KMP failure table of the string most recently passed to getNext.
int _next[maxn];

// Builds the failure table of the NUL-terminated string s into
// _next[0..strlen(s)]; _next[0] is the sentinel -1 and _next[i] is the
// length of the longest proper border of the prefix of length i.
void getNext(char* s)
{
    const int len = strlen(s);
    _next[0] = -1;
    int pos = 0;
    int border = -1;
    while (pos < len)
    {
        if (border != -1 && s[pos] != s[border])
        {
            // Mismatch: retry with the next shorter border.
            border = _next[border];
            continue;
        }
        ++pos;
        ++border;
        _next[pos] = border;
    }
}
// Driver for problem H: for every chain prints the minimum number of pearls
// that must be appended so the chain becomes at least two full repetitions
// of its smallest period.
int main()
{
    int cases;
    char s[maxn];
    scanf("%d",&cases);
    while(cases--)
    {
        scanf("%s",s);
        getNext(s);

        const int total=strlen(s);
        const int border=_next[total];   // longest border of the whole chain
        const int period=total-border;   // smallest period of the chain
        if(period==total||total%period!=0)
        {
            // Either the chain has no shorter period or its last repetition
            // is incomplete: pad up to the next multiple of the period.
            // border % period equals total % period, the length of the
            // trailing partial repetition.
            printf("%d\n",period-border%period);
        }
        else
        {
            // Already a whole number (>= 2) of repetitions.
            printf("0\n");
        }
    }
    return 0;
}