poj2406(kmp,后缀数组)

最新推荐文章于 2019-05-04 22:11:30 发布

demo

最新推荐文章于 2019-05-04 22:11:30 发布

阅读量1k

点赞数

分类专栏： poj 文章标签： up c struct input

本文链接：https://blog.csdn.net/ysjjovo/article/details/6637406

版权

poj 专栏收录该内容

16 篇文章 0 订阅

订阅专栏

开始是用后缀数组做的，RMQ果断超了内存，全部改用unsigned short结果又超时，后来参考了论文里的方法。

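关键思想：设 k 为最小重复子串（循环节）的长度，则 k 必须整除 n，且 s[0..n-k-1] = s[k..n-1]，即后缀 0 与后缀 k 的最长公共前缀满足 lcp(suffix(0), suffix(k)) = n-k。由于位置 0 是固定的，只需从 rank[0] 出发沿名次向两侧扫描 height 数组（沿途取最小值即为与后缀 0 的 lcp），找出满足上述两个条件的最小 k 即可，答案为 n/k。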

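这一步扫描的复杂度为 O(n)，可是还是超时，因为后缀数组预处理的时间是 O(n·log n)。觉得这题是专门卡后缀数组的，这种方法应该行不通了。后来一搜，结果全是 kmp 的做法，直接根据 next 数组就可以计算出来：设 len = n-1-next[n-1]（这里 next[i] 存的是 P[0..i] 最长公共前后缀的末位下标，没有则为 -1），若 n 能被 len 整除则答案为 n/len，否则为 1。汗啊！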

参考：http://blog.sina.com.cn/s/blog_59bc06c40100mg72.html

1.kmp代码

#include<cstdio>
#include<cstring>
#include<iostream>
using namespace std;
//poj2406
#define N 1000010
// Current input string (NUL-terminated); filled by main().
char P[N];
// Length of the string currently held in P.
int n;
// KMP failure table in "last index" form: next[i] is the index of the last
// character of the longest proper border of P[0..i], or -1 if there is none.
// Entry 0 is fixed at -1 by the initialiser; getNext() fills entries 1..n-1.
int next[N]={-1};

// Fills next[1..n-1] for the string P of length n.
//
// The table is always referred to as `::next`: the file has
// `using namespace std;`, and on C++11 and later toolchains an unqualified
// `next` is ambiguous with std::next. Qualified lookup in the global
// namespace finds the array first and does not follow the using-directive.
void getNext()//mind the loop bound: the last entry written is next[n-1]
{
    int i=0;   // entries 0..i of the table are final
    int j=-1;  // end index of the border currently being extended (-1: empty)
    while(i<n-1)
    {
        if(P[i+1]==P[j+1])
        {
            // The border grows by one character.
            ++i;
            ++j;
            ::next[i]=j;
        }
        else if(j!=-1)
        {
            // Mismatch: retry with the next shorter border.
            j=::next[j];
        }
        else
        {
            // No border at all ends at position i+1.
            ++i;
            ::next[i]=-1;
        }
    }
}
int main()
{
//    freopen("data.in","r",stdin);
    while(scanf("%s",P)&&strcmp(P,".")!=0)
    {
        n=strlen(P);
        getNext();
        int len=n-1-next[n-1];
        if(n%len==0)cout<<n/len;
        else cout<<1;
        cout<<endl;
    }
}

2.后缀数组超时的代码

#include<cstdio>
#include<cstring>
#include<cmath>
#include<iostream>
using namespace std;
//   freopen("data.in","r",stdin);

#define _N 1000005
// Suffix array (prefix doubling with counting sort) plus the height/LCP array.
//
// Usage: init() once, then for every string input() -> calSA() -> calHeight().
// Afterwards:
//   sa[i]     start index of the suffix with rank i
//   rank[i]   rank of the suffix starting at i
//   height[i] longest common prefix of the suffixes ranked i-1 and i (height[0]=0)
//
// The buffers hold at most _N-1 characters and are never released; the object
// is meant to live for the whole program.
struct SuffixArray
{
    //SuffixArray::none
    static char const NONE='z'+1;//a character that never occurs in the input (not referenced by the code in this struct)
    int *r,*rank,*c,*sa,*secondSa,*first,*height;
    //r[i]        input symbols, r[i]>=0; r[n] is a sentinel different from all of them
    //rank[i]     rank of index i after the most recent sorting pass
    //c[]         counting-sort buckets
    //secondSa[i] index whose second key has order i
    //first[i]    first-key rank of the suffix whose second-key order is i

    int n,up;//n: string length; up: exclusive upper bound of the symbols, r[i]<up

    // Start with null buffers so init() can tell whether it has already run.
    SuffixArray():r(0),rank(0),c(0),sa(0),secondSa(0),first(0),height(0),n(0),up(0){}

    // Allocates the work buffers. Calling it again is a no-op, so a repeated
    // call no longer leaks the previous allocation.
    void init()
    {
        if(r)return;
        r=new int [_N];
        rank=new int[2*_N];//double size: cmp() reads up to index a+delta
        c=new int[_N];//needs max(symbol bound, n) buckets
        sa=new int[_N];
        secondSa=new int[2*_N];//swapped with rank after every pass, so same size
        first=new int[_N];
        height=new int[_N];
    }

    // Loads the first n characters of s (n must be at most _N-1).
    void input(char * s,int n)
    {
        this->n=n;
        up=0;
        for(int i=0; i<n; i++)
        {
            // Go through unsigned char: where plain char is signed, bytes
            // >= 0x80 would otherwise become negative bucket indices in calSA().
            r[i]=static_cast<unsigned char>(s[i]);
            if(up<r[i])up=r[i];
        }
        up++;
        r[n]=up;//sentinel unlike every real symbol; stops the scan in calHeight()
    }

    // True when a and b agree on both sort keys of the current pass, i.e. on
    // rank[x] and rank[x+delta].
    bool cmp(int *rank,int a,int b,int delta)
    {
        return rank[a]==rank[b] && rank[a+delta]==rank[b+delta];
    }

    // Builds sa[] and rank[] for the string loaded by input().
    void calSA()
    {
        if(n<=0)return;//nothing to sort; sa[0] would be read uninitialised below

        // Entries n..2n-1 stay -1 in both arrays: an index past the end of the
        // string gets a rank lower than every real rank, so cmp() needs no
        // bounds check.
        for(int i=0; i<2*n; i++)rank[i]=secondSa[i]=-1;

        // The bucket count lives in a local so the member `up` keeps the
        // meaning set by input(); calSA() can then be run again on the same
        // input and still produce the same result.
        int buckets=up;

        //initial counting sort on single symbols; fills sa[] and rank[]
        for(int i=0; i<buckets; i++)c[i]=0;
        for(int i=0; i<n; i++) c[rank[i]=r[i]]++;
        for(int i=1; i<buckets; i++)c[i]+=c[i-1];
        for(int i=n-1; i>-1; i--)sa[--c[rank[i]]]=i; //back to front keeps the sort stable

        int p=0;//number of distinct ranks; any value <n enters the loop
        for(int halfLen=1; p<n; halfLen<<=1,buckets=p)
        {
            //each pass sorts by the first 2*halfLen characters
            //p is reused here as the write cursor into secondSa[];
            //in the loop condition it is the rank count: once it reaches n
            //every suffix has a unique rank and sorting is finished
            p=0;

            //suffixes with no second half come first, in index order (stable)
            for(int i=n-halfLen; i<n; i++)secondSa[p++]=i;
            //the rest in order of their second key: the suffix at sa[i] is the
            //second half of the suffix at sa[i]-halfLen
            for(int i=0; i<n; i++)if(sa[i]>=halfLen)secondSa[p++]=sa[i]-halfLen;

            //first key of each suffix, listed in second-key order
            for(int i=0; i<n; i++)first[i]=rank[secondSa[i]];
            //stable counting sort on the first key (bound is buckets, not n)
            for(int i=0; i<buckets; i++)c[i]=0;
            for(int i=0; i<n; i++)c[first[i]]++;
            for(int i=1; i<buckets; i++)c[i]+=c[i-1];
            for(int i=n-1; i>-1; i--)sa[--c[first[i]]]=secondSa[i]; //second key is already in order

            //the old ranks move to secondSa; the new ranks are written to rank
            int *tmp=rank;
            rank=secondSa;
            secondSa=tmp;

            rank[sa[0]]=0;
            p=1;
            for(int i=1; i<n; i++)
            {
                //neighbours equal on both keys share a rank, otherwise open a
                //new one; a+halfLen may run past n, which is why the arrays
                //are doubled and padded with -1
                rank[sa[i]]=cmp(secondSa,sa[i],sa[i-1],halfLen)?p-1:p++;
            }
        }
    }

    // Fills height[] from sa[] and rank[].
    void calHeight()
    {
        //uses h[i]>=h[i-1]-1 with height[rank[i]]=h[i], computing h[0],h[1],...,h[n-1] in turn
        int preLen=0;//match length carried over from index cur-1
        for(int cur=0; cur<n; cur++)
        {
            if(rank[cur]==0)
            {
                //the smallest suffix has no predecessor
                height[0]=preLen=0;
                continue;
            }
            if(preLen)--preLen;
            int preRankLable=sa[rank[cur]-1];//index of the suffix ranked just before cur
            //the sentinel r[n] guarantees a mismatch before either index passes n
            while(r[cur+preLen]==r[preRankLable+preLen])preLen++;
            height[rank[cur]]=preLen;
        }
    }
} suffixArray;
 char master[_N]; // input buffer for one string
// Reads strings until a lone "." (or end of input). For each string of length
// n it finds the smallest k with n % k == 0 and lcp(suffix 0, suffix k) == n-k,
// i.e. the shortest repeating block, and prints n / k.
int main()
{
 // freopen("data.in","r",stdin);
    suffixArray.init();

    while( scanf("%s",master)!=EOF&&strcmp(master,".")!=0)
    {
        int n=strlen(master);
        suffixArray.input(master,n);
        suffixArray.calSA();
        suffixArray.calHeight();

        int min_k=n;//k == n (one repetition) always works
        int base=suffixArray.rank[0];

        // The LCP of suffix 0 with the suffix ranked r is the minimum of
        // height[] over the ranks between them, so a running minimum is kept
        // while walking away from base. Once it reaches 0 no further k can
        // satisfy max_match == n-k (n-k is always positive), so the walk stops.
        // Plain comparisons are used instead of min() so that the code does
        // not depend on <algorithm> being pulled in by another header.
        int max_match=n;
        for(int r=base+1; r<n&&max_match>0; r++)
        {
            if(suffixArray.height[r]<max_match)max_match=suffixArray.height[r];
            int k=suffixArray.sa[r];//never 0 here, because r != base
            if(max_match==n-k&&n%k==0&&k<min_k)min_k=k;
        }

        // Same walk towards the lower ranks; height[r+1] links rank r to r+1.
        max_match=n;
        for(int r=base-1; r>-1&&max_match>0; r--)
        {
            if(suffixArray.height[r+1]<max_match)max_match=suffixArray.height[r+1];
            int k=suffixArray.sa[r];
            if(max_match==n-k&&n%k==0&&k<min_k)min_k=k;
        }
        // '\n' instead of endl: no flush after every answer.
        std::cout<<n/min_k<<'\n';
    }
    return 0;
}

demo

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
poj2406(kmp,后缀数组)

开始是用后缀数组做的，RMQ果断超了内存，全部改用unsigned short结果又超时，后来参考了论文里的方法。关键思想是设k为最小重复字符串的长度，则s[0,1……n-k-1]=s[k,k+1……n-1],即lcp(rank[0],rank[k])=n-k;由于0位置是
复制链接

扫一扫