spoj 687. Repeats 重复次数最多的子串(求最大重复次数)

最新推荐文章于 2019-11-23 00:12:41 发布

kongming_acm

最新推荐文章于 2019-11-23 00:12:41 发布

阅读量1.3k

点赞数

分类专栏： acm_数据结构文章标签： string character input ini 算法 each

本文链接：https://blog.csdn.net/kongming_acm/article/details/6321363

版权

acm_数据结构专栏收录该内容

112 篇文章 1 订阅

订阅专栏

A string s is called an (k,l)-repeat if s is obtained by concatenating k>=1 times some seed string t with length l>=1. For example, the string

s = abaabaabaaba

is a (4,3)-repeat with t = aba as its seed string. That is, the seed string t is 3 characters long, and the whole string s is obtained by repeating t 4 times.

Write a program for the following task: Your program is given a long string u consisting of characters ‘a’ and/or ‘b’ as input. Your program must find some (k,l)-repeat that occurs as substring within u with k as large as possible. For example, the input string

u = babbabaabaabaabab

contains the (4,3)-repeat s = abaabaabaaba starting at position 5 (it is underlined in the original statement). Since u contains no other contiguous substring with more than 4 repeats, your program must output the maximum k, which is 4 for this input.

Input

The first line of the input contains H, the number of test cases (H <= 20). H test cases follow. The first line of each test case is n, the length of the input string (n <= 50000). The next n lines contain the input string, one character (either ‘a’ or ‘b’) per line, in order.

Output

For each test case, you should write exactly one integer k on a line: the maximum repeat count.

Example

Input:
1
17
b
a
b
b
a
b
a
a
b
a
a
b
a
a
b
a
b

Output:
4

since a (4, 3)-repeat is found starting at the 5th character of the input string.

#include<iostream>

#include<cstdio>

#include<cstring>

#include<algorithm>

#include<cmath>

using namespace std;

/// Shared storage for the prefix-doubling suffix array construction.

/// Capacity (in elements) of every global buffer declared below.
const int maxn = 500000;

/// Receives one whitespace-delimited input token at a time in main().
char str[maxn];

/// First of the two rank buffers that DA() swaps between rounds.
int wa[maxn];
/// Second of the two rank buffers that DA() swaps between rounds.
int wb[maxn];
/// First-key values laid out in second-key order during DA()'s radix pass.
int wv[maxn];
/// Counting-sort buckets used by DA().
int wn[maxn];
/// The input string converted to integers, one element per character.
int a[maxn];
/// The suffix array produced by DA().
int sa[maxn];

/// Length of the current input string.
int n;

/// Tells whether positions a and b carry the same pair of sort keys.
/// Returns 1 when r[a]==r[b] and r[a+l]==r[b+l], otherwise 0.
/// The second pair is only read when the first pair matches.
int cmp(int* r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    if (r[a + l] != r[b + l])
        return 0;
    return 1;
}

/**
 * Builds the suffix array of r by prefix doubling with radix sort.
 *
 * r  - the string as integers; the first n entries are read.
 * sa - output: sa[k] receives the start position of the suffix ranked k.
 * n  - the string length (the caller in this file passes the original
 *      length plus one, having appended a trailing 0 element).
 * m  - the range of character values: every r[i] must lie in [0, m).
 *
 * The j used below is the length of the substrings already sorted at the
 * start of each round. Uses the global scratch arrays wa, wb, wv and wn.
 */

void DA(int* r,int* sa,int n,int m)

{

// x holds the current rank (first key) of each position, y is scratch;
// the two are swapped at the end of every round via t.
int i,j,p,*x=wa,*y=wb,*t;

/// Counting sort of the length-1 substrings of r.

for(i=0;i<m;i++)wn[i]=0;

for(i=0;i<n;i++)wn[x[i]=r[i]]++;

for(i=1;i<m;i++)wn[i]+=wn[i-1];

// Walking backwards keeps the counting sort stable.
for(i=n-1;i>=0;i--)sa[--wn[x[i]]]=i;

// p is the number of distinct ranks after a round; the loop stops once it
// reaches n. Each round doubles j and shrinks the bucket count m to p.
for(j=1,p=1;p<n;j*=2,m=p)

{

/** Reuse the previous round's sa to order positions by their second key
    (the rank at position + j) without a separate sort. */

// Positions n-j..n-1 have no second key inside the string, so they go first.
for(p=0,i=n-j;i<n;i++)y[p++]=i;

// The rest follow in sa order, shifted back by j.
for(i=0;i<n;i++)if(sa[i]>=j)y[p++]=sa[i]-j;

/// Stable counting sort by first key, visiting positions in second-key order.

for(i=0;i<n;i++)wv[i]=x[y[i]];

for(i=0;i<m;i++)wn[i]=0;

for(i=0;i<n;i++)wn[wv[i]]++;

for(i=1;i<m;i++)wn[i]+=wn[i-1];

for(i=n-1;i>=0;i--)sa[--wn[wv[i]]]=y[i];

/// When p == n every suffix has its own rank, so sorting is finished.

/// After a round the largest rank is smaller than p, so the next round uses m = p.

// Swap x and y, then assign new ranks in sa order: a suffix shares the
// previous rank when cmp reports both keys equal, otherwise it gets p++.
// NOTE(review): cmp reads y at index sa[i]+j, which can exceed n-1; this
// appears to rely on the trailing 0 element and oversized buffers — confirm.
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)

x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;

}

return;

}

/// Height array computation on top of the suffix array.

/**
 * The height values start at height[1], and height[1] should equal 0.
 * The reason is that a 0 element was appended to the string, so that suffix
 * is necessarily the smallest one. Every other character in the string must
 * be greater than 0 (this has to be ensured before running the doubling
 * algorithm), so the common prefix between the second-ranked suffix and the
 * 0 suffix (that is, height[1]) is 0. When calling calheight, the height
 * array covers the range [1..n], so the call must be calheight(r,sa,n)
 * and not calheight(r,sa,n+1).
 */

int _rank[maxn];
int height[maxn];

/// Fills _rank (the inverse of sa over ranks 1..n) and height, where
/// height[k] is the length of the common prefix of the suffixes ranked
/// k-1 and k. The match length is carried from one start position to the
/// next, dropping by at most one each step.
void calheight(int* r,int* sa,int n)
{
    for (int pos = 1; pos <= n; ++pos)
    {
        _rank[sa[pos]] = pos;
    }

    int matched = 0;
    for (int start = 0; start < n; ++start)
    {
        if (matched != 0)
        {
            --matched;
        }
        // Start position of the suffix ranked immediately before this one.
        const int prev = sa[_rank[start] - 1];
        while (r[start + matched] == r[prev + matched])
        {
            ++matched;
        }
        height[_rank[start]] = matched;
    }
}

// RMQ: minimum over an arbitrary index range, answered from a sparse table.

// Source values for the table, read at indices 1..n.
int d[maxn];

// dpmin[i][j] is the minimum of d[i .. i + 2^j - 1].
int dpmin[maxn][25];

/// Builds the sparse table dpmin from d[1..n].
/// n is the number of values; indices are 1-based.
void creat_dpmin(int n)
{
    for (int i = 1; i <= n; i++)
    {
        dpmin[i][0] = d[i];
    }

    // A level j only has entries while a block of 2^j values fits in 1..n.
    // An integer shift test avoids recomputing floating-point log on every
    // pass; the table contents are the same.
    for (int j = 1; (1 << j) <= n; j++)
    {
        const int half = 1 << (j - 1);
        for (int i = 1; i + (1 << j) - 1 <= n; i++)
        {
            // A block of 2^j is the union of two adjacent blocks of 2^(j-1).
            dpmin[i][j] = std::min(dpmin[i][j - 1], dpmin[i + half][j - 1]);
        }
    }
}

/// Minimum over the index range [x, y] (both inclusive), answered from dpmin.
/// The range is covered by two blocks of length 2^level, one anchored at
/// each end.
int get_min(int x,int y)
{
    const int span = y - x + 1;
    const int level = (int)(log(double(span)) / log(2.0));
    const int left = dpmin[x][level];
    const int right = dpmin[y - (1 << level) + 1][level];
    return right < left ? right : left;
}

/// Suffix array + RMQ.
/// Longest common prefix of the suffix starting at x and the suffix
/// starting at y: the positions are mapped to ranks through _rank, and the
/// result is the dpmin minimum over ranks (lower + 1) .. upper. main() loads
/// that table from the height array.
int get_min_lcp(int x,int y)
{
    int lower = _rank[x];
    int upper = _rank[y];
    if (lower > upper)
    {
        const int tmp = lower;
        lower = upper;
        upper = tmp;
    }
    // The table entry at rank k compares ranks k-1 and k, so skip the first.
    ++lower;

    const int span = upper - lower + 1;
    const int level = (int)(log(double(span)) / log(2.0));
    const int left = dpmin[lower][level];
    const int right = dpmin[upper - (1 << level) + 1][level];
    return right < left ? right : left;
}

int main()

{

//scanf("%s",str);//待处理字符串

//后缀数组倍增算法使用方法

/**

在使用倍增算法前，需要保证r数组的值均大于0。然后要在原字

符串后添加一个0号字符，具体原因参见罗穗骞的论文。这时候，

若原串的长度为n，则实际要进行后缀数组构建的r数组的长度应

该为n+1.所以调用DA函数时，对应的n应为n+1.*/

/*int n=strlen(str);

for(int i=0;i<n;i++) a[i]=(int)str[i];

a[n]=0;

DA(a,sa,n+1,256);

calheight(a,sa,n);*/

//....................................

int ci;scanf("%d",&ci);

while(ci--)

{

scanf("%d",&n);

for(int i=0;i<n;i++)

{

scanf("%s",str);

a[i]=(int)str[0];

}

a[n]=0;

DA(a,sa,n+1,256);

calheight(a,sa,n);

for(int i=1;i<=n;i++) d[i]=height[i];

creat_dpmin(n);

int ans=0;//answer

for(int i=1;i<n;i++)//长度为i的子串

{

for(int j=0;j+i<n;j+=i)

{

int k=get_min_lcp(j,j+i);

int now=k/i;

/**还差i-k%i个字符可以匹配多一个周期

把j向前移动i-k%i个字符，

如果可以匹配出来一个周期

now就+1

int tj=j-(i-k%i);

if(tj>=0)

if(get_min_lcp(tj,tj+i)>=(i-k%i)) now++;

if(now+1>ans) ans=now+1;//important +1

}

printf("%d/n",ans);

}

return 0;

}

kongming_acm

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录