后缀数组向来很强大,其中它的统计功能是一方面。
下面以两道题目为例说明一下如何使用强大的后缀数组进行统计
【例1】POJ 3415 http://poj.org/problem?id=3415
这题是求两串中长度>=K的公共子串的个数(位置不同的相同子串重复计数)
做法:最正统的做法是把两个串连接起来,中间加一个没出现过的字符,对新串求后缀数组和height数组。然后按sa的顺序扫描:遇到属于A的后缀时,把它与前面所有属于B的后缀的lcp累加(lcp>=K时,每个lcp贡献lcp-K+1个公共子串);同样,遇到属于B的后缀时,把它与前面所有属于A的后缀的lcp累加。现在的问题是怎么相加:O(n^2)的算法明显不可行。有个东东叫“单调栈”,顾名思义,它的元素是单调的。单调栈最常见的用途是求最大子矩形面积,这里也类似,因为任意两个后缀的lcp就是它们在height数组上对应区间的最小值。更具体的讲解可以参考这个blog:http://www.cnblogs.com/Booble/archive/2010/12/14/1906147.html 。顺便提一点,我们可以用一个数组保存当前lcp最小值为h的区间数,在栈操作的同时进行更新(合并)。
#define maxn 200100
// Scratch buffers used by da(): wa and wb are the two rank-key arrays that
// get swapped every round, wv holds the keys in second-key order, and wss is
// the bucket array of the counting sort.
int wa[maxn];
int wb[maxn];
int wv[maxn];
int wss[maxn];
// r: integer copy of the text that is passed to da();
// sa: the suffix array that da() fills in.
int r[maxn];
int sa[maxn];
// Returns 1 when positions a and b carry the same key pair, i.e. r[a]==r[b]
// and r[a+l]==r[b+l]; returns 0 otherwise. The second pair is only read when
// the first pair matches.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b]) {
        return 0;
    }
    return r[a + l] == r[b + l];
}
// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// using a counting sort on each round.
//   r  : input keys; every r[i] must lie in [0, m)
//   sa : output, sa[k] = start index of the k-th smallest suffix
//   n  : number of entries to sort (the callers in this file pass the text
//        length + 1 so that a trailing 0 sentinel is included)
//   m  : exclusive upper bound of the key values
// Uses the file-level scratch arrays wa, wb, wv and wss.
void da(int *r,int *sa,int n,int m){
    int *keys = wa;     // rank key of every suffix for the current length
    int *order = wb;    // suffix order by second key / previous keys after swap
    int idx;
    int step;
    int classes;

    // Round 0: counting sort on the single characters.
    for (idx = 0; idx < m; ++idx) {
        wss[idx] = 0;
    }
    for (idx = 0; idx < n; ++idx) {
        keys[idx] = r[idx];
        wss[keys[idx]] += 1;
    }
    for (idx = 1; idx < m; ++idx) {
        wss[idx] += wss[idx - 1];
    }
    for (idx = n - 1; idx >= 0; --idx) {
        int slot = --wss[keys[idx]];
        sa[slot] = idx;
    }

    step = 1;
    classes = 1;
    while (classes < n) {
        int filled = 0;
        int *swap_tmp;

        // Order by second key: suffixes whose second half runs past the end
        // come first, the rest follow in the order given by the current sa.
        for (idx = n - step; idx < n; ++idx) {
            order[filled++] = idx;
        }
        for (idx = 0; idx < n; ++idx) {
            if (sa[idx] >= step) {
                order[filled++] = sa[idx] - step;
            }
        }

        // Stable counting sort by first key, walking the second-key order.
        for (idx = 0; idx < n; ++idx) {
            wv[idx] = keys[order[idx]];
        }
        for (idx = 0; idx < m; ++idx) {
            wss[idx] = 0;
        }
        for (idx = 0; idx < n; ++idx) {
            wss[wv[idx]] += 1;
        }
        for (idx = 1; idx < m; ++idx) {
            wss[idx] += wss[idx - 1];
        }
        for (idx = n - 1; idx >= 0; --idx) {
            int slot = --wss[wv[idx]];
            sa[slot] = order[idx];
        }

        // Swap the buffers: "order" now holds the old keys, and the new keys
        // are written into "keys" by comparing neighbours in sa.
        swap_tmp = keys;
        keys = order;
        order = swap_tmp;

        classes = 1;
        keys[sa[0]] = 0;
        for (idx = 1; idx < n; ++idx) {
            if (cmp(order, sa[idx - 1], sa[idx], step)) {
                keys[sa[idx]] = classes - 1;
            } else {
                keys[sa[idx]] = classes++;
            }
        }

        step *= 2;
        m = classes;    // the next round only needs this many buckets
    }
}
// rank[i]: position of suffix i in sorted order; sa[k]: start of the suffix
// ranked k. The two arrays are inverses of each other.
// height[k]: length of the common prefix of the suffixes ranked k-1 and k.
int rank[maxn];
int height[maxn];

// Fills rank[] and height[] from a suffix array.
//   r  : the text as integers
//   sa : suffix array over n+1 entries (ranks 0..n)
//   n  : text length; pass the length itself, NOT length + 1
// rank[sa[0]] is not written here.
void calheight(int *r,int *sa,int n){
    int pos;
    int matched = 0;

    for (pos = 1; pos <= n; ++pos) {
        rank[sa[pos]] = pos;
    }

    for (pos = 0; pos < n; ++pos) {
        int prev;

        // Start from the previous match length minus one instead of from 0.
        if (matched) {
            --matched;
        }
        prev = sa[rank[pos] - 1];
        while (r[pos + matched] == r[prev + matched]) {
            ++matched;
        }
        height[rank[pos]] = matched;
    }
}
char a[maxn],b[maxn];
// fa[i]: number of common substrings of length >= k contributed by height[i]
//        (height[i]-k+1, or 0 when height[i] < k)
// fb[i]: 1 when the suffix ranked i starts inside the first string, else 0
// fc[t]: for stack entry t, how many suffixes of the current kind were merged
//        into the run whose minimum is st[t]
int fa[maxn],fb[maxn],fc[maxn];
int st[maxn];   // hand-written monotonic stack of fa values; st[0] is a sentinel

// printf conversion for long long: Windows C runtimes use %I64d, everything
// else uses the standard %lld.
#ifdef _WIN32
#define LL_FMT "%I64d"
#else
#define LL_FMT "%lld"
#endif

// Reads test cases "k, string A, string B" until k == 0 or the input ends, and
// prints for each case the number of common substrings of A and B with length
// >= k, counting equal substrings at different positions separately.
int main(){
    int k;
    int i,j;
    // scanf returns EOF (non-zero) at end of input, so compare against 1 to
    // avoid looping forever when the terminating 0 is missing.
    while(scanf("%d",&k)==1 && k){
        int len;
        int la;
        long long ans = 0;

        // Width limits keep both reads inside the buffers; maxn leaves room
        // for 2*100000 characters plus the separator.
        if(scanf("%100000s%100000s",a,b)!=2) break;

        // Join the strings as A + '#' + B. Assumes '#' does not occur in the
        // input and sorts before every input character.
        la = strlen(a);
        a[la] = '#';a[la+1] = '\0';
        strcat(a,b);
        len = strlen(a);
        for(i=0;i<len;i++){
            r[i] = a[i];
        }
        r[len] = 0;     // sentinel expected by da()/calheight()
        da(r,sa,len+1,199);
        calheight(r,sa,len);

        for(i=0;i<=len;i++){
            fb[i] = (sa[i]<la);
            fa[i] = (height[i]>=k) ? height[i]-k+1 : 0;
        }

        st[0] = -1;     // smaller than every fa value, so the stack never empties
        fa[len+1] = 0;  // closes the last run

        // Two passes: j selects which string's suffixes are kept on the stack,
        // and suffixes of the other string collect the running sum.
        for(j=0;j<=1;j++){
            long long sum = 0;  // sum over stacked suffixes of their range minimum
            int top = 0;
            // Ranks 0 and 1 are skipped, as in the original template usage
            // (presumably the 0 sentinel and the '#' suffix).
            for(i=2;i<=len;i++){
                if(fb[i]!=j)ans+=sum;
                st[++top] = fa[i+1];
                fc[top] = (fb[i]==j);
                sum += (long long)st[top]*fc[top];
                while(st[top-1]>=st[top]){
                    // Lower the older run to the new minimum, then merge it.
                    sum -= (long long)(st[top-1]-st[top])*fc[top-1];
                    st[top-1] = st[top];
                    fc[top-1] += fc[top];
                    top--;
                }
            }
        }
        printf(LL_FMT "\n",ans);
    }
    return 0;
}
【例2】
E. Prefix Sum
Time Limit : 6000/3000ms (Java/Other) Memory Limit : 65535/32768K (Java/Other)
Total Submission(s) : 88 Accepted Submission(s) : 14
Font: Times New Roman | Verdana | Georgia
Font Size: ← →
Problem Description
A string v is a suffix string of a string w if string v can read from a position of string w and to the end of w.
For example, string bc is a suffix string of abc. but ab is not.
A string v is a prefix string of a string w if string v can read from the beginning of string w.
For example, string ab is prefix string of string abc, but bc and abcd are not.
For 2 strings s1 and s2, if there is a string s3 is both the prefix of s1 and s2, we call s3 is a common prefix of s1 and s2.
The longest common prefix of 2 strings is the longest common prefix string of all the common prefix strings among these 2 strings.
Your task is:
Give you the string, count the sum of the length of each of the longest common prefix string of each 2 suffix of the string.
For example, string bc is a suffix string of abc. but ab is not.
A string v is a prefix string of a string w if string v can read from the beginning of string w.
For example, string ab is prefix string of string abc, but bc and abcd are not.
For 2 strings s1 and s2, if there is a string s3 is both the prefix of s1 and s2, we call s3 is a common prefix of s1 and s2.
The longest common prefix of 2 strings is the longest common prefix string of all the common prefix strings among these 2 strings.
Your task is:
Give you the string, count the sum of the length of each of the longest common prefix string of each 2 suffix of the string.
Input
There are multi strings. One string per line. Each string is no longer than 10^5. The strings only contain A-Z and a-z.
Output
For each string, output the sum.
Sample Input
ABC
ABABA
AABB
Sample Output
0
7
2
Source
SCAUCPC 2012
这是华农校赛的一道“难题”,其实不难,用后缀数组完全可做。
题意很简单:给出一个串,计算它的所有后缀两两之间的最长公共前缀的长度,并求和。
方法:后缀数组+单调栈优化
#define maxn 100100
// Scratch buffers used by da(): wa and wb are the two rank-key arrays that
// get swapped every round, wv holds the keys in second-key order, and wss is
// the bucket array of the counting sort.
int wa[maxn];
int wb[maxn];
int wv[maxn];
int wss[maxn];
// r: integer copy of the text that is passed to da();
// sa: the suffix array that da() fills in.
int r[maxn];
int sa[maxn];
// Key-pair equality test used by da(): 1 when r[a]==r[b] and r[a+l]==r[b+l],
// 0 otherwise. r[a+l] and r[b+l] are only read after the first pair matched.
int cmp(int *r,int a,int b,int l)
{
    int same_first = (r[a] == r[b]);
    if (!same_first) {
        return 0;
    }
    return r[a + l] == r[b + l];
}
// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// using a counting sort on each round.
//   r  : input keys; every r[i] must lie in [0, m)
//   sa : output, sa[k] = start index of the k-th smallest suffix
//   n  : number of entries to sort (the caller in this program passes the
//        text length + 1)
//   m  : exclusive upper bound of the key values
// Uses the file-level scratch arrays wa, wb, wv and wss.
void da(int *r,int *sa,int n,int m){
    int *keys = wa;     // rank key of every suffix for the current length
    int *order = wb;    // suffix order by second key / previous keys after swap
    int idx;
    int step;
    int classes;

    // Round 0: counting sort on the single characters.
    for (idx = 0; idx < m; ++idx) {
        wss[idx] = 0;
    }
    for (idx = 0; idx < n; ++idx) {
        keys[idx] = r[idx];
        wss[keys[idx]] += 1;
    }
    for (idx = 1; idx < m; ++idx) {
        wss[idx] += wss[idx - 1];
    }
    for (idx = n - 1; idx >= 0; --idx) {
        int slot = --wss[keys[idx]];
        sa[slot] = idx;
    }

    step = 1;
    classes = 1;
    while (classes < n) {
        int filled = 0;
        int *swap_tmp;

        // Order by second key: suffixes whose second half runs past the end
        // come first, the rest follow in the order given by the current sa.
        for (idx = n - step; idx < n; ++idx) {
            order[filled++] = idx;
        }
        for (idx = 0; idx < n; ++idx) {
            if (sa[idx] >= step) {
                order[filled++] = sa[idx] - step;
            }
        }

        // Stable counting sort by first key, walking the second-key order.
        for (idx = 0; idx < n; ++idx) {
            wv[idx] = keys[order[idx]];
        }
        for (idx = 0; idx < m; ++idx) {
            wss[idx] = 0;
        }
        for (idx = 0; idx < n; ++idx) {
            wss[wv[idx]] += 1;
        }
        for (idx = 1; idx < m; ++idx) {
            wss[idx] += wss[idx - 1];
        }
        for (idx = n - 1; idx >= 0; --idx) {
            int slot = --wss[wv[idx]];
            sa[slot] = order[idx];
        }

        // Swap the buffers: "order" now holds the old keys, and the new keys
        // are written into "keys" by comparing neighbours in sa.
        swap_tmp = keys;
        keys = order;
        order = swap_tmp;

        classes = 1;
        keys[sa[0]] = 0;
        for (idx = 1; idx < n; ++idx) {
            if (cmp(order, sa[idx - 1], sa[idx], step)) {
                keys[sa[idx]] = classes - 1;
            } else {
                keys[sa[idx]] = classes++;
            }
        }

        step *= 2;
        m = classes;    // the next round only needs this many buckets
    }
}
// rank[i]: position of suffix i in sorted order; sa[k]: start of the suffix
// ranked k. The two arrays are inverses of each other.
// height[k]: length of the common prefix of the suffixes ranked k-1 and k.
int rank[maxn];
int height[maxn];

// Fills rank[] and height[] from a suffix array.
//   r  : the text as integers
//   sa : suffix array over n+1 entries (ranks 0..n)
//   n  : text length; pass the length itself, NOT length + 1
// rank[sa[0]] is not written here.
void calheight(int *r,int *sa,int n){
    int pos;
    int matched = 0;

    for (pos = 1; pos <= n; ++pos) {
        rank[sa[pos]] = pos;
    }

    for (pos = 0; pos < n; ++pos) {
        int prev;

        // Start from the previous match length minus one instead of from 0.
        if (matched) {
            --matched;
        }
        prev = sa[rank[pos] - 1];
        while (r[pos + matched] == r[prev + matched]) {
            ++matched;
        }
        height[rank[pos]] = matched;
    }
}
char str[maxn];
int c[maxn];        // c[t]: how many height entries were merged into stack entry t
int st[maxn];       // hand-written monotonic stack of height values; st[0] is a sentinel
// dp[t]: sum of c[i]*st[i] over stack entries 1..t. Kept in 64 bits because
// the prefix sums exceed the int range for strings near the 10^5 limit.
long long dp[maxn];

// printf conversion for long long: Windows C runtimes use %I64d, everything
// else uses the standard %lld.
#ifdef _WIN32
#define LL_FMT "%I64d"
#else
#define LL_FMT "%lld"
#endif

// Reads one string per token until end of input and prints, for each string,
// the sum of the longest-common-prefix lengths over all pairs of its suffixes.
int main(){
    // The width limit keeps the read inside str (strings are at most 10^5 long).
    while(scanf("%100000s",str)==1) {
        int i;
        int top = 0;
        int n = strlen(str);
        long long ans = 0;

        for(i=0;i<n;i++){
            r[i] = str[i];
        }
        // Write the sentinel on every test case: r is reused between cases,
        // so without this a shorter string would be followed by stale
        // characters of an earlier, longer one.
        r[n] = 0;
        da(r,sa,n+1,199);
        calheight(r,sa,n);

        st[0] = -1;         // smaller than every height, so the stack never empties
        height[n+1] = 0;    // may hold a stale value from a longer earlier string
        dp[0] = 0;
        for(i=1;i<=n;i++){
            st[++top] = height[i+1];
            c[top] = 1;
            while(st[top-1]>=st[top]){
                // Merge the older run into the new, smaller-or-equal minimum.
                st[top-1] = st[top];
                c[top-1] += c[top];
                top--;
            }
            // Multiply in 64 bits: c[top]*st[top] can exceed INT_MAX.
            dp[top] = dp[top-1]+(long long)c[top]*st[top];
            ans += dp[top];
        }
        printf(LL_FMT "\n",ans);
    }
    return 0;
}