自己搞OI的时候没学(惭愧,都冲进NOI了居然还不会后缀数组!),现在搞ACM了,就学了学人家的论文,做了几题,小结一下。
感觉这些题有些模块比较重要:
首先是求后缀数组:
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]); returns 0 otherwise.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
void da(char *s,int *sa,int n,int m)
{
int *x=wa,*y=wb,*t;
int i,j,p;
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[x[i]=s[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[x[i]]]=i;
for (j=1,p=1;p<n;j*=2,m=p)
{
for (p=0,i=n-j;i<n;i++) y[p++]=i;
for (i=0;i<n;i++) if (sa[i]>=j) y[p++]=sa[i]-j;
for (i=0;i<n;i++) v[i]=x[y[i]];
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[v[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[v[i]]]=y[i];
for (t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
}
然后是求height数组
void calheight(char *s,int *sa,int n)
{
int i,j,k=0;
for (i=1;i<=n;i++) rk[sa[i]]=i;
for (i=0;i<n;h[rk[i++]]=k)
for (k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
}
然后是对height数组搞RMQ
// Prepares range-minimum queries over h[1..n]:
//   er[j]   = 2^j for j in 0..19
//   log2[i] = floor(log2(i)) for i in 1..n (log2[0] is set to -1)
//   f[i][j] = min(h[i], ..., h[i + 2^j - 1])
void RMQ()
{
    er[0] = 1;
    for (int bit = 1; bit < 20; ++bit)
        er[bit] = er[bit - 1] * 2;

    log2[0] = -1;
    for (int len = 1; len <= n; ++len)
    {
        log2[len] = log2[len - 1];
        if ((len & (len - 1)) == 0)   // len is a power of two
            ++log2[len];
    }

    for (int pos = 1; pos <= n; ++pos)
        f[pos][0] = h[pos];

    for (int level = 1; level < 20; ++level)
    {
        const int half = er[level - 1];
        for (int pos = 1; pos + er[level] - 1 <= n; ++pos)
            f[pos][level] = min(f[pos][level - 1], f[pos + half][level - 1]);
    }
}
然后是求公共前缀lcp
int lcp(int a,int b)
{
int x=rk[a],y=rk[b];这一句很重要,我经常忘记
if (x>y)
{
int t;
t=x;x=y;y=t;
}
x++;
int k=log2[y-x+1];
return min(f[x][k],f[y-er[k]+1][k]);
}
当然还有根据height数组的分组思想,和二分答案。
poj 3693
给定一个字符串,求重复次数最多的连续重复子串。
枚举长度L,然后求长为L的子串最多能连续出现几次。只需考察起点 i=0,L,2L,… 并求lcp(i,i+L),不必像论文里写的向前向后匹配。令k=lcp(i,i+L),则从i开始连续重复k/L+1次。若k%L!=0,再令t=i-(L-k%L);如果t>=0并且lcp(t,t+L)>=L-k%L(即从t开始能一直匹配到i),那么ans=k/L+2,否则就是k/L+1。
#include <iostream>
#include <cstring>
// Capacity of every per-position array below.
const int maxn=100003;
// Input string of the current test case (filled by scanf in main).
char s[maxn];
// wa, wb : rank buffers swapped inside da
// w      : counting-sort buckets used by da
// v      : per-round sort keys used by da
// sa     : suffix array; rk: rank of each suffix; h: height array indexed by rank
// er[j]  : 2^j; log2[i]: floor(log2(i)) table, both built in RMQ
// a      : candidate period lengths collected in work
// NOTE(review): a global named log2 may clash with the C math function of the
// same name on some toolchains -- confirm before compiling with a modern compiler.
int wa[maxn],wb[maxn],w[maxn],v[maxn],sa[maxn],rk[maxn],h[maxn],er[20],log2[maxn],a[maxn];
// f[i][j]: minimum of h over ranks i .. i+2^j-1 (sparse table built in RMQ).
int f[maxn][20];
// n: length of s; cc: test-case counter printed by work.
int n,cc;
using namespace std;
// Key-pair equality test used while re-ranking in da: yields 1 when
// (r[a], r[a+l]) equals (r[b], r[b+l]) and 0 otherwise. r[a+l] and r[b+l]
// are read only if r[a] == r[b].
int cmp(int *r,int a,int b,int l)
{
    const bool firstSame = (r[a] == r[b]);
    if (!firstSame)
        return 0;
    const bool secondSame = (r[a + l] == r[b + l]);
    return secondSame ? 1 : 0;
}
void da(char *s,int *sa,int n,int m)
{
int *x=wa,*y=wb,*t;
int i,j,p;
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[x[i]=s[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[x[i]]]=i;
for (j=1,p=1;p<n;j*=2,m=p)
{
for (p=0,i=n-j;i<n;i++) y[p++]=i;
for (i=0;i<n;i++) if (sa[i]>=j) y[p++]=sa[i]-j;
for (i=0;i<n;i++) v[i]=x[y[i]];
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[v[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[v[i]]]=y[i];
for (t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
}
void calheight(char *s,int *sa,int n)
{
int i,j,k=0;
for (i=1;i<=n;i++) rk[sa[i]]=i;
for (i=0;i<n;h[rk[i++]]=k)
for (k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
}
// Prepares range-minimum queries over h[1..n]:
//   er[j]   = 2^j for j in 0..19
//   log2[i] = floor(log2(i)) for i in 1..n (log2[0] is set to -1)
//   f[i][j] = min(h[i], ..., h[i + 2^j - 1])
void RMQ()
{
    er[0] = 1;
    for (int bit = 1; bit < 20; ++bit)
        er[bit] = er[bit - 1] * 2;

    log2[0] = -1;
    for (int len = 1; len <= n; ++len)
    {
        log2[len] = log2[len - 1];
        if ((len & (len - 1)) == 0)   // len is a power of two
            ++log2[len];
    }

    for (int pos = 1; pos <= n; ++pos)
        f[pos][0] = h[pos];

    for (int level = 1; level < 20; ++level)
    {
        const int half = er[level - 1];
        for (int pos = 1; pos + er[level] - 1 <= n; ++pos)
            f[pos][level] = min(f[pos][level - 1], f[pos + half][level - 1]);
    }
}
int lcp(int a,int b)
{
int x=rk[a],y=rk[b];
if (x>y)
{
int t;
t=x;x=y;y=t;
}
x++;
int k=log2[y-x+1];
return min(f[x][k],f[y-er[k]+1][k]);
}
// Finds the substring of s with the largest number of consecutive repetitions
// and prints it as "Case <cc>: <substring>"; among equally good candidates the
// lexicographically smallest one is printed.
//
// Phase 1 enumerates every period length l and every sample start i = 0, l, 2l, ...
// With k = lcp(i, i+l) the block starting at i repeats k/l + 1 times. When k is
// not a multiple of l, moving the start back to t = i - (l - k%l) gains one more
// repetition, provided the match starting at t reaches position i.
// Phase 2 walks the suffixes in sorted order and takes the first start that
// realises the best repetition count for one of the recorded period lengths.
void work()
{
    int i,j,k,t,r,l,mm=0,tot=0;
    for (l=1;l<n;l++)
        for (i=0;i+l<n;i+=l)
        {
            k=lcp(i,i+l);
            r=k/l+1;
            if (k%l!=0)
            {
                t=i-(l-k%l);
                // The extra repetition exists only if the match from t covers
                // the whole gap up to i (l - k%l characters). Comparing with k
                // instead would accept short accidental matches when k is small.
                if (t>=0 && lcp(t,t+l)>=l-k%l) r++;
            }
            if (r>mm)
            {
                mm=r;
                tot=0;
                a[++tot]=l;
            }
            else if (r==mm && a[tot]!=l)
            {
                // l only grows, so duplicates are always adjacent. Recording
                // each length once keeps tot below n and a[] within bounds.
                a[++tot]=l;
            }
        }

    int st=0,tl=n;
    bool found=false;
    // Ranks 1..n are the real suffixes (rank 0 is the empty one), so the last
    // rank must be examined too.
    for (i=1;i<=n && !found;++i)
        for (j=1;j<=tot;++j)
        {
            const int other=sa[i]+a[j];
            // A shifted start at or beyond the end of the string has nothing in
            // common with anything; do not query ranks that were never computed.
            k=(other<n)?lcp(sa[i],other):0;
            if (k>=(mm-1)*a[j])
            {
                st=sa[i];
                tl=mm*a[j];
                found=true;
                break;
            }
        }

    printf("Case %d: ",cc);
    for (i=st;i<st+tl;i++)
        printf("%c",s[i]);
    printf("\n");
}
// Reads one string per test case until end of input or a line consisting of a
// single '#', and runs the suffix-array pipeline followed by work() on each.
// Standard input and output are redirected to pin.txt / pou.txt.
int main()
{
    freopen("pin.txt","r",stdin);
    freopen("pou.txt","w",stdout);
    cc = 0;
    for (;;)
    {
        if (scanf("%s",s) == EOF)
            break;
        if (s[0] == '#' && strlen(s) == 1)
            break;
        ++cc;
        n = strlen(s);
        s[n] = 0;
        da(s, sa, n + 1, 128);   // n + 1: the terminating 0 is sorted too
        calheight(s, sa, n);
        RMQ();
        work();
    }
    return 0;
}
poj2406
论文解释的很清楚,求height数组中每一个到height[rank[1]]之间的最小值即可。
记住:是rank[1],也就是整个字符串对应的那个后缀;下面的代码下标从0开始,所以对应的是rk[0]。
#include <iostream>
#include <cstring>
// Capacity of every per-position array below.
const int maxn=1000003;
// Input string of the current test case (filled by scanf in main).
char s[maxn];
// wa, wb : rank buffers swapped inside da
// w      : counting-sort buckets used by da
// v      : per-round sort keys used by da
// sa     : suffix array; rk: rank of each suffix; h: height array indexed by rank
// rm[i]  : minimum of h between rank i and rank rk[0] (filled by RMQ)
int wa[maxn],wb[maxn],w[maxn],v[maxn],sa[maxn],rk[maxn],rm[maxn],h[maxn];
// Length of s.
int n;
using namespace std;
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]); returns 0 otherwise.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
void da(char *s,int *sa,int n,int m)
{
int *x=wa,*y=wb,*t;
int i,j,p;
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[x[i]=s[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[x[i]]]=i;
for (j=1,p=1;p<n;j*=2,m=p)
{
for (p=0,i=n-j;i<n;i++) y[p++]=i;
for (i=0;i<n;i++) if (sa[i]>=j) y[p++]=sa[i]-j;
for (i=0;i<n;i++) v[i]=x[y[i]];
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[v[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[v[i]]]=y[i];
for (t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
}
void calheight(char *s,int *sa,int n)
{
int i,j,k=0;
for (i=1;i<=n;i++) rk[sa[i]]=i;
for (i=0;i<n;h[rk[i++]]=k)
for (k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
}
void RMQ()
{
int i,k=rk[0];
rm[k]=1000000;
for (i=k-1;i>=0;i--)
if (h[i+1]<rm[i+1]) rm[i]=h[i+1];
else rm[i]=rm[i+1];
for (i=k+1;i<=n;i++)
if (h[i]<rm[i-1]) rm[i]=h[i];
else rm[i]=rm[i-1];
}
int work()
{
int i;
for (i=1;i<=n/2;i++)
{
if (n%i) continue;
if (rm[rk[i]]==n-i) return n/i;
}
return 1;
}
// Reads one string per test case until end of input or a line consisting of a
// single '.', and prints the repetition count computed by work() for each.
// Standard input and output are redirected to pin.txt / pou.txt.
int main()
{
    freopen("pin.txt","r",stdin);
    freopen("pou.txt","w",stdout);
    for (;;)
    {
        if (scanf("%s",s) == EOF)
            break;
        if (s[0] == '.' && strlen(s) == 1)
            break;
        n = strlen(s);
        s[n] = 0;
        da(s, sa, n + 1, 128);   // n + 1: the terminating 0 is sorted too
        calheight(s, sa, n);
        RMQ();
        const int answer = work();
        printf("%d\n",answer);
    }
    return 0;
}
poj2774
见论文
#include <iostream>
using namespace std;
// Capacity of every per-position array below.
const int maxn=200004;
// wa, wb : rank buffers swapped inside da
// w      : counting-sort buckets used by da
// v      : per-round sort keys used by da
// a[i]   : 0 if the suffix ranked i starts before index n1, 1 otherwise (set in work)
// sa     : suffix array; rk: rank of each suffix; h: height array indexed by rank
int wa[maxn],wb[maxn],w[maxn],v[maxn],a[maxn],sa[maxn],rk[maxn],h[maxn];
// s: first input string, later extended in main to s + "$" + s1; s1: second input string.
char s[maxn],s1[maxn];
// n: length of the concatenated string; n1: length of the first input string.
int n,n1;
// Key-pair equality test used while re-ranking in da: yields 1 when
// (r[a], r[a+l]) equals (r[b], r[b+l]) and 0 otherwise. r[a+l] and r[b+l]
// are read only if r[a] == r[b].
int cmp(int *r,int a,int b,int l)
{
    const bool firstSame = (r[a] == r[b]);
    if (!firstSame)
        return 0;
    const bool secondSame = (r[a + l] == r[b + l]);
    return secondSame ? 1 : 0;
}
void da(char *s,int *sa,int n,int m)
{
int *x=wa,*y=wb,*t,i,j,p;
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[x[i]=s[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[x[i]]]=i;
for (j=1,p=1;p<n;j*=2,m=p)
{
for (p=0,i=n-j;i<n;i++) y[p++]=i;
for (i=0;i<n;i++) if (sa[i]>=j) y[p++]=sa[i]-j;
for (i=0;i<n;i++) v[i]=x[y[i]];
for (i=0;i<m;i++) w[i]=0;
for (i=0;i<n;i++) w[v[i]]++;
for (i=1;i<m;i++) w[i]+=w[i-1];
for (i=n-1;i>=0;i--) sa[--w[v[i]]]=y[i];
for (t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
}
void calheight(char *s,int *sa,int n)
{
int i,j,k=0;
for (i=1;i<=n;i++) rk[sa[i]]=i;
for (i=0;i<n;h[rk[i++]]=k)
for (k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
}
// Returns the largest height value found between two neighbouring ranks whose
// suffixes start in different input strings.
// a[r] records the side of rank r: 0 when sa[r] < n1, 1 otherwise. Ranks are
// labelled from 2 and compared from 3 onwards.
int work()
{
    for (int r = 2; r <= n; ++r)
        a[r] = (sa[r] < n1) ? 0 : 1;

    int best = 0;
    for (int r = 3; r <= n; ++r)
    {
        if (a[r] == a[r - 1])
            continue;               // both suffixes come from the same string
        if (h[r] > best)
            best = h[r];
    }
    return best;
}
// Reads two strings, joins them as s + "$" + s1, builds the suffix and height
// arrays of the result and prints the value computed by work().
// Standard input and output are redirected to pin.txt / pou.txt.
int main()
{
    freopen("pin.txt","r",stdin);
    freopen("pou.txt","w",stdout);

    scanf("%s",s);
    scanf("%s",s1);
    n1 = strlen(s);          // suffixes starting before n1 belong to the first string

    strcat(s,"$");
    strcat(s,s1);
    n = strlen(s);
    s[n] = 0;

    da(s, sa, n + 1, 128);   // n + 1: the terminating 0 is sorted too
    calheight(s, sa, n);

    const int answer = work();
    printf("%d\n",answer);
    return 0;
}
poj1743
就是应用二分答案和分组思想(我一开始很二叉的用数组记录了分组,其实只要在循环的时候判断一下就可以了),详细可以见论文。
#include <stdio.h>
using namespace std;
// Capacity of every per-position array below.
const int maxn=20005;
// wa, wb : rank buffers swapped inside da
// sa     : suffix array; rk: rank of each suffix
// s      : input notes, replaced in init by shifted differences of neighbours
// w      : counting-sort buckets used by da
// h      : height array indexed by rank
// v      : per-round sort keys used by da
int wa[maxn],wb[maxn],sa[maxn],rk[maxn],s[maxn],w[maxn],h[maxn],v[maxn];
// n: number of notes read, then (after init) the length of the difference
// sequence; ans: result of the binary search in bin.
int n,ans;
// Reads n integers into s, replaces them by the n-1 differences of neighbouring
// values (shifted by +100), appends a terminating 0 and shrinks n by one so
// that it equals the length of the difference sequence.
void init()
{
    for (int k = 0; k < n; ++k)
        scanf("%d",&s[k]);

    for (int k = 0; k + 1 < n; ++k)
        s[k] = s[k + 1] - s[k] + 100;

    n -= 1;
    s[n] = 0;
}
// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) and (r[b], r[b+l]); returns 0 otherwise.
// The second key is only read when the first keys already match.
int cmp(int* r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return r[a + l] == r[b + l] ? 1 : 0;
}
// Builds the suffix array of the integer sequence s[0..n-1] by prefix doubling
// with counting sort.
//   s  : input symbols; each s[i] is used as a bucket index, so it must lie in [0, m)
//   sa : output; sa[k] receives the start index of the k-th smallest suffix
//   n  : number of symbols to sort (main passes length+1 so that the
//        terminating 0 is sorted as well)
//   m  : exclusive upper bound of the initial symbol values
// Works on the global scratch arrays wa/wb (rank buffers), w (bucket counters)
// and v (first-key cache).
void da(int* s,int* sa,int n,int m)
{
    int* rankNow = wa;
    int* rankOld = wb;
    int idx, width, classes;

    // Round 0: counting sort on the single leading symbol.
    for (idx = 0; idx < m; ++idx)
        w[idx] = 0;
    for (idx = 0; idx < n; ++idx)
    {
        rankNow[idx] = s[idx];
        ++w[rankNow[idx]];
    }
    for (idx = 1; idx < m; ++idx)
        w[idx] += w[idx - 1];
    for (idx = n - 1; idx >= 0; --idx)
    {
        --w[rankNow[idx]];
        sa[w[rankNow[idx]]] = idx;
    }

    width = 1;
    classes = 1;
    while (classes < n)
    {
        // Order the suffixes by their second key (the rank of the part that
        // starts `width` symbols later); suffixes with no second key go first.
        classes = 0;
        for (idx = n - width; idx < n; ++idx)
            rankOld[classes++] = idx;
        for (idx = 0; idx < n; ++idx)
        {
            if (sa[idx] >= width)
                rankOld[classes++] = sa[idx] - width;
        }

        // Stable counting sort of that order by the first key.
        for (idx = 0; idx < m; ++idx)
            w[idx] = 0;
        for (idx = 0; idx < n; ++idx)
            v[idx] = rankNow[rankOld[idx]];
        for (idx = 0; idx < n; ++idx)
            ++w[v[idx]];
        for (idx = 1; idx < m; ++idx)
            w[idx] += w[idx - 1];
        for (idx = n - 1; idx >= 0; --idx)
        {
            --w[v[idx]];
            sa[w[v[idx]]] = rankOld[idx];
        }

        // Swap the buffers and assign new ranks; equal key pairs share a rank.
        int* swapTmp = rankNow;
        rankNow = rankOld;
        rankOld = swapTmp;
        rankNow[sa[0]] = 0;
        classes = 1;
        for (idx = 1; idx < n; ++idx)
        {
            if (cmp(rankOld, sa[idx - 1], sa[idx], width))
                rankNow[sa[idx]] = classes - 1;
            else
                rankNow[sa[idx]] = classes++;
        }

        width *= 2;
        m = classes;   // the next round only needs this many buckets
    }
}
void calheight(int* s,int* sa,int n)
{
int i,j,k=0;
for (i=1;i<=n;i++)
rk[sa[i]]=i;
for (i=0;i<n;h[rk[i++]]=k)
for (k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
}
int ok(int t)
{
int smax,smin,i,j;
smin=smax=sa[1];
for (i=2;i<=n;i++)
{
if (h[i]>=t && i<n)
{
if (sa[i]>smax) smax=sa[i];
if (sa[i]<smin) smin=sa[i];
continue;
}
if (smax-smin>=t) return 1;
smin=smax=sa[i];
}
return 0;
}
// Binary-searches the largest length in [4, n] accepted by ok() and stores it
// in the global ans. ans is 0 when no length in that range is accepted.
void bin()
{
    int ll = 4, rr = n, mid;
    // ans is a global shared by all test cases. Reset it so that a case with
    // no feasible length does not inherit the answer of the previous case.
    ans = 0;
    while (ll <= rr)
    {
        mid = (ll + rr) / 2;
        if (ok(mid))
        {
            ans = mid;
            ll = mid + 1;
        }
        else
        {
            rr = mid - 1;
        }
    }
}
// Processes test cases until a note count of 0 is read. For each case the
// difference sequence is built, its suffix and height arrays are computed and
// the binary search result is converted from differences to notes (+1);
// results below 5 are printed as 0.
// Standard input and output are redirected to pin.txt / pou.txt.
int main()
{
    freopen("pin.txt","r",stdin);
    freopen("pou.txt","w",stdout);
    for (;;)
    {
        scanf("%d",&n);
        if (!n)
            break;
        init();
        da(s, sa, n + 1, 200);   // n + 1: the terminating 0 is sorted too
        calheight(s, sa, n);
        bin();
        ++ans;
        if (ans < 5)
            ans = 0;
        printf("%d\n",ans);
    }
    return 0;
}