计数排序
// Stable counting sort used by the suffix-array construction.
// Takes the positions y[1..n] in their current order and writes them to
// sa[1..n] ordered by key x[position]; positions with equal keys keep the
// relative order they had in y. The bucket array a is cleared and reused.
void ssort()
{
    memset(a, 0, sizeof(a));

    // Histogram of the keys, tracking the largest key seen.
    int top = 0;
    for (int p = 1; p <= n; ++p)
    {
        const int key = x[y[p]];
        ++a[key];
        if (key > top) top = key;
    }

    // Prefix sums: a[v] becomes the last output slot for key v.
    for (int v = 1; v <= top; ++v) a[v] += a[v - 1];

    // Walk y backwards so equal keys are emitted in their original order.
    for (int p = n; p >= 1; --p)
    {
        const int pos = y[p];
        sa[a[x[pos]]--] = pos;
    }
}
求SA和rank
x[i]为后缀i的第一关键字(当前排名,初始为字符s[i]),y为按第二关键字排好序的后缀起始位置,sa[i]为排名第i的后缀的起始位置,rank[i]为后缀i的排名
height[i]为后缀sa[i]和后缀sa[i-1]的最长公共前缀的长度
void getsa()
{
fo(i,1,n) y[i]=i,x[i]=s[i];ssort();
for(int j=1;j<=n;j*=2)
{
int k=0;fo(i,n-j+1,n) y[++k]=i;
fo(i,1,n) if (sa[i]>j) y[++k]=sa[i]-j;
ssort();
fo(i,1,n) y[i]=x[i],x[i]=0;
x[sa[1]]=1;k=1;
fo(i,2,n)
{
if (y[sa[i]]!=y[sa[i-1]] || y[sa[i]+j]!=y[sa[i-1]+j]) k++;
x[sa[i]]=k;
}
if (k==n) break;
}
fo(i,1,n) rank[sa[i]]=i;
}
求height
void getheight()
{
int k=0;
fo(i,1,n)
{
int j=sa[rank[i]-1];
while (i+k<=n && j+k<=n && s[i+k]==s[j+k]) k++;
height[rank[i]]=k;
if (k>0) k--;
}
}
完整代码
题目:文件修复
求字符串中至少出现两次的不同子串的个数
#include <cstdio>
#include <cmath>
#include <algorithm>
#include <cstring>
#include <iostream>
#include <cstdlib>
#include <iostream>
// Inclusive counting loop: declares `int i` and runs it from a up to b.
// Both bounds are parenthesized so that expression arguments (e.g. `3&1`)
// keep their meaning once substituted into the loop header.
#define fo(i,a,b) for(int i=(a);i<=(b);i++)
// Capacity of every working array below.
#define N 101000
using namespace std;
// n      : length of the input string.
// i,j,k  : not referenced by the functions in this file (each declares its own locals).
// a      : bucket counters for the counting sort.
// sa     : sa[r] = start position of the r-th smallest suffix.
// rank   : rank[p] = r such that sa[r] == p.
//          NOTE(review): shares its name with std::rank; with the using-directive
//          above, unqualified uses may be ambiguous on toolchains where
//          <type_traits> is pulled in transitively - confirm on your compiler.
// height : height[r] = common-prefix length of suffixes sa[r] and sa[r-1].
// x, y   : sort keys / input order for the counting sort.
int n,i,j,k,a[N],sa[N],rank[N],height[N],x[N],y[N];
// Input string, stored 1-based: characters live in s[1..n].
char s[N];
// Stable counting sort used by the suffix-array construction.
// Takes the positions y[1..n] in their current order and writes them to
// sa[1..n] ordered by key x[position]; positions with equal keys keep the
// relative order they had in y. The bucket array a is cleared and reused.
void ssort()
{
    memset(a, 0, sizeof(a));

    // Histogram of the keys, tracking the largest key seen.
    int top = 0;
    for (int p = 1; p <= n; ++p)
    {
        const int key = x[y[p]];
        ++a[key];
        if (key > top) top = key;
    }

    // Prefix sums: a[v] becomes the last output slot for key v.
    for (int v = 1; v <= top; ++v) a[v] += a[v - 1];

    // Walk y backwards so equal keys are emitted in their original order.
    for (int p = n; p >= 1; --p)
    {
        const int pos = y[p];
        sa[a[x[pos]]--] = pos;
    }
}
void getsa()
{
fo(i,1,n) y[i]=i,x[i]=s[i];ssort();
for(int j=1;j<=n;j*=2)
{
int k=0;fo(i,n-j+1,n) y[++k]=i;
fo(i,1,n) if (sa[i]>j) y[++k]=sa[i]-j;
ssort();
fo(i,1,n) y[i]=x[i],x[i]=0;
x[sa[1]]=1;k=1;
fo(i,2,n)
{
if (y[sa[i]]!=y[sa[i-1]] || y[sa[i]+j]!=y[sa[i-1]+j]) k++;
x[sa[i]]=k;
}
if (k==n) break;
}
fo(i,1,n) rank[sa[i]]=i;
}
void getheight()
{
int k=0;
fo(i,1,n)
{
int j=sa[rank[i]-1];
while (i+k<=n && j+k<=n && s[i+k]==s[j+k]) k++;
height[rank[i]]=k;
if (k>0) k--;
}
}
int main()
{
scanf("%s",s+1);n=strlen(s+1);
getsa();getheight();
int ans=0;
fo(i,1,n) if (height[i]>height[i-1]) ans=ans+height[i]-height[i-1];
printf("%d",ans);
}