Hdu 4622 Reincarnation 后缀数组/后缀自动机

Reincarnation

Time Limit: 6000/3000 MS (Java/Others)    Memory Limit: 131072/65536 K (Java/Others)
Total Submission(s): 3921    Accepted Submission(s): 1541


Problem Description
Now you are back,and have a task to do:
Given a string s consisting of lower-case English letters only, denote f(s) as the number of distinct substrings of s.
You also have some queries; for each one you should calculate f(s[l...r]), where s[l...r] means the substring of s that starts at l and ends at r.
 

Input
The first line contains integer T(1<=T<=5), denote the number of the test cases.
For each test cases,the first line contains a string s(1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q(1 <= Q <= 10000),denote the number of queries.
Then Q lines follows,each lines contains two integer l, r(1 <= l <= r <= n), denote a query.
 

Output
For each test cases,for each query,print the answer in one line.
 

Sample Input
  
  
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
 

Sample Output
  
  
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hash because I am nice.Of course this problem has a solution that don't rely on hash.
 

Author
WJMZBMR
 

Source


求一个字符串区间内不同的子串个数。


1.后缀数组做法:

先求后缀数组。

若求一个串不同的子串个数,只要在所有子串个数里面减去所有height之和就可以。这题我们同样的,先对整个串求后缀数组,每次查询时取区间内求好的height,用倍增(ST表)查询区间最小值得到lcp。

这样会有一个问题。如果两段后缀分别是abac,ac,查询时取aba,a两段,两次的字典序发生了改变。

这时,需要比较一下lcp和两段截断后的长度。如果前一个后缀的起点更靠前,并且lcp不小于两段截断后长度中较小的那个(也就是截断后,后一段是前一段的前缀),就保留之前的串而不更新,这样就可以保证字典序不变了。

理论复杂度O(nlogn+qn),耗时1263ms


#include <cstdio>
#include <string.h>
#include <string> 
#include <map>
#include <queue>
#include <vector>
#include <set>
#include <algorithm>
#include <math.h>
#include <cmath>
#include <stack>
#define mem0(a) memset(a,0,sizeof(a))
#define meminf(a) memset(a,0x3f,sizeof(a))
using namespace std;
typedef long long ll;
typedef long double ld;
typedef double db;
const int maxn=2005,inf=0x3f3f3f3f;  
const ll llinf=0x3f3f3f3f3f3f3f3f;   
const ld pi=acos(-1.0L);
// Working storage for build():
//   wa, wb : the two rank buffers that build() swaps between rounds
//   wv     : sort keys of the current round
//   ws     : counting-sort buckets
//   sa     : suffix array (sa[i] = start of the i-th smallest suffix)
//   ranki  : inverse of sa, written by build() for i>=1
//   height : LCP of the suffixes at sa[i-1] and sa[i], written by build()
int wa[maxn],wb[maxn],wv[maxn],ws[maxn],sa[maxn],ranki[maxn],height[maxn];
// Input string re-encoded as integers by main(): letters become 1..26 and a
// terminating 0 is appended.
int s[maxn];
// mn is the sparse table over height[] filled by init() and read by rmq().
// lcp is declared but not referenced anywhere in this program.
int mn[maxn][12],lcp[maxn];
// Raw input string as read with scanf("%s").
char a[maxn];

// Tells whether positions a and b carry the same key pair in r, that is
// r[a]==r[b] and r[a+l]==r[b+l]. Returns 1 if so, 0 otherwise. The second
// pair is only inspected when the first pair matches.
int cmp(int *r,int a,int b,int l) {
	if (r[a]!=r[b]) return 0;
	return r[a+l]==r[b+l] ? 1 : 0;
}

// Builds the suffix array of r[0..n-1] by prefix doubling (each round is a
// two-pass counting sort), then fills ranki[] and height[].
//
//   r  : input symbols; every r[i] is used as a bucket index, so it must lie
//        in [0, m).
//   sa : output; sa[i] receives the start position of the i-th smallest suffix.
//   n  : number of symbols in r.
//   m  : number of buckets for the first round (exclusive bound on r[i]).
//
// Uses the global scratch arrays wa, wb, wv and ws, and writes the globals
// ranki and height.
// NOTE(review): the LCP scan at the end has no bounds check; the caller in
// this file passes a string ending in a 0 symbol smaller than all others,
// which is presumably what makes that scan (and leaving ranki[sa[0]]
// unwritten) safe - confirm before reusing with other inputs.
void build(int *r,int *sa,int n,int m) {
	int i,j,k,p,*x=wa,*y=wb,*t;

	// Round 0: counting sort of the suffixes by their first symbol.
	for (i=0;i<m;i++) ws[i]=0;
	for (i=0;i<n;i++) ws[x[i]=r[i]]++;
	// Prefix sums must start at 1; starting at 0 would read ws[-1].
	for (i=1;i<m;i++) ws[i]+=ws[i-1];
	for (i=n-1;i>=0;i--)
		sa[--ws[x[i]]]=i;

	// Each round sorts by the first 2*j symbols using the ranks for j symbols.
	// p counts the distinct ranks produced; the loop stops once all n differ.
	for (j=1,p=1;p<n;j*=2,m=p) {
		// y = suffixes ordered by their second half. The last j suffixes have
		// an empty second half and therefore come first.
		for (p=0,i=n-j;i<n;i++)
			y[p++]=i;
		for (i=0;i<n;i++)
			if (sa[i]>=j) y[p++]=sa[i]-j;

		// Stable counting sort of y by the rank of the first half.
		for (i=0;i<n;i++)
			wv[i]=x[y[i]];
		for (i=0;i<m;i++) ws[i]=0;
		for (i=0;i<n;i++)
			ws[wv[i]]++;
		for (i=1;i<m;i++) ws[i]+=ws[i-1];
		for (i=n-1;i>=0;i--)
			sa[--ws[wv[i]]]=y[i];

		// Recompute ranks into the other buffer: neighbours in sa share a rank
		// exactly when both halves had equal ranks in the previous round.
		t=x;x=y;y=t;
		p=1;x[sa[0]]=0;
		for (i=1;i<n;i++)
			x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
	}

	// Inverse permutation (index 0 is skipped, as in the original template).
	for (i=1;i<n;i++) ranki[sa[i]]=i;

	// height[ranki[i]] = LCP of suffix i and its predecessor in sa order.
	// k is carried from one i to the next and reduced by at most one.
	k=0;
	for (i=0;i<n-1;height[ranki[i++]]=k) {
		if (k) k--;
		for (j=sa[ranki[i]-1];r[i+k]==r[j+k];k++);
	}
}

// Fills the sparse table mn from height[1..n]: after the call mn[i][j] is the
// minimum of height[i .. i+2^j-1] for every window that fits inside [1, n].
void init(int n) {
	for (int pos=1;pos<=n;++pos)
		mn[pos][0]=height[pos];

	// span is the window length 2^lvl; each level merges two half windows.
	for (int lvl=1,span=2;span<=n;++lvl,span<<=1) {
		const int half=span>>1;
		for (int pos=1;pos+span-1<=n;++pos)
			mn[pos][lvl]=min(mn[pos][lvl-1],mn[pos+half][lvl-1]);
	}
}

// Returns the minimum of height[] over the ranks strictly after the smaller
// of ranki[a], ranki[b] up to and including the larger one, using the sparse
// table mn. a and b are suffix start positions.
int rmq(int a,int b) {
	int lo=ranki[a];
	int hi=ranki[b];
	if (lo>hi) swap(lo,hi);
	++lo;

	// Largest lvl with 2^lvl <= window width; two such windows cover the range.
	const int width=hi-lo+1;
	int lvl=0;
	for (;(1<<(lvl+1))<=width;++lvl) {}

	const int leftMin=mn[lo][lvl];
	const int rightMin=mn[hi-(1<<lvl)+1][lvl];
	return min(leftMin,rightMin);
}

// Reads the test cases, builds the suffix array of each string once and then
// answers every query by walking the suffixes in sa order and subtracting the
// overlap between consecutive suffixes that start inside the query range.
int main() {
	int cas;
	scanf("%d",&cas);
	while (cas--) {
		scanf("%s",a);
		const int n=strlen(a);

		// Letters map to 1..26 so that 0 is free for the terminating symbol.
		for (int p=0;p<n;++p)
			s[p]=a[p]-'a'+1;
		s[n]=0;
		build(s,sa,n+1,27);
		init(n);

		int q,l,r;
		scanf("%d",&q);
		for (int t=0;t<q;++t) {
			scanf("%d%d",&l,&r);

			// Start from the count of all (start, end) pairs in the range.
			const int span=r-l+1;
			int ans=span*(span+1)/2;

			// Switch to 0-based positions.
			--r;
			--l;

			int prev=-1;  // rank of the suffix the next one is compared with
			for (int rk=1;rk<=n;++rk) {
				const int cur=sa[rk];
				if (cur<l||cur>r) continue;
				if (prev==-1) {
					prev=rk;
					continue;
				}

				const int before=sa[prev];
				const int common=rmq(before,cur);
				// Shorter of the two suffixes once both are cut at r.
				const int room=min(r-before+1,r-cur+1);
				ans-=min(common,room);

				// Keep the old reference only when it starts earlier and the
				// current cut suffix is entirely covered by the common prefix.
				if (before>=cur||common<room) prev=rk;
			}
			printf("%d\n",ans);
		}
	}
	return 0;
}


2.后缀自动机做法:

对n个后缀依次建立后缀自动机,由于自动机的更新是动态的,每更新一次就可以求得答案。复杂度O(n^2),最后跑了811ms,且代码复杂度比后缀数组小不少。


#include <cstdio>
#include <iostream>
#include <string.h>
#define mem0(a) memset(a,0,sizeof(a))
#define meminf(a) memset(a,0x3f,sizeof(a))
using namespace std;
typedef long long ll;
typedef long double ld;
typedef double db;
const int maxn=2005,maxk=26,inf=0x3f3f3f3f;  
const ll llinf=0x3f3f3f3f3f3f3f3f;
// ans[l][r] (1-based, inclusive) is the answer for the substring from l to r;
// main() fills it for every l<=r before reading the queries.
int ans[maxn][maxn];
// Input string as read with scanf("%s").
char s[maxn];

class SAM {
	public:
	void init() {
		num=last=tot=0;
		a[0].len=0;a[0].fa=-1;
		for (int i=0;i<maxk;i++) a[0].son[i]=-1;
	}
	int update (char ch) {
		int now=++num,p,c=ch-'a';
		a[now].len=a[last].len+1;
		memset(a[now].son,-1,sizeof(a[now].son));
		for (p=last;p!=-1&&a[p].son[c]==-1;p=a[p].fa)
		    a[p].son[c]=now;
		if (p==-1) a[now].fa=0; else {
			int q=a[p].son[c];
			if (a[p].len+1==a[q].len) {
				a[now].fa=q;
			} else {
				int ne=++num;
				a[ne].len=a[p].len+1;
				memcpy(a[ne].son,a[q].son,sizeof(a[q].son));
				a[ne].fa=a[q].fa;
				for (;p!=-1&&a[p].son[c]==q;p=a[p].fa) 
					a[p].son[c]=ne;
				a[q].fa=a[now].fa=ne;
			}
		}
		last=now;
		tot+=a[now].len-a[a[now].fa].len;
		return tot;
	}
	private:
	int num,last,tot;
	struct node{
		int len,fa;
		int son[maxk];
	} a[maxn*2];
};
SAM sa;

// For every test case, rebuilds the automaton once per starting position and
// records the running total after each appended character in ans, then
// answers the queries by table lookup.
int main() {
	int cas;
	scanf("%d",&cas);
	while (cas--) {
		scanf("%s",s);
		const int len=strlen(s);

		for (int start=0;start<len;++start) {
			sa.init();
			int *row=ans[start+1];  // answers for substrings beginning at start
			for (int end=start;end<len;++end)
				row[end+1]=sa.update(s[end]);
		}

		int q,x,y;
		scanf("%d",&q);
		for (int left=q;left>0;--left) {
			scanf("%d%d",&x,&y);
			printf("%d\n",ans[x][y]);
		}
	}
	return 0;
}




  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值