Frontcompression
Time Limit: 5000/5000 MS(Java/Others) Memory Limit: 102400/102400 K(Java/Others)
Total Submission(s): 2609 Accepted Submission(s): 880
Problem Description
The size of the input is 43 bytes, while the size of the compressed output is 40.Here, every space and newline is also counted as 1 byte.
Given the input, each line of which is a substring of a long string, what aresizes of it and corresponding compressed output?
Input
There are multipletest cases. Process to the End of File.
The first line of each test case is a long string S made up of lowercaseletters, whose length doesn't exceed 100,000. The second line contains ainteger 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of thefollowing N lines contains two integers 0 ≤ A < B ≤ length(S), indicatingthat that line of the input is substring [A, B) of S.
Output
For each testcase, output the sizes of the input and corresponding compressed output.
Sample Input
frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37
Sample Output
14 12
42 31
43 40
Author
Zejun Wu (watashi)
Source
2013 Multi-University Training Contest 9
但是,你可能会问,这个height的最小值是后缀的lcp,而这里的子串并不一定是后缀。其实还是一样,所有的子串一定是某一个后缀的前缀,所以我们直接用它对应的后缀来做lcp即可。唯一的区别就是,如果lcp长度大于前一个或者后一个任意一个的时候,我们要去小的那个,及lcp是三者的最小值。知道两个子串的lcp之后,就可以知道前面那个数字的占位以及剩下字符串的占位,直接统计即可。
#include<bits/stdc++.h>
#define LL long long
#define N 100010
using namespace std;
struct Suffix_Array
{
int sa[N],Rank[N],h[N],n;
int xx[N],yy[N],c[N]; char *s;
bool cmp(int *s,int x,int y,int k)
{return (s[x]==s[y])&&(s[x+k]==s[y+k]);}
void ins(char *str) {s=str;n=strlen(s)+1;}
void DA()
{
memset(c,0,sizeof(c));
int *x=xx,*y=yy,m=130,*t,i;
for(i=0;i<n;i++) x[i]=s[i];
for(i=0;i<n;i++) c[x[i]]++;
for(i=1;i<m;i++) c[i]+=c[i-1];
for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
for(int k=1,tot=0;tot<n;k<<=1,m=tot)
{
memset(c,0,sizeof(c));
for(i=0;i<n;i++) c[x[i]]++;
for(i=1;i<m;i++) c[i]+=c[i-1];
for(i=n-k,tot=0;i<n;i++) y[tot++]=i;
for(i=0;i<n;i++) if (sa[i]>=k) y[tot++]=sa[i]-k;
for(i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
for(i=tot=1,t=x,x=y,y=t,x[sa[0]]=0;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],k)?tot-1:tot++;
}
}
void cal_height()
{
int i,j,k=0;
for(i=1;i<n;i++) Rank[sa[i]]=i;
for(i=0;i<n-1;h[Rank[i++]]=k)
for(k?k--:0,j=sa[Rank[i]-1];s[i+k]==s[j+k];k++);
}
} SA;
struct ST
{
struct node{int min,l,r;} tree[N<<2];
inline void build(int i,int l,int r)
{
int mid=(l+r)>>1;
tree[i].r=r; tree[i].l=l;
if (l==r){tree[i].min=SA.h[l];return;}
build(i<<1,l,mid);build(i<<1|1,mid+1,r);
tree[i].min=min(tree[i<<1].min,tree[i<<1|1].min);
}
inline int getmin(int i,int l,int r)
{
if (l>r) return 0;
if (tree[i].l==l&&tree[i].r==r) return tree[i].min;
int mid=(tree[i].l+tree[i].r)>>1;
if (mid>=r) return getmin(i<<1,l,r);
else if (mid<l) return getmin(i<<1|1,l,r);
else return min(getmin(i<<1,l,mid),getmin(i<<1|1,mid+1,r));
}
} seg;
char s[N];
int Count(int lcp)
{
int res=0;
while(lcp)res++,lcp/=10;
return max(res,1);
}
int main()
{
while(~scanf("%s",s))
{
int n;
SA.ins(s);
scanf("%d",&n);
int cur_b,cur_e;
LL ans1=0,ans2=0;
int pre_b=-1,pre_e=-1;
SA.DA();SA.cal_height();
seg.build(1,1,strlen(s));
while(n--)
{
scanf("%d%d",&cur_b,&cur_e);
ans2+=cur_e-cur_b+1;
if (pre_b>-1&&pre_e>-1)
{
int l=SA.Rank[pre_b];
int r=SA.Rank[cur_b];
if (l>r) swap(l,r);
int tmp=seg.getmin(1,l+1,r);
if (l==r) tmp=cur_e-cur_b;
int lcp=min(cur_e-cur_b,min(tmp,pre_e-pre_b));
ans1+=cur_e-cur_b-lcp+2+Count(lcp);
} else ans1+=3+cur_e-cur_b; pre_b=cur_b,pre_e=cur_e;
}
printf("%lld %lld\n",ans2,ans1);
}
return 0;
}