Front compression
Time Limit: 5000/5000 MS (Java/Others) Memory Limit: 102400/102400 K (Java/Others)Total Submission(s): 986 Accepted Submission(s): 404
Problem Description
Front compression is a type of delta encoding compression algorithm whereby common prefixes and their lengths are recorded so that they need not be duplicated. For example:
The size of the input is 43 bytes, while the size of the compressed output is 40. Here, every space and newline is also counted as 1 byte.
Given the input, each line of which is a substring of a long string, what are the sizes of the input and of the corresponding compressed output?
![](https://i-blog.csdnimg.cn/blog_migrate/fe5b05bd3f66d649335474cddbc5e96b.jpeg)
The size of the input is 43 bytes, while the size of the compressed output is 40. Here, every space and newline is also counted as 1 byte.
Given the input, each line of which is a substring of a long string, what are the sizes of the input and of the corresponding compressed output?
Input
There are multiple test cases. Process to the End of File.
The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains an integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.
The first line of each test case is a long string S made up of lowercase letters, whose length doesn't exceed 100,000. The second line contains an integer 1 ≤ N ≤ 100,000, which is the number of lines in the input. Each of the following N lines contains two integers 0 ≤ A < B ≤ length(S), indicating that that line of the input is substring [A, B) of S.
Output
For each test case, output the sizes of the input and corresponding compressed output.
Sample Input
frcode
2
0 6
0 6
unitedstatesofamerica
3
0 6
0 12
0 21
myxophytamyxopodnabnabbednabbingnabit
6
0 9
9 16
16 19
19 25
25 32
32 37
Sample Output
14 12
42 31
43 40
Author
Zejun Wu (watashi)
Source
题意:按计算要求写成前缀表达形式的输出有多少个字符
题解:用后缀数组可以解之(其实这题暴力就可以了。。)
#include<limits.h>
#include<math.h>
#include<stdio.h>
#include<string.h>
/* Maximum length of the input string S (the problem statement caps it at
   100,000). Every array below gets 8 spare slots on top of this. */
#define MAXN 100000
/* bucket: counting-sort counters used by suffix_array.
   dp:     sparse table filled by initializer_rmq; dp[k][i] holds the minimum
           of height[i .. i+2^k-1]. */
int bucket[MAXN+8],dp[20][MAXN+8];
/* Working rank buffers that suffix_array swaps between on every round.
   main later passes rankx to calculate_height, after which rankx[p] is the
   rank (position in sa) of the suffix starting at p. */
int rankx[MAXN+8],ranky[MAXN+8];
/* sa:     suffix array (start positions of the suffixes in sorted order).
   height: height[i] is the length of the common prefix of the suffixes at
           sa[i-1] and sa[i], as computed by calculate_height. */
int sa[MAXN+8],height[MAXN+8];
/* sta/fin: the A and B endpoints of each query line as read by main.
   r:       the input string re-encoded as integers ('a' -> 1, ...) with a
            trailing 0 appended by main. */
int sta[MAXN+8],fin[MAXN+8],r[MAXN+8];
/* Raw input string as read by scanf. */
char s[MAXN+8];
/* Returns the smaller of x and y (y when they are equal). */
int MIN(int x,int y)
{
    if (x < y)
        return x;
    return y;
}
/*
 * Tells whether positions a and b carry the same key pair: the value at the
 * position itself and the value l slots further on.
 *
 * Returns 1 when r[a]==r[b] and r[a+l]==r[b+l], otherwise 0. The second
 * pair is only read when the first pair matches.
 */
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b])
        return 0;
    return r[a + l] == r[b + l];
}
/*
 * Sorts the n suffixes of s[0..n-1] by prefix doubling with counting sort
 * and stores their start positions, in sorted order, in sa[0..n-1].
 *
 * s  : input symbols; every value is used as an index into bucket, so each
 *      must lie in [0, m).
 * sa : output array with room for n entries.
 * n  : number of symbols in s (main passes the string length plus one, so
 *      the trailing 0 it appends is included).
 * m  : number of distinct symbol values (exclusive upper bound).
 *
 * Uses the file-level arrays bucket, rankx and ranky as scratch space; their
 * contents after the call are working ranks only.
 *
 * NOTE(review): cmp reads y[sa[i]+j]; this appears to rely on the caller's
 * unique smallest terminator (main appends 0) so that two positions with
 * equal first keys always have a second key inside the array - confirm
 * before reusing with input that lacks such a terminator.
 */
void suffix_array(int *s,int *sa,int n,int m)
{
int i,j,p,*x=rankx,*y=ranky,*t;
/* Round 0: counting sort of the positions by their single symbol. x[i]
   doubles as the initial rank of position i. */
for(i=0;i<m;i++) bucket[i]=0;
for(i=0;i<n;i++) bucket[x[i]=s[i]]++;
for(i=1;i<m;i++) bucket[i]+=bucket[i-1];
/* Filling from the back keeps the sort stable. */
for(i=n-1;i>=0;i--) sa[--bucket[x[i]]]=i;
/* Each round doubles the compared prefix length (j -> 2*j). p counts the
   distinct ranks produced by the round; it becomes the bucket count m of the
   next round, and the loop stops once all n ranks are distinct. */
for(j=1,p=1;p<n;j*=2,m=p)
{
/* Build y = positions ordered by their second key (the rank at i+j).
   The last j positions have no second key, so they come first. */
for(p=0,i=n-j;i<n;i++) y[p++]=i;
/* The remaining positions follow in the order given by the current sa. */
for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;
/* Stable counting sort of y by the first key x[y[i]] gives the new sa. */
for(i=0;i<m;i++) bucket[i]=0;
for(i=0;i<n;i++) bucket[x[y[i]]]++;
for(i=1;i<m;i++) bucket[i]+=bucket[i-1];
for(i=n-1;i>=0;i--) sa[--bucket[x[y[i]]]]=y[i];
/* Swap buffers: y now holds the old ranks, x receives the new ones. */
t=x,x=y,y=t;
/* Adjacent entries of sa share a rank when both of their old keys match. */
for(p=i=1,x[sa[0]]=0;i<n;i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
}
/*
 * Fills the rank array and the height array from a finished suffix array.
 *
 * r      : the symbols that were sorted; the comparison loop stops at the
 *          first mismatch, so the caller must guarantee one exists (main
 *          appends a terminating 0 for that purpose).
 * sa     : suffix array with n+1 entries (indices 0..n).
 * x      : output, x[p] becomes the index in sa of the suffix starting at p.
 * height : output, height[x[p]] becomes the length of the common prefix of
 *          suffix p and the suffix that precedes it in sa.
 * n      : number of positions to process (0..n-1).
 *
 * The running match length is carried from one position to the next and
 * reduced by at most one, instead of restarting from zero.
 */
void calculate_height(int *r,int *sa,int *x,int *height,int n)
{
    int pos, prev, lcp = 0;

    /* Invert sa to obtain the rank of every start position. */
    for (pos = 0; pos <= n; pos++) {
        x[sa[pos]] = pos;
    }

    for (pos = 0; pos < n; pos++) {
        /* A suffix of rank 0 has no predecessor; skipping it avoids reading
           sa[-1]. The carried lcp is still stored for it below. */
        if (x[pos] != 0) {
            if (lcp != 0) {
                lcp--;
            }
            prev = sa[x[pos] - 1];
            while (r[pos + lcp] == r[prev + lcp]) {
                lcp++;
            }
        }
        height[x[pos]] = lcp;
    }
}
/*
 * Builds the sparse table dp for range-minimum queries over height[1..n].
 *
 * After the call, dp[k][j] is the minimum of height[j .. j+2^k-1] for every
 * k with 2^k <= n and every j for which that window ends at or before n.
 */
void initializer_rmq(int *height,int n)
{
    int level, start, span, half;

    /* Level 0: windows of length one are the values themselves. */
    for (start = 1; start <= n; start++) {
        dp[0][start] = height[start];
    }

    /* Each further level joins two adjacent windows of the previous level. */
    for (level = 1, span = 2; span <= n; level++, span *= 2) {
        half = span / 2;
        for (start = 1; start + span - 1 <= n; start++) {
            dp[level][start] = MIN(dp[level - 1][start],
                                   dp[level - 1][start + half]);
        }
    }
}
/*
 * Returns the minimum of height[lo+1 .. hi], where lo and hi are the smaller
 * and larger of a and b, using the sparse table built by initializer_rmq.
 * main calls it with the ranks of two suffixes to get the length of their
 * common prefix.
 *
 * When a == b the range is empty and INT_MAX is returned, so a caller that
 * takes the minimum with another bound gets that bound back.
 *
 * The window exponent is computed with integer shifts rather than
 * floating-point logarithms, so the result does not depend on rounding and
 * an empty range never evaluates log(0).
 */
int ask_rmq(int a,int b)
{
    int k, len, tmp;

    if (a > b) {
        tmp = a;
        a = b;
        b = tmp;
    }

    len = b - a;              /* number of height entries in (a, b] */
    if (len == 0)
        return INT_MAX;

    /* k = floor(log2(len)) */
    k = 0;
    while ((len >> (k + 1)) != 0)
        k++;

    /* Two windows of length 2^k, anchored at both ends, cover the range. */
    return MIN(dp[k][a+1], dp[k][b-(1<<k)+1]);
}
/*
 * Returns the number of decimal digits needed to print x, ignoring any sign.
 * Zero counts as one digit.
 */
int calculate_len(int x)
{
    int digits = 0;

    do {
        digits++;
        x /= 10;
    } while (x != 0);

    return digits;
}
int main()
{
int i,n,temp;
long long res1,res2;
//freopen("t.txt","r",stdin);
while(scanf("%s",s)>0)
{
for(i=0,temp=strlen(s),r[temp]=0;i<temp;i++) r[i]=s[i]-'a'+1;
suffix_array(r,sa,strlen(s)+1,30);
calculate_height(r,sa,rankx,height,strlen(s));
initializer_rmq(height,strlen(s));
scanf("%d",&n);
for(res1=res2=i=0;i<n;i++)
{
scanf("%d%d",sta+i,fin+i);
res1+=fin[i]-sta[i]+1;
if(i==0)
{
res2+=fin[i]-sta[i]+3;
continue;
}
temp=MIN(fin[i]-sta[i],fin[i-1]-sta[i-1]);
if(sta[i]==sta[i-1])
{
res2+=calculate_len(temp)+fin[i]-sta[i]-temp+2;
}
else
{
temp=MIN(temp,ask_rmq(rankx[sta[i]],rankx[sta[i-1]]));
res2+=calculate_len(temp)+fin[i]-sta[i]-temp+2;
}
}
printf("%I64d %I64d\n",res1,res2);
}
return 0;
}