5.
题目描述
Eddy likes to play with string which is a sequence of characters. One day, Eddy has played with a string S for a long time and wonders how could make it more enjoyable. Eddy comes up with following procedure:
-
For each i in [0,|S|-1], let Si be the substring of S starting from i-th character to the end followed by the substring of first i characters of S. Index of string starts from 0.
-
Group up all the Si. Si and Sj will be the same group if and only if Si=Sj.
-
For each group, let Lj be the list of index i in non-decreasing order of Si in this group.
-
Sort all the Lj by lexicographical order.
Eddy can't find any efficient way to compute the final result. As one of his best friend, you come to help him compute the answer!
输入描述:
Input contains only one line consisting of a string S.
1 ≤ |S| ≤ 10^6
S only contains lowercase English letters (i.e. 'a' to 'z').
输出描述:
First, output one line containing an integer K indicating the number of lists.
For each following K lines, output each list in lexicographical order.
For each list, output its length followed by the indexes in it separated by a single space.
示例1
输入
复制
abab
输出
复制
2
2 0 2
2 1 3
示例2
输入
复制
deadbeef
输出
复制
8
1 0
1 1
1 2
1 3
1 4
1 5
1 6
1 7
题目大意:
给一个字符串S,将其按位置i分为两段,把前半段拼接到后面。得到len(S)个字符串,把相同的分为一组。输出每组的个数和组内每个字符串的首位置(原来的i)。
想法:
首先想到的是模拟,把所有串生成出来,然后判重。但这样用set会超内存。然后是找最小的被重复串(百度来说是循环节),百度一下是说用KMP做。没想到KMP还可以这样用啊。后来去隔壁听题解说随便套个后缀数组算法就好了(DA不行,DC3/SAIS可以),不明觉厉,回来赶紧补一下。
然后发现后缀数组真的是个大坑啊啊,以后再填。
思路:
KMP:
1、通过next数组找到最小循环节,如果存在,那么len-next[len] == 最小循环节长度x,那么 len % (len - next[len]) == 0 说明字符串都是由最小循环节构成的。组数就是循环节长度x,各元素的位置就是循环节中位置i+kx 。
后缀数组:
1、将字符串复制一遍放在后边,那么第一个想法中模拟、去重的问题就成了,新字符串的后缀的最长公共前缀(LCP)问题,如果len(LCP)>=n(原字符串长度),那么说明这两个构造后的字符串相同,放在一组中。
2、后缀数组sa[]存按字典序排序后的后缀首位置 :(DC3/SAIS):n --> rank[]:n --> height[]:n --> LCP=RMQ(height,rank[j]+1,rank[k]):nlogn。
感觉用后缀数组仿佛杀鸡用牛刀。。。还有这个复杂度,据说常数有4,很大。。。不过套板子就好了(也没试过,在此记录一下)。
标准题解:
英文。。。
Main Idea: String, KMP, string cycle, hash
Solution 1: We can directly compute the hash value of each S_i and group them by hash values. Note that the hash space should be big enough to avoid hash collisions.
Overall time complexity: O(|S|) Overall space complexity: O(|S|)
Solution 2: Notice that if S_i = S_j for some i < j, then S is cyclic with cycle length j-i. Thus, we just need to find the minimum cycle k. Then, S_i will be equal to S_{i mod k}. Finding the cycle of a string can be done by hashing or KMP.
Overall time complexity: O(|S|) Overall space complexity: O(|S|)
AC代码:
牛客网讨论区的。。。
https://www.nowcoder.com/discuss/88474?type=101&order=0&pos=10&page=0
KMP
#include <bits/stdc++.h>
using namespace std;
// nex[k] = length of the longest proper prefix of s[0..k-1] that is also its suffix.
int nex[1000050];
// NUL-terminated input string.
char s[1000050];
// Fills nex[0..strlen(s)] with the KMP failure function of the global string s.
void get_next()
{
    const int len=strlen(s);
    nex[0]=nex[1]=0;
    // border always equals nex[pos] at the top of each iteration.
    int border=0;
    for(int pos=1;pos<len;++pos)
    {
        // Fall back through shorter borders until one can be extended by s[pos].
        while(border>0&&s[pos]!=s[border]) border=nex[border];
        if(s[pos]==s[border]) ++border;
        else border=0;
        nex[pos+1]=border;
    }
}
// Reads S, finds its smallest rotation period via the KMP failure function and
// prints the groups of rotation start indices: first the number of groups, then
// one line per group with its size followed by its indices in increasing order.
int main()
{
    // Bound the read so the terminating NUL still fits in s (1000050 bytes).
    // With no input there are no rotations at all; report zero groups instead
    // of falling through to a division by zero below.
    if(scanf("%1000049s",s)!=1||s[0]=='\0')
    {
        printf("0\n");
        return 0;
    }
    get_next();
    int n=strlen(s);
    // nex[n] is a proper border length (< n), so x >= 1 whenever n >= 1.
    int x=n-nex[n];
    // x is a rotation period only when it divides n; otherwise every rotation
    // is distinct, which is the same as a period of n.
    int period=(n%x==0)?x:n;
    printf("%d\n",period);
    for(int i=0;i<period;i++)
    {
        // Group i holds i, i+period, i+2*period, ...
        printf("%d",n/period);
        for(int j=i;j<n;j+=period)
            printf(" %d",j);
        printf("\n");
    }
    return 0;
}
后缀数组:
#include<cstdio>
#include<algorithm>
#include<queue>
#include<iostream>
#include<cmath>
#include<cstring>
using namespace std;
// Index mappings used inside dc3; both macros read dc3's local variable tb.
// F(x): text position x -> slot in the reduced string: x/3 when x%3==1,
//       otherwise x/3+tb (dc3 only applies it to positions with x%3!=0).
// G(x): the reverse mapping: slot x -> text position, x*3+1 for slots below tb
//       and (x-tb)*3+2 for the rest.
#define F(x) ((x)/3+((x)%3==1?0:tb))
#define G(x) ((x)<tb?(x)*3+1:((x)-tb)*3+2)
const int MAXN = 2e6 + 5;// base capacity; main extends the input to 2*|S|-1 characters
// Suffix array output of dc3. dc3 recurses into sa+n, hence the extra room.
int sa[MAXN*3];
// Rank[p] = position of the suffix starting at p within sa (filled by calHeight).
int Rank[MAXN];
// Height[i] = common prefix length of the suffixes sa[i-1] and sa[i] (filled by calHeight).
int Height[MAXN];
// Length of the string handed to the suffix-array code (set in main).
int n;
// Input buffer; main appends a copy of the leading characters after the input.
char s[MAXN*3];
// Integer copy of s passed to dc3. dc3 recurses into r+n, hence the extra room.
int r[MAXN*3];
// Scratch arrays: wa/wb hold position lists in dc3; wv caches keys in sort and
// stores ranks consulted by c12.
int wa[MAXN*3],wb[MAXN*3],wv[MAXN*3];
// Bucket counters for the counting sort in sort().
int wws[MAXN*3];
// Stable counting sort: writes the n entries of a into b, ordered by key r[a[i]].
// Keys must lie in [0, m). Uses the globals wv (key cache) and wws (buckets).
void sort(int *r,int *a,int *b,int n,int m)
{
    // Clear the buckets.
    for(int key=0;key<m;++key) wws[key]=0;
    // Cache each element's key and build the histogram in one pass.
    for(int idx=0;idx<n;++idx)
    {
        wv[idx]=r[a[idx]];
        ++wws[wv[idx]];
    }
    // Prefix sums turn counts into one-past-the-end slots per key.
    for(int key=1;key<m;++key) wws[key]+=wws[key-1];
    // Walk backwards so elements with equal keys keep their relative order.
    for(int idx=n-1;idx>=0;--idx)
    {
        const int slot=--wws[wv[idx]];
        b[slot]=a[idx];
    }
}
// Returns 1 when the three consecutive values starting at r[a] equal the
// three starting at r[b], otherwise 0.
int c0(int *r,int a,int b)
{
    for(int off=0;off<3;++off)
        if(r[a+off]!=r[b+off]) return 0;
    return 1;
}
// Strict "less than" between the suffix at a and the suffix at b during the
// merge step of dc3. The first characters decide when they differ. On a tie,
// k==2 compares one more character via a recursive call with k==1; otherwise
// the ranks stored in wv for the following positions break the tie.
int c12(int k,int *r,int a,int b)
{
    if(r[a]!=r[b]) return r[a]<r[b];
    if(k==2) return c12(1,r,a+1,b+1);
    return wv[a+1]<wv[b+1];
}
// Recursive DC3 suffix-array construction.
//   r  : input sequence of n integers, each in [0, m); the function writes
//        r[n] and r[n+1] and uses r+n as the input of the recursive call.
//   sa : receives the n suffix start positions in sorted order; sa+n is used
//        as the output area of the recursive call.
//   n  : number of elements in r.
//   m  : exclusive upper bound on the values in r (bucket count for sort).
// Relies on the global scratch arrays wa, wb, wv (and wws through sort).
void dc3(int *r,int *sa,int n,int m)
{
// rn/san: input and output of the recursive call, placed right behind r and sa.
// ta: number of positions collected for the second sort (those with i%3==0).
// tb: offset used by the F/G macros to separate the two residue classes.
// tbc: number of positions with i%3!=0.
int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
// Pad with two zeros so that triples starting near the end can be read.
r[n]=r[n+1]=0;
// Collect every position whose index is not a multiple of 3.
for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i;
// Three stable passes, least significant key first: afterwards wb lists these
// positions ordered by the triple (r[i], r[i+1], r[i+2]).
sort(r+2,wa,wb,tbc,m);
sort(r+1,wb,wa,tbc,m);
sort(r,wa,wb,tbc,m);
// Name the triples: equal neighbouring triples share a name, p ends up as the
// number of distinct names. Names are stored at the reduced-string slot F(pos).
for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++)
rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
// Duplicate names: sort the reduced string recursively. All names distinct:
// the names already are the ranks, so just invert them.
if(p<tbc) dc3(rn,san,tbc,p);
else for(i=0;i<tbc;i++) san[rn[i]]=i;
// For each reduced slot below tb (text position san[i]*3+1), take the
// position just before it, a multiple of 3, in the order given by san.
for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3;
// When n%3==1 the last position n-1 is a multiple of 3 with no successor
// in the sampled set, so it is added explicitly.
if(n%3==1) wb[ta++]=n-1;
// Stable sort of those positions by their first value; result in wa.
sort(r,wb,wa,ta,m);
// Translate the sorted reduced slots back to text positions (into wb) and
// record the rank of each such position in wv for use by c12.
for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i;
// Merge the two sorted lists (wa and wb) into sa.
for(i=0,j=0,p=0;i<ta && j<tbc;p++)
sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
// Copy whatever remains in either list.
for(;i<ta;p++) sa[p]=wa[i++];
for(;j<tbc;p++) sa[p]=wb[j++];
return;
}
// Length of the original input string; set in main and used by solve as the
// minimum common-prefix length for two start positions to share a group.
int K;
// Fills the global Rank and Height arrays from a finished suffix array.
//   r  : the sequence the suffix array was built for.
//   sa : suffix array; entries 1..n are used (entry 0 is skipped, presumably
//        the sentinel suffix the caller appended).
//   n  : number of real suffixes.
// Height[Rank[p]] becomes the common prefix length of suffix p and the suffix
// that precedes it in sa.
void calHeight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        Rank[sa[pos]] = pos;

    // Carried over between iterations: the match length can shrink by at most
    // one when moving from suffix start to suffix start+1.
    int match = 0;
    for (int start = 0; start < n; ++start)
    {
        if (match > 0)
            --match;
        const int prev = sa[Rank[start] - 1];
        while (r[start + match] == r[prev + match])
            ++match;
        Height[Rank[start]] = match;
    }
}
// q[g] collects the start indices that belong to group g (solve numbers groups from 1).
vector<int> q[MAXN];
// vis[i] is set once start index i has been placed in a group by the Height scan in solve.
bool vis[MAXN];
// One record per group, used to order the groups for output.
struct ac
{
    int index; // group number, i.e. the subscript into q
    int id;    // sort key: solve stores the group's smallest start index here
};
ac b[MAXN];
// Ordering predicate for sort: ascending by id.
int cmp1(ac d, ac f)
{
    return f.id > d.id;
}
// Groups the rotation start indices using the Height array and prints them.
// Neighbouring entries of sa whose common prefix is at least K characters long
// belong to the same group. Every start index in [0, K) that never joins such
// a run forms a group of its own. Output: the number of groups, then one line
// per group (size followed by the indices), ordered by each group's smallest index.
inline void solve()
{
    int groups = 0;
    bool inRun = false;
    for (int pos = 1; pos <= n; ++pos)
    {
        if (Height[pos] < K)
        {
            inRun = false;
            continue;
        }
        if (!inRun)
        {
            // A new run starts: the predecessor in sa is its first member.
            inRun = true;
            ++groups;
            q[groups].push_back(sa[pos - 1]);
            vis[sa[pos - 1]] = 1;
        }
        q[groups].push_back(sa[pos]);
        vis[sa[pos]] = 1;
    }

    // Start indices that matched nothing become single-element groups.
    for (int start = 0; start < K; ++start)
        if (!vis[start])
            q[++groups].push_back(start);

    // Order the indices inside each group and note its smallest one as the key.
    for (int g = 1; g <= groups; ++g)
    {
        sort(q[g].begin(), q[g].end());
        b[g].id = q[g][0];
        b[g].index = g;
    }
    sort(b + 1, b + groups + 1, cmp1);

    printf("%d\n", groups);
    for (int g = 1; g <= groups; ++g)
    {
        const vector<int> &members = q[b[g].index];
        const int len = members.size();
        printf("%d ", len);
        for (int j = 0; j < len; ++j)
            printf("%d%c", members[j], j == len - 1 ? '\n' : ' ');
    }
}
// Reads S, builds the string S + S[0..|S|-2] so that every rotation of S is a
// prefix of one of its suffixes, constructs the suffix array and Height array
// for it, and lets solve() group and print the rotation start indices.
int main()
{
    // Without any input there are no rotations. Bail out before the length
    // arithmetic below turns n into -1 and r[n] writes out of bounds.
    if(scanf("%s",s)!=1||s[0]=='\0')
    {
        printf("0\n");
        return 0;
    }
    int Max=-1;
    n=strlen(s);
    // Append the first n-1 characters after the input.
    for(int i = 0; i < n - 1; i++) s[i + n] = s[i];
    K = n;
    n += n - 1;
    // Copy into the int array and track the largest character code, which
    // determines the bucket count passed to dc3.
    for(int i=0;i<n;i++){
        r[i]=s[i];
        if(r[i]>Max)Max=r[i];
    }
    // Terminating zero, smaller than every character code; it is sorted along
    // with the text, hence the length n+1.
    r[n]=0;
    dc3(r,sa,n+1,Max+1);
    calHeight(r,sa,n);
    solve();
    return 0;
}
参考:
《算法入门经典 训练指南》
https://www.nowcoder.com/discuss/88474?type=101&order=0&pos=10&page=0