曾经有几次想写后缀数组,结果被论文中缩成豆腐块的程序吓到了.......
现在看懂了之后, 被这个程序写的丑陋吓到了。
丑虽丑,但是写的还是很精巧的, 照着打吧。
后缀数组其实就是把一个长度为n的字符串(当然也可以是其他的,只要能比大小)的所有后缀排序后存在数组里。
打裸是O(n*n*log(n))的。
其实如果以每个点为开头,分别求出向后延伸1, 2, 4 .... 个字符的排列顺序,后者是可以由前者推出的,配合基数排序,就可以把复杂度降低到O (n log (n));
当然, 代码很精巧,也很丑。
把排好序的后缀的起始位置放在sa数组中, 用sa 推出height (height[i] = suffix(sa[i]) 与 suffix(sa[i-1]) 的最长公共前缀), 有这么一个定理 height[ rank[i ]] >= height[ rank[i -1]]-1 , 按照原串位置 i 从小到大的顺序来求, 用变量存上一个位置求出的height, 由于这个变量顶多减少n次, 类似kmp的复杂度分析,这是O(n) 的。
用height就可以求很多东西了。
poj 1743 求不可重叠最长重复子串。
求出height后,二分答案,把height数组 分成若干份, 每份内的height都不小于二分的数,观察每份内,是否存在解即可。
nlogn
当然,这道题规定,如果一段数同时减去一个数后, 与另一段数相同,也算作相同,这里就要用差分思想解决了。
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
using namespace std;
// Capacity of every work array below; positions inside them are 1-based.
const int maxn = 100000;
// Large positive value used by check() as "+infinity" (negated for "-infinity").
const int oo = 1073741819;

int pret[maxn];    // counting-sort buckets used by work_suffix()
int s[maxn];       // the sequence the suffix array is built over (filled by read())
int height[maxn];  // height[k]: common prefix length of the suffixes at sa[k-1] and sa[k]
int sa[maxn];      // sa[k]: start position of the k-th smallest suffix
int stmp[maxn];    // scratch: first keys in second-key order (work_suffix())
int lis[maxn];     // scratch: second-key order, then a snapshot of rk (work_suffix())
int rank[maxn];    // rank[pos]: index of suffix pos inside sa (work_height())
int rk[maxn];      // working ranks maintained by work_suffix()

int p;    // number of distinct ranks found by work_suffix()
int n;    // current sequence length
int l;    // binary-search lower bound (main())
int r;    // binary-search upper bound (main())
int mid;  // binary-search probe (main())
int i;    // shared loop counter
void read()
{
int i;
memset(s, 0, sizeof(s));
memset(sa, 0, sizeof(sa));
scanf("%d", &n);
for (i = 1; i<= n; i++)
scanf("%d", &s[i]);
for (i = 1; i< n; i++)
s[i] = s[i+1]-s[i];
s[n] = 0; n--;
for (i = 1; i<= n; i++)
s[i] += 89;
}
// Builds the suffix array of s[1..n] by prefix doubling with counting sort.
//   n - number of symbols in s (1-based).
//   m - largest symbol value; the bucket loops touch pret[1..m], so m must
//       not exceed the last valid index of pret.
// Results: sa[1..n] holds suffix start positions in sorted order, rk[pos]
// holds the rank of the suffix starting at pos. The counter p used below is
// the file-level variable (there is no local declaration of it here).
// NOTE(review): the rank comparison at the end reads lis[sa[i]+j]; this looks
// like it relies on a unique smallest terminator at s[n] so that the index
// stays inside 1..n - confirm against the caller.
void work_suffix(int n, int m)
{
int i, j;
// Initial pass: counting sort of the positions by their first symbol.
memset(pret, 0, sizeof(pret));
for (i = 1; i <= n; i++) pret[rk[i]=s[i]]++;
for (i = 1; i <= m; i++) pret[i]+= pret[i-1];
for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
// Each round sorts by the first 2*j symbols; stop once all n ranks differ.
// After a round, m becomes the number of distinct ranks p.
for (j = 1, p = 0; p < n; j <<=1, m = p)
{
// lis = positions ordered by their second key (rank of position+j):
// the last j positions have no second half and come first ...
for (p = 0, i = n-j+1; i <=n; i++) lis[++p] = i;
// ... followed by sa[i]-j in the current sa order.
for (i = 1; i <=n ; i++) if (sa[i]>j) lis[++p] = sa[i] - j;
memset(pret, 0, sizeof(pret));
// stmp[i] = first key of the i-th position in second-key order.
for (i = 1; i <=n ; i++) stmp[i] = rk[lis[i]];
for (i = 1; i <=n ; i++) pret[stmp[i]]++;
for (i = 1; i <=m ; i++) pret[i]+= pret[i-1];
// Filling from the back keeps the second-key order among equal first keys.
for (i = n; i >=1 ; i--) sa[pret[stmp[i]]--] = lis[i];
// lis now becomes a snapshot of the old ranks while rk is rewritten.
for (i = 1; i <=n ; i++) lis[i] = rk[i];
// Neighbours in sa share a rank only if both halves had equal old ranks;
// the second lookup is evaluated only when the first keys match.
for (p = 1, rk[sa[1]] = 1, i = 2; i <= n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
}
}
void work_height()
{
int i, j, p = 0;
for (i = 1; i <= n; i++) rank[sa[i]] = i;
for (i = 1; i < n; height[rank[i++]] = p)
for (p?p--:0,j = sa[rank[i]-1]; j+p <=n+1 && i+p <=n+1 && s[j+p]==s[i+p]; p++);
}
bool check(int mid)
{
int ll,rr;
for (i = 1; i <= n; i++)
{
if (height[i] < mid) ll = oo, rr = -oo;
ll = ll > sa[i] ? sa[i]:ll;
rr = rr < sa[i] ? sa[i]:rr;
if (rr- ll >= mid) return true;
}
return false;
}
// Driver: for every test case, builds the suffix array and height array of
// the difference sequence, binary-searches the largest block length accepted
// by check(), and prints that length plus one (or 0 when it is below 5).
int main()
{
    freopen("1743.in", "r", stdin);
    freopen("1743.out", "w", stdout);

    while (true)
    {
        read();
        if (n == -1)  // read() reports the end of the input this way
            break;

        // Append the terminator symbol 0 behind the sequence.
        n++;
        s[n] = 0;

        work_suffix(n, 1000);
        work_height();

        // Largest mid in [0, n] for which check(mid) holds.
        l = 0;
        r = n;
        while (l < r)
        {
            mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }

        l++;  // a block of l differences spans l + 1 original values
        if (l >= 5)
            printf("%d\n", l);
        else
            printf("%d\n", 0);
    }
    return 0;
}
poj 3261 求至少出现k次的最长重复子串, 可重叠。
有木有发现和前面一道题是一样的二分检验?
# include <cstdlib>
# include <cstdio>
# include <cstring>
using namespace std;
// "+infinity" used by check() (negated for "-infinity").
const int oo = 1073741819;
// Capacity of the per-position arrays; positions are 1-based.
const int maxn = 30000;

int rank[maxn];          // rank[pos]: index of suffix pos inside sa
int a[maxn];             // input symbols, shifted by read()
int pret[1000000+5];     // counting-sort buckets, indexed by symbol / rank
int rk[maxn];            // working ranks maintained by work_suffix()
int sa[maxn];            // sa[k]: start position of the k-th smallest suffix
int lis[maxn];           // scratch for work_suffix()
int stmp[maxn];          // scratch for work_suffix()
int height[maxn];        // common prefix length of neighbouring suffixes in sa

int i;  // shared loop counter
int n;  // sequence length (including the terminator once read() returns)
int k;  // required number of occurrences
// Reads n and k, then n values into a[1..n]. Every value is increased by one
// and a 0 is stored behind the last one (n is incremented to include it).
void read()
{
    scanf("%d%d", &n, &k);

    int pos = 1;
    while (pos <= n)
    {
        scanf("%d", &a[pos]);
        a[pos] += 1;
        ++pos;
    }

    n += 1;
    a[n] = 0;
}
// Builds the suffix array of a[1..n] by prefix doubling with counting sort.
//   n - number of symbols in a (1-based).
//   m - largest symbol value; the bucket loops touch pret[1..m], so m must
//       not exceed the last valid index of pret.
// Results: sa[1..n] holds suffix start positions in sorted order, rk[pos]
// holds the rank of the suffix starting at pos.
// The clearing loops start at index 1, so pret[0] is never reset here.
// NOTE(review): the rank comparison at the end reads lis[sa[i]+j]; this looks
// like it relies on a unique smallest terminator at a[n] so that the index
// stays inside 1..n - confirm against the caller.
void work_suffix(int n, int m)
{
int i, j, p;
// Initial pass: counting sort of the positions by their first symbol.
for (i=1; i<=m; i++) pret[i] = 0;
for (i=1; i<=n; i++) pret[rk[i] = a[i]]++;
for (i=1; i<=m; i++) pret[i]+= pret[i-1];
for (i=n; i>=1; i--) sa[pret[a[i]]--] = i;
// Each round sorts by the first 2*j symbols; stop once all n ranks differ.
// After a round, m becomes the number of distinct ranks p.
for (j = 1, p = 0; p <n; j <<=1, m = p)
{
// lis = positions ordered by their second key (rank of position+j):
// the last j positions have no second half and come first ...
for (p = 0, i=n-j+1; i<=n; i++) lis[++p] = i;
// ... followed by sa[i]-j in the current sa order.
for (i=1; i<=n; i++) if (sa[i] > j) lis[++p] = sa[i]-j;
for (i=1; i<=m; i++) pret[i] = 0;
// stmp[i] = first key of the i-th position in second-key order.
for (i=1; i<=n; i++) stmp[i]=rk[lis[i]];
for (i=1; i<=n; i++) pret[stmp[i]]++;
for (i=1; i<=m; i++) pret[i]+=pret[i-1];
// Filling from the back keeps the second-key order among equal first keys.
for (i=n; i>=1; i--) sa[pret[stmp[i]]--]=lis[i];
// lis now becomes a snapshot of the old ranks while rk is rewritten.
for (i=1; i<=n; i++) lis[i]=rk[i];
// Neighbours in sa share a rank only if both halves had equal old ranks;
// the second lookup is evaluated only when the first keys match.
for (rk[sa[1]]=1, i=2, p=1; i<=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j]) ? p:++p;
}
}
bool check(int mid)
{
int i, ll, rr;
for (i=1; i<=n; i++)
{
if (height[i] < mid ) ll = oo, rr = -oo;
if (i < ll) ll = i;
if (i > rr) rr = i;
if (rr - ll >= k-1) return true;
}
return false;
}
void work_height()
{
int i,j,p = 0;
for (i=1; i<=n; i++) rank[sa[i]] = i;
for (i=1, p=0; i<=n; height[rank[i++]] = p)
for (p?p--:0, j=sa[rank[i]-1];a[j+p]==a[i+p]; p++);
}
// Driver: reads the sequence, builds its suffix array and height array, then
// binary-searches the largest length accepted by check() and prints it.
int main()
{
    freopen("3261.in","r", stdin);
    freopen("3261.out", "w", stdout);

    read();

    // work_suffix() touches pret[1..m]. pret is declared with 1000000+5
    // entries, so the largest index that may be passed is 1000000+4; passing
    // 1000000+5 wrote one element past the end of the array.
    work_suffix(n, 1000000 + 4);
    work_height();

    // Largest mid in [0, n] for which check(mid) holds.
    int mid, l = 0, r = n;
    while (l < r)
    {
        mid = (l + r + 1) >> 1;
        if (check(mid))
            l = mid;
        else
            r = mid - 1;
    }

    printf("%d", l);
    return 0;
}
poj 2406
给定字符串l,它是由子串s重复k次得到的,求k的最大值。
同样的求height, 枚举s的长度,首先strlen(l)% strlen(s) == 0, 其次, suffix(1)与suffix(1+strlen(s)) 的最长公共前缀 = n - strlen(s), 画个图就很明了了。
当然,这道题不是出给后缀数组的,是出给kmp的,数据出到了1000000, 倍增是过不了的, 只能用丑陋的DC3
贴个kmp的算了:
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
// Capacity of the text buffer and of the failure table.
const int maxn = 1000000+5;

char s[maxn];    // current string, stored from s[1] on
int next[maxn];  // next[i]: length of the longest proper border of s[1..i]

int n;  // length of the current string
int i;  // scan position
int j;  // current border length
// Driver: for every input string (a line starting with '.' ends the input)
// prints how many times its shortest period repeats, or 1 when the string is
// not an exact repetition of that period.
int main()
{
    //freopen("2406.in", "r", stdin);
    //freopen("2406.out", "w", stdout);
    for (;;)
    {
        // Stop on EOF as well: without this check an exhausted input left an
        // empty string behind and the modulo below divided by zero.
        // The width keeps the token inside s: the buffer has 1000000+5 bytes,
        // the text starts at s[1], and one byte is needed for the terminator.
        // scanf terminates the string itself, so the buffer does not have to
        // be cleared before every case.
        if (scanf("%1000003s\n", s+1) != 1) return 0;
        n = static_cast<int>(strlen(s+1));
        if (s[1] == '.') return 0;

        // Failure table: next[i] is the longest proper border of s[1..i].
        next[1] = 0;
        for (i = 2, j = 0; i <= n; next[i++] = j)
        {
            // Fall back through shorter borders until one can be extended.
            for (; j > 0 && s[j+1] != s[i]; j = next[j]);
            if (s[i] == s[j+1]) j++;
        }

        // n - next[n] is the shortest period; it only counts when it divides n.
        printf("%d\n", n % (n - next[n]) == 0 ? n / (n - next[n]): 1);
    }
    return 0;
}
poj 3294
求:给定n个字符串,求至少在 n div 2+1 个字符串中出现的最长子串。
把n个字符串用未出现且不同的字符连接, 同样的二分答案,分组,组内统计即可。
# include <cstdlib>
# include <cstdio>
# include <cstring>
using namespace std;
// Capacity of the arrays indexed by a position in the concatenated text.
const int maxn = 100000+5000;

// Buffer for one input string, stored from s[1] on.
char s[10000+5];

// Suffix-array work arrays over the concatenated text a[1..tot];
// id[pos] is the 1-based number of the string that pos belongs to
// (0 for separator positions).
int height[maxn], pret[maxn], sa[maxn], rk[maxn], rank[maxn], stmp[maxn], lis[maxn], a[maxn],id[maxn];

// ans[0] is the number of results found by check(), ans[1..ans[0]] their
// start positions. check() records one entry per qualifying group of
// suffixes, and the number of groups is bounded only by the text length,
// so this array is sized like the text arrays rather than by the number
// of input strings.
int ans[maxn];

// have[x]: string number x was already seen in the current group.
bool have[1000+5];

int lim, n, i, j, key, len, tot;
// Reads one test case: a count n followed by n strings, and concatenates the
// strings into a[1..tot].
//   * letters are stored as 1..26 ('a' -> 1) with id[] set to the 1-based
//     number of the string they came from;
//   * each string is followed by a separator symbol (30, 31, ...) with id 0;
//   * the separator after the last string is replaced by 0.
// n == 0 on return means "stop" (main() terminates on it). Input that ends
// or cannot be parsed is reported the same way; leaving n at its previous
// value would make the caller loop forever.
void read()
{
    key = 30, tot = 0;
    if (scanf("%d", &n) != 1)
        n = 0;

    for (i = 1; i <= n; i++)
    {
        // The width keeps the token inside s: 10000+5 bytes, text from s[1],
        // one byte for the terminator.
        if (scanf("%10003s", s+1) != 1)
        {
            n = 0;
            tot = 0;
            break;
        }
        len = strlen(s+1);
        for (j = 1; j <= len; j++)
        {
            a[++tot] = s[j] - 'a' + 1;
            id[tot] = i;
        }
        a[++tot] = key++;  // separator, different for every string
        id[tot] = 0;
    }
    a[tot] = 0;  // the final separator becomes the terminator
}
// Builds the suffix array of a[1..n] by prefix doubling with counting sort.
//   n - number of symbols in a (1-based).
//   m - largest symbol value; the bucket loops touch pret[1..m], so m must
//       not exceed the last valid index of pret.
// Results: sa[1..n] holds suffix start positions in sorted order, rk[pos]
// holds the rank of the suffix starting at pos.
// The clearing loops start at index 1, so pret[0] is never reset here.
// NOTE(review): the rank comparison at the end reads lis[sa[i]+j]; this looks
// like it relies on a unique smallest terminator at a[n] so that the index
// stays inside 1..n - confirm against the caller.
void work_suffix(int n, int m)
{
int i, j, p;
// Initial pass: counting sort of the positions by their first symbol.
for (i = 1; i<=m; i++) pret[i] = 0;
for (i = 1; i<=n; i++) pret[rk[i]=a[i]]++;
for (i = 1; i<=m; i++) pret[i]+= pret[i-1];
for (i = n; i>=1; i--) sa[pret[rk[i]]--] = i;
// Each round sorts by the first 2*j symbols; stop once all n ranks differ.
// After a round, m becomes the number of distinct ranks p.
for (j = 1, p = 0; p < n; j <<=1, m =p)
{
// lis = positions ordered by their second key (rank of position+j):
// the last j positions have no second half and come first ...
for (p = 0, i = n-j+1; i<=n; i++) lis[++p] = i;
// ... followed by sa[i]-j in the current sa order.
for (i = 1; i <=n; i++) if (sa[i]>j) lis[++p]=sa[i]-j;
for (i = 1; i <=m; i++) pret[i] = 0;
// stmp[i] = first key of the i-th position in second-key order.
for (i = 1; i <=n; i++) stmp[i]= rk[lis[i]];
for (i = 1; i <=n; i++) pret[stmp[i]]++;
for (i = 1; i <=m; i++) pret[i] += pret[i-1];
// Filling from the back keeps the second-key order among equal first keys.
for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
// lis now becomes a snapshot of the old ranks while rk is rewritten.
for (i = 1; i <=n; i++) lis[i] = rk[i];
// Neighbours in sa share a rank only if both halves had equal old ranks;
// the second lookup is evaluated only when the first keys match.
for (i = 2, p = 1, rk[sa[1]] = 1; i <=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] ==lis[sa[i-1]+j]) ? p:++p;
}
}
void work_height(int n)
{
int i ,j, p;
for (i = 1; i<=n; i++) rank[sa[i]] = i;
for (i = 1, p =0; i <=n; height[rank[i++]] = p)
for (p?p--:0, j = sa[rank[i]-1]; a[i+p]== a[j+p]; p++);
}
// Returns true when some block of length `mid` occurs in at least n / 2 + 1
// different input strings. As a side effect the start position of one
// occurrence per qualifying block is stored in ans[1..ans[0]], in the order
// the blocks appear in sa. For mid == 0 the answer is trivially true and ans
// is left untouched.
//
// The sorted suffixes are scanned in groups of consecutive entries whose
// height is at least `mid`; `all` counts the distinct strings seen in the
// current group and have[] marks which ones.
//
// `all` and have[] are initialised before the scan instead of depending on
// the first entry to trigger a reset. Only have[0..n] is cleared, because
// string numbers never exceed n.
bool check(int mid)
{
    if (mid == 0) return true;

    const int need = n / 2 + 1;
    int all = 0;
    memset(have, false, (n + 1) * sizeof(have[0]));
    memset(ans, 0, sizeof(ans));

    for (int i = 1; i <= tot; i++)
    {
        if (height[i] < mid)  // a new group starts at this entry
        {
            memset(have, false, (n + 1) * sizeof(have[0]));
            all = 0;
        }

        const int owner = id[sa[i]];  // 0 for separator positions
        if (owner != 0 && !have[owner])
        {
            have[owner] = true;
            all++;
            // Record the group exactly once: when the count reaches the
            // threshold.
            if (all == need)
                ans[++ans[0]] = sa[i];
        }
    }
    return ans[0] != 0;
}
// Driver: for every test case, builds the suffix array and height array of
// the concatenated text, binary-searches the largest length accepted by
// check(), and prints every block of that length recorded in ans (or "?"
// when the length is 0). A blank line follows each case handled this way.
int main()
{
    while (true)
    {
        read();
        if (n == 0)
            break;

        // NOTE(review): a single input string is answered with "?" and no
        // trailing blank line - confirm this matches the expected output.
        if (n == 1)
        {
            printf("?\n");
            continue;
        }

        work_suffix(tot, key+1);
        work_height(tot);

        lim = n / 2 + 1;

        // Largest mid in [0, tot] for which check(mid) holds.
        int l = 0;
        int r = tot;
        int mid;
        while (l < r)
        {
            mid = (l + r + 1) >> 1;
            if (check(mid))
                l = mid;
            else
                r = mid - 1;
        }

        // Run check once more so that ans describes the final length.
        check(l);

        if (l != 0)
        {
            for (i = 1; i <= ans[0]; i++)
            {
                // Symbols 1..26 map back to 'a'..'z'.
                j = 1;
                while (j <= l)
                {
                    printf("%c", a[ans[i]+j-1] + 'a' -1);
                    j++;
                }
                printf("\n");
            }
        }
        else
        {
            printf("?\n");
        }
        printf("\n");
    }
    return 0;
}
poj 2774
# include <cstdlib>
# include <cstdio>
# include <cmath>
# include <cstring>
using namespace std;
// Capacity of the arrays indexed by a position in the concatenated text.
const int maxn = 300000;

int height[maxn];  // common prefix length of neighbouring suffixes in sa
int id[maxn];      // 1 or 2: which input string a position belongs to (0 otherwise)
int pret[maxn];    // counting-sort buckets used by work_suffix()
int sa[maxn];      // sa[k]: start position of the k-th smallest suffix
int stmp[maxn];    // scratch for work_suffix()
int rk[maxn];      // working ranks maintained by work_suffix()
int a[maxn];       // concatenated text, 1-based
int lis[maxn];     // scratch for work_suffix()
int rank[maxn];    // rank[pos]: index of suffix pos inside sa

int tot;  // number of symbols stored in a
int i;    // shared loop counter
int n;    // declared for the program; not assigned in the visible code
int ans;  // best common length found so far

// Buffer for one input string, stored from s[1] on.
char s[maxn / 2];
void read()
{
int i, len;
scanf("%s\n", s+1); len = strlen(s+1);
for (i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 1;
scanf("%s\n", s+1); len = strlen(s+1);
for (a[++tot] = 30, i = 1; i <= len; i++) a[++tot] = s[i]-'a' +1, id[tot] = 2;
a[++tot] = 0;
}
// Builds the suffix array of a[1..n] by prefix doubling with counting sort.
//   n - number of symbols in a (1-based).
//   m - largest symbol value; the bucket loops touch pret[1..m], so m must
//       not exceed the last valid index of pret.
// Results: sa[1..n] holds suffix start positions in sorted order, rk[pos]
// holds the rank of the suffix starting at pos.
// The clearing loops start at index 1, so pret[0] is never reset here.
// NOTE(review): the rank comparison at the end reads lis[sa[i]+j]; this looks
// like it relies on a unique smallest terminator at a[n] so that the index
// stays inside 1..n - confirm against the caller.
void work_suffix(int n, int m)
{
int i , j, p;
// Initial pass: counting sort of the positions by their first symbol.
for (i = 1; i <= m; i++) pret[i] = 0;
for (i = 1; i <= n; i++) pret[rk[i] = a[i]] ++;
for (i = 1; i <= m; i++) pret[i] += pret[i-1];
for (i = n; i >= 1; i--) sa[pret[rk[i]]--] = i;
// Each round sorts by the first 2*j symbols; stop once all n ranks differ.
// After a round, m becomes the number of distinct ranks p.
for (p = 0, j = 1; p < n; j <<=1, m = p)
{
// lis = positions ordered by their second key (rank of position+j):
// the last j positions have no second half and come first ...
for (p = 0, i = n-j+1; i <= n; i++) lis[++p] = i;
// ... followed by sa[i]-j in the current sa order.
for (i = 1; i <=n; i++) if (sa[i]> j) lis[++p] = sa[i] - j;
for (i = 1; i <=m; i++) pret[i] = 0;
// stmp[i] = first key of the i-th position in second-key order.
for (i = 1; i <=n; i++) stmp[i] = rk[lis[i]];
for (i = 1; i <=n; i++) pret[stmp[i]]++;
for (i = 1; i <=m; i++) pret[i] += pret[i-1];
// Filling from the back keeps the second-key order among equal first keys.
for (i = n; i >=1; i--) sa[pret[stmp[i]]--] = lis[i];
// lis now becomes a snapshot of the old ranks while rk is rewritten.
for (i = 1; i <=n; i++) lis[i] = rk[i];
// Neighbours in sa share a rank only if both halves had equal old ranks;
// the second lookup is evaluated only when the first keys match.
for (i = 2, p = 1, rk[sa[1]] = 1; i<=n; i++)
rk[sa[i]] = (lis[sa[i]] == lis[sa[i-1]] && lis[sa[i]+j] == lis[sa[i-1]+j])? p:++p;
}
}
void work_height(int n)
{
int i ,j, p= 0;
for (i = 1; i <=n; i++) rank[sa[i]] = i;
for (i = 1; i <n; height[rank[i++]] = p)
for (p?p--:0, j = sa[rank[i]-1]; a[i+p] == a[j+p]; )
p++;
}
// Driver: reads the two strings, builds the suffix array and height array of
// their concatenation, and prints the largest height between two neighbouring
// suffixes in sa whose id[] values differ.
int main()
{
    freopen("2774.in", "r", stdin);
    freopen("2774.out", "w", stdout);

    read();
    work_suffix(tot, 50);
    work_height(tot);

    i = 2;
    while (i <= tot)
    {
        const bool different_origin = id[sa[i]] != id[sa[i-1]];
        if (different_origin && ans < height[i])
            ans = height[i];
        i++;
    }

    printf("%d", ans);
    return 0;
}