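// A problem for checking, on large inputs, that a suffix array's SA and HEIGHT arrays are computed correctly.
// It also settled the suffix-array template I will use from now on.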
#include <cstdio>
#include <iostream>
// Capacity of every per-position buffer below (presumably the problem's
// 200000-character limit plus a little slack for the terminator).
const int max_strlen = 200000 + 10;

// Raw input line as read by main().
char text[max_strlen];

// Not referenced by the code visible here: da() and getheight() take their own
// `sa` parameter, which shadows this array. Kept in case other code uses it.
int sa[max_strlen];

// Counting-sort buckets used by da(). After the first doubling round da() sets
// its bucket count to the number of distinct ranks, which can grow up to n, so
// this array must be as large as the text buffers (a fixed 200 slots overflows
// for any input with more than 200 distinct ranks).
int tub[max_strlen];

// Work buffers for da(): wa/wb alternate as current-rank and scratch arrays,
// wv holds the sort keys of the second counting-sort pass.
int wa[max_strlen], wb[max_strlen], wv[max_strlen];

// R: integer copy of the text followed by a 0 terminator (filled by main()).
// height / rank: outputs of getheight().
int R[max_strlen], height[max_strlen], rank[max_strlen];

// Suffix array produced by da() when called from main().
int SA[max_strlen];
// Returns true when positions a and b carry the same pair of keys in r:
// the key at the position itself and the key l places further on.
// The second key is only read when the first keys already match.
bool cmp(int *r, int a, int b, int l)
{
    if (r[a] != r[b])
        return false;
    return r[a + l] == r[b + l];
}
void da(int *r, int *sa, int n, int m)
{
int *x = wa, *y = wb, *t;
int i, j, p;
for (i = 0; i != m; ++ i) tub[i] = 0;
for (i = 0; i != n; ++ i) tub[x[i] = r[i]] ++;
for (i = 1; i != m; ++ i) tub[i] += tub[i - 1];
for (i = n - 1; i >= 0; -- i) sa[ -- tub[x[i]]] = i;
for (j = 1, p = 1; p != n; m = p, j *= 2)
{
for (p = 0, i = n - j; i != n; ++ i) y[p ++] = i;
for (i = 0; i != n; ++ i) if (sa[i] >= j) y[p ++] = sa[i] - j;
for (i = 0; i != n; ++ i) wv[i] = x[y[i]];
for (i = 0; i != m; ++ i) tub[i] = 0;
for (i = 0; i != n; ++ i) ++tub[wv[i]];
for (i = 1; i != m; ++ i) tub[i] += tub[i - 1];
for (i = n - 1; i >= 0; -- i) sa[-- tub[wv[i]]] = y[i];
for (t = x, x =y, y = t, x[sa[0]] = 0, p = 1, i = 1; i != n; ++ i)
x[sa[i]] = cmp(y, sa[i], sa[i - 1], j) ? p - 1 : p ++;
}
}
// Fills the file-level arrays rank and height from a finished suffix array.
//
//   rank[p]   = index k (1..n) with sa[k] == p
//   height[k] = number of leading symbols shared by the suffixes starting at
//               sa[k] and sa[k - 1]
//
//   r  : the symbols the suffix array was built from.
//   sa : suffix array with entries 0..n; assumes sa[1..n] covers every position
//        0..n-1 (i.e. sa[0] is the terminator suffix), so rank[i] >= 1 below.
//   n  : text length without the terminator.
//
// The matching loop has no explicit bound; it presumably relies on the
// terminator at r[n] to force a mismatch.
void getheight(int *r, int *sa, int n)
{
    for (int pos = 1; pos <= n; ++pos)
        rank[sa[pos]] = pos;

    int k = 0;  // match length carried over from the previous start position
    for (int i = 0; i != n; ++i) {
        // Moving the start one position right loses at most one matched symbol.
        if (k != 0)
            --k;

        const int prev = sa[rank[i] - 1];  // suffix just before suffix i in sa
        while (r[i + k] == r[prev + k])
            ++k;

        height[rank[i]] = k;
    }
}
int main()
{
int LEN = 0;
while (1)
{
text[LEN ++ ] = getchar();
if (text[LEN - 1] == '\n') break;
}
--LEN;
for (int i = 0; i < LEN; ++ i) R[i] = text[i];
R[LEN] = 0;
da(R,SA,LEN+1,150);
getheight(R,SA,LEN);
for (int i = 1; i <= LEN - 1; ++ i) std::cout<<SA[i] + 1<<" ";
std::cout<<SA[LEN] + 1<<std::endl;
for (int i = 1; i <= LEN - 1 ; ++ i) std::cout<<height[i]<<" ";
std::cout<<height[LEN]<<std::endl;
}