题目链接:点击这里
对一个串的每一个后缀, 求出在它之前出现的后缀中与它匹配最长的那一个.
题意: 给定一个串, 下标从0开始. 对当前下标, 在它之前的下标中找到一个最小的下标, 使得这两个后缀的最长公共前缀最大, 设这个最大值为L. 如果L>0, 输出L和找到的那个下标, 然后当前下标+L; 否则输出-1和当前下标字符的ASCII码, 然后当前下标+1.
后缀的前缀这个东西看着就很后缀数组, 于是就用后缀数组来搞.
第一步是找到L. 可以维护一个set, 把他之前的rank值都扔到set里面, 然后找到前后最接近当前下标的两个rank值, 从而确定一个最大的L.
现在知道了L, 接下来需要计算 $height[rank_i]$ 最长可以前后延伸到哪里, 所以就二分height数组的下标, 每次用rmq查询区间的最小值是不是大于等于L. 这样就找到了一个最大的区间 $(l, r)$, 满足区间里面任何一个后缀和当前后缀的公共前缀长度 $\ge L$.
最后, 在找到的区间中找到一个最小的下标就是答案. 细节多到炸~~
#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#include <cmath>
#include <string>
#include <vector>
#include <algorithm>
#include <map>
#include <set>
// Capacity used for every fixed-size array in this file.
#define maxn 200005
// Every later use of the identifier `rank` is rewritten to `Rank`.
// NOTE(review): presumably this avoids a clash with std::rank once
// `using namespace std` is in effect — confirm before removing.
#define rank Rank
using namespace std;
// Scratch buffers for da(): t1/t2 back the two key arrays (x and y) that are
// swapped on each doubling round, and c holds the counting-sort buckets.
int t1[maxn],t2[maxn],c[maxn];
// Tells whether positions a and b carry the same key pair in r: the key at the
// position itself and the key l places further on must both match.
bool cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b]) {
        return false;  // first keys differ; the second key is never read
    }
    return r[a+l] == r[b+l];
}
// Builds the suffix array of str by prefix doubling with counting sorts, then
// derives the rank and height arrays.
//   str    : input symbols; str[0..n] is read (n+1 entries) and every value is
//            used as an index into c, which is cleared over [0, m).
//            NOTE(review): assumes str[n] is a sentinel smaller than all other
//            symbols (main stores 0 there) — the height loop relies on it to stop.
//   sa     : output, sa[0..n]; sa[i] is the start of the i-th smallest suffix.
//   rank   : output, rank[sa[i]] = i for i in 0..n.
//   height : output, height[rank[i]] is the number of leading symbols shared by
//            suffix i and the suffix placed immediately before it in sa.
//   n      : number of symbols, not counting the entry at str[n].
//   m      : exclusive upper bound on the symbol values in str.
// Uses the file-level scratch arrays t1, t2 and c.
void da(int str[],int sa[],int rank[],int height[],int n,int m)
{
// Sort n+1 positions so that the extra entry at index n takes part as well.
n++;
int i, j, p, *x = t1, *y = t2;
// First round: counting sort on single symbols. If the largest value in str is
// very big, this could be replaced by a comparison sort.
for(i = 0; i < m; i++)c[i] = 0;
for(i = 0; i < n; i++)c[x[i] = str[i]]++;
for(i = 1; i < m; i++)c[i] += c[i-1];
for(i = n-1; i >= 0; i--)sa[--c[x[i]]] = i;
// Each round doubles the compared prefix length j.
for(j = 1; j <= n; j <<= 1)
{
p = 0;
// Order by the second key directly from the current sa.
for(i = n-j; i < n; i++)y[p++] = i;// the last j positions have an empty second key, which sorts first
for(i = 0; i < n; i++)if(sa[i] >= j)y[p++] = sa[i] - j;
// y now lists the positions ordered by their second key.
// Counting sort on the first key, walking y backwards to keep that order stable.
for(i = 0; i < m; i++)c[i] = 0;
for(i = 0; i < n; i++)c[x[y[i]]]++;
for(i = 1; i < m; i++)c[i] += c[i-1];
for(i = n-1; i >= 0; i--)sa[--c[x[y[i]]]] = y[i];
// Compute the new key array x from sa and the old keys (now reachable via y).
swap(x,y);
p = 1;
x[sa[0]] = 0;
for(i = 1; i < n; i++)
x[sa[i]] = cmp(y,sa[i-1],sa[i],j)?p-1:p++;
// Every position has a distinct key: the order is final.
if(p >= n)break;
m = p;// key range for the next round's counting sort
}
int k = 0;
n--;
for(i = 0; i <= n; i++)rank[sa[i]] = i;
// Height pass: k carries over from the previous i, reduced by at most one.
for(i = 0; i < n; i++)
{
if(k) k--;
// j is the suffix ranked just before suffix i.
j = sa[rank[i]-1];
while(str[i+k] == str[j+k])
k++;
height[rank[i]] = k;
}
}
// Outputs of da(): rank[p] is the position of suffix p inside sa, and
// height[r] is the common-prefix length of the suffixes at sa[r-1] and sa[r].
int rank[maxn], height[maxn];
// Integer copy of the input handed to da(); main writes a 0 at index n.
int str[maxn];
// Raw input token read by main.
char s[maxn];
// Suffix array filled by da().
int sa[maxn];
// Length of the current input string.
int n;
// Sparse tables built by rmq_init(): entry [i][j] is the minimum of
// sa[i+1 .. i+2^j] and of height[i+1 .. i+2^j] respectively.
int dp_sa[maxn][21];
int dp_height[maxn][21];
// Fills both sparse tables for range-minimum queries over sa[1..n] and
// height[1..n]. Table row i corresponds to array index i+1, and column j
// covers a window of 2^j consecutive entries starting there.
void rmq_init () {
    // Width-1 windows are the array values themselves.
    for (int i = 0; i < n; ++i) {
        dp_sa[i][0] = sa[i+1];
        dp_height[i][0] = height[i+1];
    }
    // A window of width 2^lvl is the minimum of its two halves of width 2^(lvl-1).
    for (int lvl = 1; (1 << lvl) <= n; ++lvl) {
        const int half = 1 << (lvl - 1);
        for (int i = 0; i + 2 * half - 1 < n; ++i) {
            dp_sa[i][lvl] = min (dp_sa[i][lvl-1], dp_sa[i+half][lvl-1]);
            dp_height[i][lvl] = min (dp_height[i][lvl-1], dp_height[i+half][lvl-1]);
        }
    }
}
// Range-minimum query on the tables prepared by rmq_init().
//   l, r : inclusive bounds given as indices into sa/height; they may be
//          passed in either order.
//   op   : 0 -> minimum of sa over [l, r]
//          anything else -> minimum of height over [l, r]
int rmq (int l, int r, int op) {
    if (l > r) swap (l, r);
    // The tables are shifted by one: row i describes array index i+1.
    const int first = l - 1;
    const int last = r - 1;
    const int span = last - first + 1;
    // Largest k whose window of 2^k entries still fits into the span.
    int k = 0;
    while ((2 << k) <= span) ++k;
    // Two windows, one anchored at each end, together cover the whole range.
    const int tail = last - (1 << k) + 1;
    if (op == 0) {
        return min (dp_sa[first][k], dp_sa[tail][k]);
    }
    return min (dp_height[first][k], dp_height[tail][k]);
}
// Ranks of the positions solve() has already passed, kept ordered so the
// nearest ranks on either side of the current suffix can be looked up.
set <int> gg;
// Iterator scratch variable used by solve() when probing gg.
set <int>::iterator it;
// Starting value for the minimum-position search in solve().
#define INF 11111111
// Processes the current string (globals str, n, sa, rank, height) from left to
// right and prints one line per step:
//   "-1 <code>"   when no earlier suffix shares a first symbol with the suffix
//                 at the current position; the position advances by 1;
//   "<len> <pos>" otherwise, where len is the longest common prefix with any
//                 earlier suffix and pos the smallest start reaching it; the
//                 position advances by len.
void solve () {
    gg.clear ();
    rmq_init ();

    // Position 0 has no predecessor, so it is always reported as a literal.
    printf ("-1 %d\n", str[0]);
    gg.insert (rank[0]);

    int i = 1;
    while (i < n) {
        const int cur = rank[i];

        // The best match among earlier suffixes is one of the two ranks in gg
        // that sit closest to cur, one on each side.
        int len = 0;
        it = gg.upper_bound (cur);
        if (it == gg.end ()) {
            --it;
            len = rmq ((*it)+1, cur, 1);
        }
        else {
            len = rmq (cur+1, *it, 1);
            if (it != gg.begin ()) {
                --it;
                len = max (len, rmq ((*it)+1, cur, 1));
            }
        }

        if (len == 0) {
            printf ("-1 %d\n", str[i]);
            gg.insert (cur);
            i++;
            continue;
        }

        // Lowest rank whose suffix still shares at least len symbols with cur.
        int first = 1;
        if (cur != 1) {
            int lo = 2, hi = cur;
            while (hi - lo > 1) {
                const int mid = (lo + hi) >> 1;
                if (rmq (mid, cur, 1) >= len) hi = mid;
                else lo = mid;
            }
            if (rmq (lo, cur, 1) >= len) first = lo - 1;
            else if (rmq (hi, cur, 1) >= len) first = hi - 1;
            else first = cur;
        }

        // Highest rank with the same property.
        int last = n;
        if (cur != n) {
            int lo = cur + 1, hi = n;
            while (hi - lo > 1) {
                const int mid = (lo + hi) >> 1;
                if (rmq (cur+1, mid, 1) >= len) lo = mid;
                else hi = mid;
            }
            if (rmq (cur+1, hi, 1) >= len) last = hi;
            else if (rmq (cur+1, lo, 1) >= len) last = lo;
            else last = cur;
        }

        // Smallest start position inside the rank window, cur itself excluded.
        int pos = INF;
        if (cur - 1 >= first)
            pos = min (rmq (first, cur-1, 0), pos);
        if (cur + 1 <= last)
            pos = min (rmq (cur+1, last, 0), pos);
        printf ("%d %d\n", len, pos);

        // All len positions consumed by this step become "earlier" suffixes.
        for (int j = i; j < i + len; j++)
            gg.insert (rank[j]);
        i += len;
    }
}
// Reads the number of test cases, then one whitespace-delimited string per
// case; for each it builds the suffix array and prints "Case #k:" followed by
// the lines produced by solve().
// Input reading stops quietly when the case count or a string is missing.
int main () {
    int t = 0, kase = 0;
    if (scanf ("%d", &t) != 1) return 0;

    // Build "%<capacity-1>s" so a token can never overrun s, whatever maxn is.
    char fmt[32];
    sprintf (fmt, "%%%ds", static_cast<int> (sizeof (s)) - 1);

    while (t--) {
        if (scanf (fmt, s) != 1) break;
        n = static_cast<int> (strlen (s));
        // Go through unsigned char so that bytes >= 0x80 cannot turn into
        // negative bucket indices inside da().
        for (int i = 0; i < n; i++)
            str[i] = static_cast<unsigned char> (s[i]);
        // Terminator: smaller than every real symbol.
        str[n] = 0;
        // 256 covers the full range of unsigned char values.
        da (str, sa, rank, height, n, 256);
        printf ("Case #%d:\n", ++kase);
        solve ();
    }
    return 0;
}
/*
100
aabaaaab
aabcd
aabbabc
abb
-1 97
1 0
-1 98
2 0
3 0
-1 97
1 0
-1 98
-1 99
-1 100
-1 97
1 0
-1 98
1 2
2 1
-1 99
-1 97
-1 98
1 1
*/