后缀数组的应用——重复子串

重复子串:如果字符串 S 在字符串 T 中至少出现两次,则称 S 是 T 的重复子串。

一、最长可覆盖的重复子串

只需求出字符串的 SA [],Height [] 数组,答案就是 Height 中最大的那个。

二、最长不可覆盖的重复子串 (POJ 1743)

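求出 Height 数组,二分答案 K。问题就转变成了判定是否存在两个长度为 K 的相同子串,且二者不重叠。把后缀按排名顺序依次分组:若 Height [ i ] < K 就从第 i 个后缀起重新分一组(如图),这样同一组内任意两个后缀的最长公共前缀都不小于 K。对于每一组,如果这组中 SA 的最大值与最小值的差大于等于 K,就存在不重叠的两个子串,否则不存在。注意:下面的程序先对相邻音符作差,再在差分数组上求解;长度为 K 的差分串对应 K + 1 个音符,所以代码中的判定条件是差严格大于 K,输出的答案是 K + 1(不足 5 时输出 0)。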


#include <cstdio>
#include <algorithm>

using namespace std;

const int MAX_N = 20005;

// Scratch space for da(): wa/wb are the two rank buffers it swaps between rounds,
// ws is the counting-sort bucket array, wv caches the first-key rank of every
// entry of the second-key order.
int wa[MAX_N], wb[MAX_N], ws[MAX_N], wv[MAX_N];
// n: length of the sequence stored in a[]; sa: suffix array; r: rank array
// (inverse of sa); h: height array (LCP of neighbouring suffixes in sa order).
int n, a[MAX_N], sa[MAX_N], r[MAX_N], h[MAX_N];

// Builds the suffix array of a[0..n-1] by prefix doubling with counting sort.
//
//   a  : input symbols, each in [0, m)
//   sa : output; sa[i] is the start index of the i-th smallest suffix
//   n  : number of symbols
//   m  : exclusive upper bound of the symbols (must not exceed the size of ws)
//
// The input does not need a terminating sentinel: a suffix that is shorter than
// the compared length is treated as ending in a symbol smaller than every rank.
void da(int *a, int *sa, int n, int m)
{
	int *x = wa, *y = wb;
	// Initial counting sort of the suffixes by their first symbol.
	for (int i = 0; i < m; i ++) ws[i] = 0;
	for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
	for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
	for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
	for (int k = 1; k <= n; k <<= 1){
		// y := suffixes ordered by their second key (rank of the suffix k later).
		// Suffixes starting in the last k positions have an empty second key and go first.
		int p = 0;
		for (int i = n - k; i < n; i ++) y[p ++] = i;
		for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
		// Stable counting sort of that order by the first key.
		for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
		for (int i = 0; i < m; i ++) ws[i] = 0;
		for (int i = 0; i < n; i ++) ws[wv[i]] ++;
		for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
		for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
		// Recompute ranks: y now holds the old ranks, x receives the new ones.
		std::swap(x, y); p = 1; x[sa[0]] = 0;
		for (int i = 1; i < n; i ++){
			const int u = sa[i - 1], v = sa[i];
			// Never read past the end of the string: the buffers there hold stale
			// data (or lie outside the arrays), which could merge distinct suffixes.
			const int su = u + k < n ? y[u + k] : -1;
			const int sv = v + k < n ? y[v + k] : -1;
			x[v] = (y[u] == y[v] && su == sv) ? p - 1 : p ++;
		}
		if (p >= n) break; // every suffix already has a distinct rank
		m = p;
	}
}

// Fills r[] (rank of every suffix, the inverse of sa[]) and h[], where for
// i >= 1 h[i] is the length of the longest common prefix of the suffixes
// sa[i-1] and sa[i]. h[0] is set to 0 because the smallest suffix has no
// predecessor.
void calc()
{
	for (int i = 0; i < n; i ++) r[sa[i]] = i;
	int k = 0;
	for (int i = 0; i < n; i ++){
		if (r[i] == 0){
			// No predecessor in sa order; do not read sa[-1].
			h[0] = 0;
			k = 0;
			continue;
		}
		// The LCP of the next text position can shrink by at most one.
		if (k > 0) k --;
		const int j = sa[r[i] - 1];
		// Bounded comparison: stop at the end of the sequence instead of relying
		// on whatever is stored behind a[n-1].
		while (i + k < n && j + k < n && a[i + k] == a[j + k]) k ++;
		h[r[i]] = k;
	}
}

bool check(int x)
{
	int mx = sa[0], mn = sa[0];
	for (int i = 1; i < n; i ++){
		if (h[i] < x) mx = mn = sa[i];
		else {
			if (sa[i] < mn) mn = sa[i];
			if (sa[i] > mx) mx = sa[i];
			if (mx - mn > x) return 1;
		}
	}
	return 0;
}

// Reads the n values of one test case (n was already read by main), replaces
// them by the n-1 differences of neighbouring values, and builds sa[] and h[]
// for that difference sequence. On return n is the number of differences.
void init()
{
	int prev = 0, cur;
	scanf("%d", &prev);
	-- n;
	for (int idx = 0; idx < n; ++ idx){
		scanf("%d", &cur);
		// Offset by 100; presumably this keeps every difference positive and
		// below the alphabet bound 200 handed to da() -- depends on the input range.
		a[idx] = cur - prev + 100;
		prev = cur;
	}
	a[n] = 0; // terminator that stops the comparisons in calc()
	da(a, sa, n, 200);
	calc();
//	for (int i = 0; i < n; i ++) printf("%d ", sa[i]); printf("\n");
}

// Binary-searches the largest length accepted by check() within [1, n/2] and
// prints that length plus one, or 0 when the length found is below 4.
void doit()
{
	int lo = 1, hi = n / 2;
	while (lo <= hi){
		const int mid = (lo + hi) >> 1;
		if (check(mid)){
			lo = mid + 1;
		} else {
			hi = mid - 1;
		}
	}
	// hi is now the largest accepted length (or 0 when none was accepted).
	if (hi < 4){
		printf("0\n");
		return;
	}
	printf("%d\n", hi + 1);
}

// Processes test cases until end of input or a case whose length is 0.
int main()
{
	for (;;){
		if (scanf("%d", &n) == EOF) break;
		if (n == 0) break;
		init();
		doit();
	}
	return 0;
}

三、可覆盖 k 次的最长重复子串

和上一题的思路差不多,二分答案。判断时看每组的个数是否大于等于 k ,如果是就存在,否则不存在。

#include <cstdio>
#include <algorithm>

using namespace std;

const int MAX_N = 20005;

// n: length of the sequence stored in a[]; K: required number of occurrences;
// sa: suffix array; r: rank array (inverse of sa); h: height array.
int n, K, a[MAX_N], sa[MAX_N], r[MAX_N], h[MAX_N];
// Scratch space for da(): wa/wb are the two rank buffers it swaps between rounds,
// ws is the counting-sort bucket array (sized for the symbol range, not for n),
// wv caches the first-key rank of every entry of the second-key order.
int wa[MAX_N], wb[MAX_N], ws[1000005], wv[MAX_N];

// Builds the suffix array of a[0..n-1] by prefix doubling with counting sort.
//
//   a  : input symbols, each in [0, m)
//   sa : output; sa[i] is the start index of the i-th smallest suffix
//   n  : number of symbols
//   m  : exclusive upper bound of the symbols (must not exceed the size of ws)
//
// The input does not need a terminating sentinel: a suffix that is shorter than
// the compared length is treated as ending in a symbol smaller than every rank.
void da(int *a, int *sa, int n, int m)
{
	int *x = wa, *y = wb;
	// Initial counting sort of the suffixes by their first symbol.
	for (int i = 0; i < m; i ++) ws[i] = 0;
	for (int i = 0; i < n; i ++) ws[x[i] = a[i]] ++;
	for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
	for (int i = n - 1; i >= 0; i --) sa[-- ws[x[i]]] = i;
	for (int k = 1; k <= n; k <<= 1){
		// y := suffixes ordered by their second key (rank of the suffix k later).
		// Suffixes starting in the last k positions have an empty second key and go first.
		int p = 0;
		for (int i = n - k; i < n; i ++) y[p ++] = i;
		for (int i = 0; i < n; i ++) if (sa[i] >= k) y[p ++] = sa[i] - k;
		// Stable counting sort of that order by the first key.
		for (int i = 0; i < n; i ++) wv[i] = x[y[i]];
		for (int i = 0; i < m; i ++) ws[i] = 0;
		for (int i = 0; i < n; i ++) ws[wv[i]] ++;
		for (int i = 1; i < m; i ++) ws[i] += ws[i - 1];
		for (int i = n - 1; i >= 0; i --) sa[-- ws[wv[i]]] = y[i];
		// Recompute ranks: y now holds the old ranks, x receives the new ones.
		std::swap(x, y); p = 1; x[sa[0]] = 0;
		for (int i = 1; i < n; i ++){
			const int u = sa[i - 1], v = sa[i];
			// Never read past the end of the string: those slots hold zeros or stale
			// ranks, and 0 is a legal symbol/rank, so distinct suffixes could be merged.
			const int su = u + k < n ? y[u + k] : -1;
			const int sv = v + k < n ? y[v + k] : -1;
			x[v] = (y[u] == y[v] && su == sv) ? p - 1 : p ++;
		}
		if (p >= n) break; // every suffix already has a distinct rank
		m = p;
	}
}

// Fills r[] (rank of every suffix, the inverse of sa[]) and h[], where for
// i >= 1 h[i] is the length of the longest common prefix of the suffixes
// sa[i-1] and sa[i]. h[0] is set to 0 because the smallest suffix has no
// predecessor.
void calc()
{
	for (int i = 0; i < n; i ++) r[sa[i]] = i;
	int k = 0;
	for (int i = 0; i < n; i ++){
		if (r[i] == 0){
			// No predecessor in sa order; do not read sa[-1].
			h[0] = 0;
			k = 0;
			continue;
		}
		// The LCP of the next text position can shrink by at most one.
		if (k > 0) k --;
		const int j = sa[r[i] - 1];
		// Bounded comparison: a[] carries no terminator here, so without the
		// bounds the scan could run past the end when the data contains zeros.
		while (i + k < n && j + k < n && a[i + k] == a[j + k]) k ++;
		h[r[i]] = k;
	}
}

// Returns true when at least K suffixes that are consecutive in sa order are
// linked by h[] values >= x.
bool check(int x)
{
	int run = 1; // size of the current group of suffixes
	for (int idx = 1; idx < n; ++ idx){
		if (h[idx] >= x){
			if (++ run >= K) return true;
		} else {
			run = 1; // common prefix too short: start a new group
		}
	}
	return false;
}

// Reads n, K and the n symbols into a[], then builds sa[] and h[].
// Symbols must be non-negative and smaller than the number of slots in ws.
void init()
{
	scanf("%d%d", &n, &K);
	int top = 0; // largest symbol read
	for (int i = 0; i < n; i ++){
		scanf("%d", &a[i]);
		if (a[i] > top) top = a[i];
	}
	// Hand da() the real alphabet bound instead of a fixed 1000000: a symbol
	// equal to the fixed bound would fall outside the counted buckets, and a
	// small alphabet no longer pays for clearing a million buckets.
	da(a, sa, n, top + 1);
	calc();
}

// Binary-searches the largest length accepted by check() within [1, n] and
// prints it; prints 0 when no length is accepted.
void doit()
{
	// ans starts at 0 so the output is defined even if check() never succeeds.
	int l = 1, r = n, ans = 0;
	while (l <= r){
		const int mid = (l + r) >> 1;
		if (check(mid)){
			ans = mid;
			l = mid + 1;
		} else {
			r = mid - 1;
		}
	}
	printf("%d\n", ans);
}

// Entry point: handles the single test case of this program.
int main()
{
	init(); // read the input and build sa[] / h[]
	doit(); // search for the answer and print it
}


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值