后缀数组例题集

按这位大佬的博客刷的https://blog.csdn.net/tianyuhang123/article/details/54977648

poj-1743

不可重叠最长重复子串

给你一串数字,求它们最长的重复(公差相同)子序列,且两个子序列不相交

我们可以先二分枚举答案长度 ans:如果能找到一组连续的后缀,它们之间的 height 都 >= ans,并且通过 sa 判断其中两个后缀起点的间距 >= ans,则说明这个答案合适。

code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<queue>
#include<string>
using namespace std;
#define inf 0x3f3f3f
#define N 40005
int a[N], height[N];
// Tie test used while re-ranking: positions l and r keep the same rank only if
// ranks[] agrees at l/r and also k positions further on.
bool same(int *ranks, int l, int r, int k) {
	if (ranks[l] != ranks[r])
		return false;
	return ranks[l + k] == ranks[r + k];
}
// Builds the suffix array of the global sequence a[0..n) by prefix doubling and
// then fills the global height[] with the LCP of rank-adjacent suffixes.
//   n   - number of symbols in a[]
//   m   - exclusive upper bound of the symbol values (counting-sort bucket count)
//   rk  - rank buffer; it is swapped with sa2 locally, so after the call the
//         caller cannot tell which of the two buffers holds the final ranks
//   sa  - output: sa[r] is the start of the suffix with rank r
//   sa2 - scratch buffer (second-key order / previous ranks)
//   cnt - counting-sort buckets; indexed up to m-1, and m later becomes <= n
void da(int n, int m, int *rk, int *sa, int *sa2, int *cnt) {
	// Round 0: counting sort of the suffixes by their first symbol.
	for (int c = 0; c < m; ++c) cnt[c] = 0;
	for (int pos = 0; pos < n; ++pos) {
		rk[pos] = a[pos];
		++cnt[rk[pos]];
	}
	for (int c = 1; c < m; ++c) cnt[c] += cnt[c - 1];
	for (int pos = n - 1; pos >= 0; --pos) {
		const int slot = --cnt[rk[pos]];
		sa[slot] = pos;
	}

	for (int span = 1; span < n; span *= 2) {
		// Order by the second key: suffixes whose second half is empty come
		// first, the rest follow in the order of the current sa.
		int filled = 0;
		for (int pos = n - span; pos < n; ++pos) sa2[filled++] = pos;
		for (int r = 0; r < n; ++r) {
			const int start = sa[r];
			if (start < span) continue;
			sa2[filled++] = start - span;
		}

		// Stable counting sort by the first key, walking the second-key order
		// backwards.
		for (int c = 0; c < m; ++c) cnt[c] = 0;
		for (int pos = 0; pos < n; ++pos) ++cnt[rk[pos]];
		for (int c = 1; c < m; ++c) cnt[c] += cnt[c - 1];
		for (int r = n - 1; r >= 0; --r) {
			const int pos = sa2[r];
			sa[--cnt[rk[pos]]] = pos;
		}

		// sa2 now holds the previous ranks, rk receives the new ones.
		std::swap(rk, sa2);
		rk[sa[0]] = 0;
		for (int r = 1; r < n; ++r) {
			const int prev = sa[r - 1];
			const int cur = sa[r];
			const bool tied = same(sa2, prev, cur, span);
			rk[cur] = tied ? rk[prev] : rk[prev] + 1;
		}
		m = rk[sa[n - 1]] + 1;
		if (m == n) break;  // every suffix has its own rank
	}

	// LCP pass over positions 0..n-2; position n-1 is skipped.
	int lcp = 0;
	for (int pos = 0; pos < n - 1; ++pos) {
		if (lcp != 0) --lcp;
		const int prev = sa[rk[pos] - 1];
		while (a[prev + lcp] == a[pos + lcp]) ++lcp;
		height[rk[pos]] = lcp;
	}
}
int rk[N], sa[N], sa2[N], cnt[N];
int main() {
	int n;
	while (~scanf("%d", &n) && n) {
		for (int i = 0; i < n; i++) {
			scanf("%d", &a[i]);
		}
		for (int i = 0; i < n - 1; i++) {
			a[i] = a[i+1] - a[i] + 100;
		}
		a[n - 1] = 0;
		da(n, 200, rk, sa, sa2, cnt);
		int l = 1, r = n;
		int L = inf, R = -inf;
		bool flag = false;
		while (l <= r) {
			int mid = (l + r) >> 1;
			L = inf, R = -inf, flag = false;
			for (int i = 1; i < n; i++) {
				if (height[i] >= mid) {
					L = min(L, sa[i]);
					L = min(L, sa[i - 1]);
					R = max(R, sa[i]);
					R = max(R, sa[i - 1]);
				}
				else {
					if (L + mid + 1 <= R)flag = true;
					L = inf, R = -inf;
				}
			}
			if (L + mid + 1 <= R)flag = true;
			if (flag)l = mid + 1;
			else r = mid - 1;
		}
		if (l >= 5)cout << l << endl;
		else cout << "0" << endl;
	}
	return 0;
}

poj-3261

可重叠的k 次最长重复子串

思路:其实通过做几道后缀数组,那么这道题就是模板水题了,通过二分长度,然后分成若干组去寻找答案

http://poj.org/problem?id=3261

code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
#include<queue>
#include<vector>
using namespace std;
#define M 1000005
#define N 20005
#define inf 0x3f3f3f
int a[N], height[N];
int k;
// Returns true when positions l and r have equal entries in ranks[] both at
// the position itself and k places later.
bool same(int *ranks, int l, int r, int k) {
	const bool firstKeyEqual = ranks[l] == ranks[r];
	// The second lookup only happens when the first keys already match.
	return firstKeyEqual && ranks[l + k] == ranks[r + k];
}
// Suffix-array construction (prefix doubling) over the global a[0..n), followed
// by the LCP pass that fills the global height[].
//   n, m - symbol count and exclusive bound on the symbol values
//   rk   - rank buffer (swapped with sa2 inside, via local pointers only)
//   sa   - receives the sorted suffix start positions
//   sa2  - scratch buffer
//   cnt  - bucket counters; must be indexable up to m-1
void da(int n, int m, int *rk, int *sa, int *sa2, int *cnt) {
	int idx;

	// Initial bucket sort on single symbols.
	for (idx = 0; idx < m; ++idx) cnt[idx] = 0;
	for (idx = 0; idx < n; ++idx) {
		rk[idx] = a[idx];
		cnt[rk[idx]] += 1;
	}
	for (idx = 1; idx < m; ++idx) cnt[idx] = cnt[idx] + cnt[idx - 1];
	for (idx = n; idx-- > 0; ) sa[--cnt[rk[idx]]] = idx;

	int width = 1;
	while (width < n) {
		// Second-key order: the last `width` positions first, then every
		// start of the current sa shifted back by `width`.
		int used = 0;
		for (idx = n - width; idx < n; ++idx) sa2[used++] = idx;
		for (idx = 0; idx < n; ++idx)
			if (sa[idx] >= width) sa2[used++] = sa[idx] - width;

		// Bucket sort by first key, consuming the second-key order from the back.
		for (idx = 0; idx < m; ++idx) cnt[idx] = 0;
		for (idx = 0; idx < n; ++idx) cnt[rk[idx]] += 1;
		for (idx = 1; idx < m; ++idx) cnt[idx] = cnt[idx] + cnt[idx - 1];
		for (idx = n; idx-- > 0; ) {
			const int start = sa2[idx];
			sa[--cnt[rk[start]]] = start;
		}

		// Re-rank: old ranks move to sa2, new ranks are written to rk.
		std::swap(rk, sa2);
		rk[sa[0]] = 0;
		for (idx = 1; idx < n; ++idx) {
			int step = 1;
			if (same(sa2, sa[idx - 1], sa[idx], width)) step = 0;
			rk[sa[idx]] = rk[sa[idx - 1]] + step;
		}
		m = rk[sa[n - 1]] + 1;
		if (m == n) break;
		width *= 2;
	}

	// height[rank] = common prefix length with the suffix ranked just before.
	// The last position (n-1) is not processed.
	int common = 0;
	for (idx = 0; idx < n - 1; ++idx) {
		if (common) --common;
		const int before = sa[rk[idx] - 1];
		while (a[before + common] == a[idx + common]) ++common;
		height[rk[idx]] = common;
	}
}
int rk[M], cnt[M], sa[N], sa2[N];
// Driver: reads n values and the required occurrence count k, builds the suffix
// array and binary-searches the longest pattern that occurs at least k times
// (occurrences may overlap). Prints that length.
int main() {
	int n;
	scanf("%d%d", &n, &k);
	for (int i = 0; i < n; i++) {
		scanf("%d", &a[i]);
		// Shift every value up by one so the terminating 0 appended below is
		// strictly smaller than all real symbols; otherwise an input value of 0
		// would collide with the terminator.
		a[i]++;
	}
	a[n++] = 0;
	// Argument order must match da(n, m, rk, sa, sa2, cnt): the counting buffer
	// is zeroed over m entries, so it has to be the large `cnt` array.
	// m = 1000002 assumes input values of at most 1000000 (shifted to 1000001),
	// which is what the original bound of 1000001 implied -- TODO confirm.
	da(n, 1000002, rk, sa, sa2, cnt);
	int l = 1, r = n - 1, t;
	bool flag;
	while (l <= r) {
		int mid = (l + r) >> 1;
		flag = false, t = 1;
		// t counts the suffixes in the current run of rank-adjacent suffixes
		// whose neighbouring LCP is at least mid.
		for (int i = 1; i < n; i++) {
			if (height[i] >= mid)
				t++;
			else {
				if (t >= k) {
					flag = true;
					break;
				}
				t = 1;
			}
		}
		if (t >= k)
			flag = true;
		if (flag)l = mid + 1;
		else r = mid - 1;
	}
	// r is the largest length for which the check succeeded (0 if none did).
	cout << r << endl;
	return 0;
}

spoj-694 不相同的子串的个数

可以转换成求相同子串的个数,而相同子串的个数可以转换成公共前缀的个数

->len*(len+1)/2-sum(height[])

code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
using namespace std;
#define N 10005
char str[N];
int t, height[N];
int sum = 0;
// Two positions x and y are indistinguishable for the current doubling round
// when rank[] matches at x/y and at x+k/y+k.
bool same(int *rank, int x, int y, int k) {
	if (rank[x] == rank[y])
		return rank[x + k] == rank[y + k];
	return false;
}
// Builds the suffix array of the global string str[0..n) by prefix doubling,
// fills the global height[] and adds every LCP value to the global `sum`.
//   n   - number of characters to sort (the caller passes strlen + 1)
//   m   - exclusive upper bound on the character codes
//   cnt - counting-sort buckets, indexed up to m-1
//   sa  - output: start positions in sorted order
//   sa2 - scratch buffer
//   rk  - rank buffer (swapped with sa2 through the local pointers)
// Note the parameter order differs from a plain (rk, sa, sa2, cnt) layout.
void da(int n, int m, int *cnt, int *sa, int *sa2, int *rk) {
	// Initial counting sort by single character.
	for (int i = 0; i < m; i++)cnt[i] = 0;
	for (int i = 0; i < n; i++)cnt[rk[i] = str[i]]++;
	for (int i = 1; i < m; i++)cnt[i] += cnt[i - 1];
	for (int i = n - 1; i >= 0; i--)sa[--cnt[rk[i]]] = i;
	for (int i = 1; i < n; i *= 2) {
		// Second-key order: suffixes with an empty second half first, then
		// the current sa order shifted back by i.
		int p = 0;
		for (int j = n - i; j < n; j++)sa2[p++] = j;
		for (int j = 0; j < n; j++) {
			if (sa[j] >= i)sa2[p++] = sa[j] - i;
		}
		// Stable counting sort by first key.
		for (int j = 0; j < m; j++)cnt[j] = 0;
		for (int j = 0; j < n; j++)cnt[rk[j]]++;
		for (int j = 1; j < m; j++)cnt[j] += cnt[j - 1];
		for (int j = n - 1; j >= 0; j--)sa[--cnt[rk[sa2[j]]]] = sa2[j];
		// Old ranks now live in sa2; write the new ranks into rk.
		swap(rk, sa2);
		rk[sa[0]] = 0;
		for (int j = 1; j < n; j++)
			rk[sa[j]] = rk[sa[j - 1]] + !same(sa2, sa[j], sa[j - 1], i);
		m = rk[sa[n - 1]] + 1;
		if (m == n)break;  // all ranks distinct
	}
	// LCP pass over positions 0..n-2 (the last position is skipped).
	for (int i = 0, j, lcp = 0; i < n - 1; i++) {
		lcp ? --lcp : 0;
		j = sa[rk[i] - 1];
		for (; str[j + lcp] == str[i + lcp]; lcp++);
		// height is keyed by rank; the original stored it under sa[i], which
		// is a start position and not the rank of suffix i.
		height[rk[i]] = lcp;
		sum += lcp;
	}
}
int cnt[N], sa[N], sa2[N], rk[N];
// Driver: for each of t test strings prints len*(len+1)/2 minus the LCP sum
// accumulated by da().
int main() {
	scanf("%d", &t);
	cin.get();  // consume one character following the count
	while (t-- != 0) {
		sum = 0;  // da() only adds to the global accumulator
		scanf("%s", str);
		const int len = strlen(str);
		// len + 1: the terminating '\0' is included in the sorted sequence.
		da(len + 1, 180, cnt, sa, sa2, rk);
		const int allSubstrings = (len + 1) * len / 2;
		cout << allSubstrings - sum << endl;
	}
	return 0;
}

最长回文子串(ural1297)

分两种情况,一是回文子串的长度为奇数,二是长度为偶数。两种情况都可以转化为求一个后缀和一个反过来写的后缀的最长公共前缀。具体的做法是:将整个字符串反过来写在原字符串后面,中间用一个特殊的字符隔开。这样就把问题变为了求这个新的字符串的某两个后缀的最长公共前缀。

code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
using namespace std;
#define N 2222
char str[N], s[N];
int height[N];
// Compares the (rank[p], rank[p + k]) key pairs of positions x and y.
bool same(int *rank, int x, int y, int k) {
	bool equal = rank[x] == rank[y];
	if (equal)
		equal = rank[x + k] == rank[y + k];
	return equal;
}
// Builds the suffix array of the global string str[0..n) by prefix doubling and
// fills the global height[] (indexed by rank) with adjacent-suffix LCP lengths.
//   n   - number of characters to sort (the caller passes strlen + 1)
//   m   - exclusive upper bound on the character codes
//   cnt - counting-sort buckets, indexed up to m-1
//   sa  - output: start positions in sorted order
//   sa2 - scratch buffer
//   rk  - rank buffer (swapped with sa2 through the local pointers)
// This function writes nothing to stdout; the stray debug print of rk[4] that
// used to sit at the end has been removed because it corrupted the answer.
void da(int n, int m, int *cnt, int *sa, int *sa2, int *rk) {
	// Initial counting sort by single character.
	for (int i = 0; i < m; i++)cnt[i] = 0;
	for (int i = 0; i < n; i++)cnt[rk[i] = str[i]]++;
	for (int i = 1; i < m; i++)cnt[i] += cnt[i - 1];
	for (int i = n - 1; i >= 0; i--)sa[--cnt[rk[i]]] = i;
	for (int i = 1; i < n; i *= 2) {
		// Second-key order.
		int p = 0;
		for (int j = n - i; j < n; j++)sa2[p++] = j;
		for (int j = 0; j < n; j++)
			if (sa[j] >= i)sa2[p++] = sa[j] - i;
		// Stable counting sort by first key.
		for (int j = 0; j < m; j++)cnt[j] = 0;
		for (int j = 0; j < n; j++)cnt[rk[j]]++;
		for (int j = 1; j < m; j++)cnt[j] += cnt[j - 1];
		for (int j = n - 1; j >= 0; j--)sa[--cnt[rk[sa2[j]]]] = sa2[j];
		// Old ranks move to sa2, new ranks go to rk.
		swap(rk, sa2);
		rk[sa[0]] = 0;
		for (int j = 1; j < n; j++)
			rk[sa[j]] = rk[sa[j - 1]] + !same(sa2, sa[j], sa[j - 1], i);
		m = rk[sa[n - 1]] + 1;
		if (m == n)break;  // all ranks distinct
	}
	// LCP pass over positions 0..n-2 (the last position is skipped).
	for (int i = 0, j, lcp = 0; i < n - 1; i++) {
		lcp ? --lcp : 0;
		j = sa[rk[i] - 1];
		for (; str[j + lcp] == str[i + lcp]; lcp++);
		height[rk[i]] = lcp;
	}
}
int cnt[N], sa[N], sa2[N], rk[N];
// Driver: reads one word, builds "word}reversed-word", runs the suffix array
// over it and prints the longest palindromic substring found by comparing
// rank-adjacent suffixes (leftmost one on ties). No trailing newline is printed.
int main() {
	scanf("%s", str);
	strcpy(s, str);
	// std::reverse replaces the non-standard strrev().
	std::reverse(s, s + strlen(s));
	strcat(str, "}");
	strcat(str, s);
	int len = strlen(str);
	da(len + 1, 300, cnt, sa, sa2, rk);
	// t is initialised because the tie branch below may read it before any
	// candidate has been stored.
	int res = 0, t = 0;
	for (int i = 1; i <= len; i++) {
		int minn = min(sa[i], sa[i - 1]);
		int maxx = max(sa[i], sa[i - 1]);
		// One suffix must start in the original half, the other at or after
		// the separator; the extra equality requires the two matched spans to
		// mirror the same stretch of the original word.
		if ((minn < (len / 2) && maxx >= (len / 2)) && (minn + height[i] == len - maxx)) {
			if (height[i] > res) {
				res = height[i];
				t = minn;
			}
			else {
				if (height[i] == res)
					t = min(t, minn);
			}
		}
	}
	for (int i = 0; i < res; i++)
		cout << str[i + t];
	return 0;
}

poj-3693 重复次数最多的连续重复子串

容易想到的就是枚举长度为L,然后看长度为L的字符串最多连续出现几次。

长度为L的串重复出现,那么st[0],st[l],st[2*l]……st[k*l]中肯定有两个连续的出现在字符串中。不然肯定长度不超过2*L啊。那么我们就枚举连续的两个,然后从这两个字符前后匹配,看最多能匹配多远。

即以st[i*l],st[i*l+l]前后匹配,这里是通过查询suffix(i*l),suffix(i*l+l)的最长公共前缀

通过rank值能找到i*l,与i*l+l的排名,我们要查询的是这段区间的height的最小值,通过RMQ预处理

达到查询为O(1)的复杂度,设LCP长度为M, 则答案显然为M / L + 1, 但这只是以i*l和i*l+l为起点的情况, 不过有一点是可以确定的。如果目标子串包含了

i*l和i*l+l。那么 i*l一定是和i*l+l匹配的。因为目标串中p一定和p+l匹配。这样才能满足子串长度为l。现在要解决的就是起点不在这两个位置上怎么办了。

 得到M/L+1后,我们可以试着把答案变大。如果M%L!=0,我们可以把长度补齐到L的整数倍,即在前面增加(L-M%L)个字符,看能不能使答案变大。为什么这样做是可以的呢?因为我们要使答案变大,往后扩展肯定不行了,因为后面已经不匹配了。但是我们为什么扩展(L-M%L)这么多就行了呢?比这个小肯定是不行的,因为还是没到L的整数倍。比这个多能行的话,取这个值一定能行,因为p是和p+L匹配的:既然取得比这个多,大不了往右平移几个,还是能使M%L得到匹配。那为什么只扩展一个长度L,不扩展多个呢?因为你是枚举每个i*l和i*l+l,你扩展2个或两个以上就是前面的i*l和i*l+l的情况了。这一步完成后,我们只能得到重复次数最大时长度的可能取值。剩下的工作就是找字典序最小了。通过sa数组进行枚举,取到的第一组,肯定是字典序最小的。
code:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<string>
#include<algorithm>
#include<cmath>
using namespace std;
#define N 100005
char str[N];
int dp[N][20];
int height[N];
int cnt[N], sa[N], ranks[N];
// Derives ranks[] from the global sa[] and fills the global height[] with the
// LCP of each suffix of str and the suffix ranked immediately before it.
//   n - length of the string; sa[] holds n + 1 entries (indices 0..n).
void calheight(int n) {
	// Start at 0 so that the rank of sa[0] is written too. The original loop
	// began at 1 and left that entry holding whatever an earlier, longer test
	// case had stored there.
	for (int i = 0; i <= n; i++)ranks[sa[i]] = i;
	for (int i = 0, j, lcp = 0; i < n; i++) {
		// The LCP can shrink by at most one when moving to the next position.
		lcp ? --lcp : 0;
		j = sa[ranks[i] - 1];
		for (; str[i + lcp] == str[j + lcp]; lcp++);
		height[ranks[i]] = lcp;
	}
}
// True when x and y agree in rank[] at offset 0 and at offset k.
bool same(int *rank, int x, int y, int k) {
	return !(rank[x] != rank[y] || rank[x + k] != rank[y + k]);
}
// Sorts the suffixes of the global string str[0..n) by prefix doubling; the
// result is written to the global sa[], using the global cnt[] as buckets.
//   n   - number of characters to sort (the caller passes strlen + 1)
//   m   - exclusive upper bound on the character codes
//   sa2 - scratch buffer
//   rk  - rank buffer (swapped with sa2 through the local pointers)
void da(int n, int m, int *sa2, int *rk) {
	// Seed order: counting sort on the first character.
	for (int c = 0; c < m; ++c) cnt[c] = 0;
	for (int pos = 0; pos < n; ++pos) {
		rk[pos] = str[pos];
		++cnt[rk[pos]];
	}
	for (int c = 1; c < m; ++c) cnt[c] += cnt[c - 1];
	for (int pos = n - 1; pos >= 0; --pos) sa[--cnt[rk[pos]]] = pos;

	for (int half = 1; half < n; half *= 2) {
		// Positions sorted by their second key.
		int out = 0;
		for (int pos = n - half; pos < n; ++pos) sa2[out++] = pos;
		for (int r = 0; r < n; ++r) {
			if (sa[r] < half) continue;
			sa2[out++] = sa[r] - half;
		}

		// Stable counting sort on the first key.
		for (int c = 0; c < m; ++c) cnt[c] = 0;
		for (int pos = 0; pos < n; ++pos) ++cnt[rk[pos]];
		for (int c = 1; c < m; ++c) cnt[c] += cnt[c - 1];
		for (int r = n - 1; r >= 0; --r) {
			const int pos = sa2[r];
			sa[--cnt[rk[pos]]] = pos;
		}

		// Assign new ranks; sa2 keeps the previous ones for the tie test.
		std::swap(rk, sa2);
		rk[sa[0]] = 0;
		for (int r = 1; r < n; ++r) {
			const int cur = sa[r];
			const int prev = sa[r - 1];
			rk[cur] = rk[prev] + (same(sa2, cur, prev, half) ? 0 : 1);
		}
		m = rk[sa[n - 1]] + 1;
		if (m == n) break;
	}
}
int sa2[N], rk[N], res[N];
// Fills the global table dp so that dp[p][lv] is the minimum of height[] over
// the 2^lv entries starting at p, cut off at index n. Indices are 1-based.
void RMQ_int(int n) {
	const int levels = floor(log(n + 0.0) / log(2.0));
	for (int pos = 1; pos <= n; ++pos)
		dp[pos][0] = height[pos];
	for (int lv = 1; lv <= levels; ++lv) {
		const int half = 1 << (lv - 1);
		for (int pos = n; pos != 0; --pos) {
			int best = dp[pos][lv - 1];
			// The right half only contributes when it starts inside 1..n.
			if (pos + half <= n)
				best = std::min(best, dp[pos + half][lv - 1]);
			dp[pos][lv] = best;
		}
	}
}
// Returns the minimum of height[] over the rank interval that separates the
// suffixes starting at text positions l and r, using the table built by RMQ_int.
// The two positions are expected to have different ranks.
int Rmq_Query(int l, int r) {
	int lo = ranks[l];
	int hi = ranks[r];
	if (lo > hi)
		std::swap(lo, hi);
	++lo;  // the interval starts just above the smaller rank
	const int lv = floor(log(hi - lo + 1.0) / log(2.0));
	const int leftBlock = dp[lo][lv];
	const int rightBlock = dp[hi - (1 << lv) + 1][lv];
	return std::min(leftBlock, rightBlock);
}
// Driver: for every input word (a word starting with '#' ends the input) finds
// the substring made of the most consecutive repetitions of some unit and
// prints the lexicographically smallest such substring as "Case i: <substring>".
int main() {
	int cas = 0;
	while (scanf("%s", str) != EOF && str[0] != '#') {
		int len = strlen(str);
		da(len + 1, 180, sa2, rk);
		calheight(len);
		RMQ_int(len);
		// maxn: best repetition count so far; res[0..candCount) holds the unit
		// lengths that reach it. (Named candCount so it no longer shadows the
		// global cnt[] array.)
		int maxn = 0, candCount = 0;
		for (int l = 1; l < len; l++) {
			for (int i = 0; i + l < len; i += l) {
				int r = Rmq_Query(i, i + l);
				int step = r / l + 1;
				// Try to extend to the left so the match covers one more
				// full unit.
				int k = i - (l - r % l);
				if (k >= 0 && r % l)
					if (Rmq_Query(k, k + l) >= r)step++;
				if (step > maxn) {
					maxn = step;
					candCount = 0;
					res[candCount++] = l;
				}
				// Record each unit length once. l only grows, so checking the
				// last entry suffices; without this res[] gets one entry per
				// (l, i) hit and can overflow.
				else if (step == maxn && res[candCount - 1] != l)
					res[candCount++] = l;
			}
		}
		// A one-character word never enters the loops above: the answer is
		// the character itself, repeated once.
		if (maxn == 0) {
			maxn = 1;
			res[candCount++] = 1;
		}
		int len1 = -1, st = 0;
		// Walk the suffixes in sorted order; the first one that starts a run
		// of maxn repetitions is the lexicographically smallest answer.
		for (int i = 1; i <= len && len1 == -1; i++) {
			for (int j = 0; j < candCount; j++) {
				int l = res[j];
				// The whole answer must fit inside the string; this also keeps
				// the query below away from positions past the end, whose
				// ranks are not valid for this word.
				if (sa[i] + maxn * l > len)
					continue;
				if (maxn == 1 || Rmq_Query(sa[i], sa[i] + l) >= (maxn - 1)*l) {
					len1 = l;
					st = sa[i];
					break;
				}
			}
		}
		cout << "Case " << ++cas << ": ";
		for (int i = st, j = 0; j < len1*maxn; j++, i++)cout << str[i];
		cout << endl;
	}
	return 0;
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值