POJ-3261 Milk Patterns(后缀数组)

Farmer John has noticed that the quality of milk given by his cows varies from day to day. On further investigation, he discovered that although he can't predict the quality of milk from one day to the next, there are some regular patterns in the daily milk quality.

To perform a rigorous study, he has invented a complex classification scheme by which each milk sample is recorded as an integer between 0 and 1,000,000 inclusive, and has recorded data from a single cow over N (1 ≤ N ≤ 20,000) days. He wishes to find the longest pattern of samples which repeats identically at least K (2 ≤ K ≤ N) times. This may include overlapping patterns -- 1 2 3 2 3 2 3 1 repeats 2 3 2 3 twice, for example.

Help Farmer John by finding the longest repeating subsequence in the sequence of samples. It is guaranteed that at least one subsequence is repeated at least K times.

Input

Line 1: Two space-separated integers: N and K
Lines 2.. N+1: N integers, one per line, the quality of the milk on day i appears on the ith line.

Output

Line 1: One integer, the length of the longest pattern which occurs at least K times

Sample Input

8 2
1
2
3
2
3
2
3
1

Sample Output

4

题意:问至少出现k次的最长重复子串的长度。

思路: 先求出后缀数组和height数组, 再二分答案长度mid; 利用height数组按"height ≥ mid"分组的性质, 判断是否存在一组连续的后缀, 其个数≥k。

#include <iostream>
//#include <unordered_map>
#include <time.h>
#include <algorithm>
#include <stdio.h>
#include <string.h>
using namespace std;
// Short aliases for the 64-bit integer types.
typedef long long ll;
typedef unsigned long long ull;

// Local-debugging helpers, only compiled when LOCAL is defined.
#ifdef LOCAL
// Prints "[<function>: <expr> = <value>]".
// NOTE(review): pastes __FUNCTION__ between string literals, which presumably
// only compiles where __FUNCTION__ is itself a string literal (e.g. MSVC) — confirm.
#define debug(x) cout << "[" __FUNCTION__ ": " #x " = " << (x) << "]\n"
// Prints clock() and, via the comma operator, evaluates to 0 so that
// `return TIME;` still returns 0.
// NOTE(review): the value is labelled "ms" but clock() counts clock ticks — confirm the unit.
#define TIME cout << "RuningTime: " << clock() << "ms\n", 0
#else
// Without LOCAL, TIME is simply the exit code 0.
#define TIME 0
#endif
// Not referenced in the visible code; presumably a hashing modulus.
#define hash_ 1000000009
// Execute statement x, then `continue` the enclosing loop.
#define Continue(x) { x; continue; }
// Execute statement x, then `break` out of the enclosing loop/switch.
#define Break(x) { x; break; }
// Modulus constant (not referenced in the visible code).
const int mod = 1e9 + 7;
// Capacity of the global input array and of every array inside SA.
const int N = 2e5 + 10;
// "Infinity" sentinels for int and long long (not referenced in the visible code).
const int INF = 0x3f3f3f3f;
const ll LINF = 0x3f3f3f3f3f3f3f3f;
// Yields the next byte of stdin through read()'s private buffer, refilling it
// with fread when exhausted, or EOF when no more input is available.
// Only usable inside read() (it names read()'s statics buf/p1/p2). The
// expansion ends with its own ';', so use it only as the tail of a statement.
#define gc p1 == p2 && (p2 = (p1 = buf) + fread(buf, 1, 1000000, stdin), p1 == p2) ? EOF : *p1++;
/**
 * Fast buffered reader for one decimal integer from stdin.
 *
 * Skips every byte that is neither '-' nor a digit, then parses an optional
 * '-' followed by a run of digits. The byte that terminates the digit run is
 * consumed.
 *
 * @return the parsed value; 0 if end of input is reached before any digit.
 */
inline int read()
{
	static char buf[1000000], *p1 = buf, *p2 = buf;
	int x = 0;
	bool sgn = false;
	// `int` (not `char`) so that EOF stays distinguishable on every platform.
	int ch = gc;
	// Stop at EOF as well: otherwise the loop would spin forever once the
	// input is exhausted, because EOF is neither '-' nor a digit.
	while (ch != EOF && ch != '-' && (ch < '0' || ch > '9'))
		ch = gc;
	if (ch == '-')
	{
		sgn = true;
		ch = gc;
	}
	while (ch >= '0' && ch <= '9')
	{
		x = x * 10 + (ch - '0');
		ch = gc;
	}
	return sgn ? -x : x;
}
/**
 * Modular exponentiation by repeated squaring: computes a^b modulo mod.
 *
 * @param a   base; may be negative or larger than mod, it is reduced first
 * @param b   exponent; values <= 0 are treated as 0
 * @param mod modulus, must be positive (this parameter shadows the global `mod`)
 * @return    a^b mod `mod`, always in the range [0, mod)
 */
long long fpow(long long a, int b, int mod)
{
	// 1 % mod makes the empty product correct for mod == 1 (result must be 0).
	long long res = 1 % mod;
	// Reduce the base up front so that a * a cannot overflow 64 bits.
	a %= mod;
	if (a < 0)
		a += mod;
	for (; b > 0; b >>= 1)
	{
		if (b & 1)
			res = res * a % mod;
		a = a * a % mod;
	}
	return res;
}
// Largest sample value seen so far; main() updates it while reading the input.
int MX;
// The input sequence. main() fills positions 1..n; as a global the rest stays zero-initialised.
int str[N];
// Suffix array (prefix doubling + counting sort) and height/LCP array for the
// global sequence str[1..n]. All arrays are 1-indexed.
// Usage: set n, call suffix(), then get_height(). Requires n < N.
struct SA{
	int n, r; // n: length of the sequence; r: largest rank currently in use (radix of the counting sort)
	int sa[N]; // sa[i]: start position of the suffix whose rank is i, for i in 1..n
	int cnt[N]; // bucket counters for the counting sort
	int rak[N]; // rak[p]: rank of the suffix starting at position p
	int tmp[N]; // scratch array that alternates with rak during doubling
	int heig[N]; // heig[i]: LCP of the suffixes ranked i-1 and i (heig[1] = 0)

	// Rank stored for position pos, or 0 (smaller than every real rank) when
	// pos lies past the end of the sequence. This avoids relying on whatever
	// happens to be stored beyond index n.
	int rank_at(const int *ranks, int pos) const
	{
		return pos <= n ? ranks[pos] : 0;
	}
	// Stable counting sort of the positions tp[1..n] by key rk[position];
	// the order inside tp serves as the secondary key. Result goes to sa[1..n].
	// Keys must lie in 0..r.
	void radix_sort(int *rk, int *tp)
	{
		memset(cnt, 0, sizeof cnt);
		for (int i = 1; i <= n; i++)
			cnt[rk[tp[i]]]++;
		for (int i = 1; i <= r; i++)  // prefix sums: cnt[v] = last slot for key v
			cnt[i] += cnt[i - 1];
		for (int i = n; i >= 1; i--) // walk backwards to keep the sort stable
			sa[cnt[rk[tp[i]]]--] = tp[i];
	}
	// Builds sa[1..n] for str[1..n].
	void suffix()
	{
		int *rk = rak, *tp = tmp;
		// Coordinate-compress the symbols to ranks 1..r. Raw values may be far
		// larger than the cnt array (they would overflow it as bucket indices)
		// and may be 0, which must stay reserved for "past the end".
		for (int i = 1; i <= n; i++)
			tp[i] = str[i];
		std::sort(tp + 1, tp + n + 1);
		r = static_cast<int>(std::unique(tp + 1, tp + n + 1) - (tp + 1));
		for (int i = 1; i <= n; i++)
			rk[i] = static_cast<int>(std::lower_bound(tp + 1, tp + r + 1, str[i]) - tp);
		for (int i = 1; i <= n; i++)
			tp[i] = i;
		radix_sort(rk, tp);
		// l: prefix length already sorted; p: number of distinct ranks found.
		// Once p == n every suffix has a unique rank and the order is final.
		for (int l = 1, p = 1, i; p < n; l <<= 1, r = p)
		{
			// Suffixes shorter than l+1 have an empty second half: they come first.
			for (p = 0, i = n - l + 1; i <= n; i++)
				tp[++p] = i;
			// Remaining positions ordered by the rank of their second half:
			// sa is sorted, and sa[i] - l is the suffix whose second half starts at sa[i].
			for (i = 1; i <= n; i++)
				if (sa[i] > l)
					p++, tp[p] = sa[i] - l;
			radix_sort(rk, tp);
			swap(rk, tp); // tp now holds the old ranks, rk receives the new ones
			rk[sa[1]] = p = 1;
			for (i = 2; i <= n; i++)
			{
				// A new rank starts whenever either half differs from the previous suffix.
				if (tp[sa[i]] != tp[sa[i - 1]] || rank_at(tp, sa[i] + l) != rank_at(tp, sa[i - 1] + l))
					p++;
				rk[sa[i]] = p;
			}
		}
	}
	// Fills heig[1..n]: heig[i] is the longest common prefix of the suffixes
	// ranked i-1 and i. Must be called after suffix().
	void get_height()
	{
		for (int i = 1; i <= n; i++)
			rak[sa[i]] = i;
		int k = 0; // number of symbols known to match, carried between positions
		for (int i = 1; i <= n; i++)
		{
			if (rak[i] == 1)
			{
				// The smallest suffix has no predecessor to compare with.
				heig[1] = 0;
				k = 0;
				continue;
			}
			if (k)
				k--; // dropping the first symbol loses at most one matching symbol
			int j = sa[rak[i] - 1]; // start of the suffix ranked just before suffix i
			// Explicit bounds: a real symbol may equal whatever lies beyond str[n].
			while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
				k++;
			heig[rak[i]] = k;
		}
	}
}sa;
// Returns true when some run of consecutive entries sa.heig[2..sa.n] that are
// all >= mid groups at least k suffixes together (a run of t entries groups
// t + 1 suffixes), i.e. when a pattern of length mid occurs at least k times.
bool check(int mid, int k)
{
	int grouped = 1; // suffixes in the current group, counting the one that opens it
	for (int pos = 2; pos <= sa.n; ++pos)
	{
		if (sa.heig[pos] < mid)
		{
			// Common prefix too short: this suffix opens a new group.
			grouped = 1;
			continue;
		}
		if (++grouped >= k)
			return true;
	}
	return false;
}
int main()
{
#ifdef LOCAL
	freopen("D:/input.txt", "r", stdin);
#endif
	int n, k;
	cin >> n >> k;
	for (int i = 1; i <= n; i++)
		scanf("%d", &str[i]), MX = max(MX, str[i]);
	sa.n = n;
	sa.suffix();
	sa.get_height();
	int L = 1, R = sa.n, ans = 0;
	while (L <= R)
	{
		int mid = L + R >> 1;
		if (check(mid, k))
			L = mid + 1, ans = mid;
		else
			R = mid - 1;
	}
	cout << ans << endl;
	return TIME;
}

 

 

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值