string string string
Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 3492 Accepted Submission(s): 1178
Problem Description
Uncle Mao is a wonderful ACMER. One day he met an easy problem, but Uncle Mao was so lazy that he left the problem to you. I hope you can give him a solution.
Given a string s, we define a substring that occurs exactly k times in s as an important string, and you need to find out how many distinct substrings of s are important strings.
Input
The first line contains an integer T (T≤100) implying the number of test cases.
For each test case, there are two lines:
the first line contains an integer k (k≥1) which is described above;
the second line contains a string s (length(s)≤10^5).
It's guaranteed that ∑length(s)≤2×10^6.
Output
For each test case, print the number of the important substrings in a line.
Sample Input
2
2
abcabc
3
abcabcabcabc
Sample Output
6
9
Source
2017 ACM/ICPC Asia Regional Shenyang Online
题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=6194
题目大意:求一个字符串中不同的恰好出现了k次的子串个数
题目分析:先求出height数组,接下来分为两种情况考虑,k==1,k>1 (因为height数组本身就已经包含两个串的信息了)
1. k == 1
先求出当前的后缀串长:n-sa[i]+1,然后减去height[i]和height[i+1]的最大值(即减去与其最长的公共前缀的长度)比如suffix[sa[i]] = "abcad",suffix[sa[i-1]] = "abcd",suffix[sa[i+1]] = "ab",显然height[i] = 3,height[i+1] = 2,len(suffix[sa[i]]) = 5,那么对于sa[i]这个后缀的前缀对答案的贡献为2,即"abca"和"abcad"这两个子串
2. k > 1
将问题变为求至少出现k次的子串,然后容斥一下,具体做法: 枚举每个k段(对height来说是连续的k-1个值),假设当前枚举到的名次为i,那么对height来说就是[i+1, i + k - 1]这段区间,显然我们要的是这段区间里height的最小值(随便用个可以维护区间最小值的数据结构即可),还是拿上面的例子,假设此时的k为3,min(height[i], height[i+1]) = 2,即子串"a","ab"均至少出现3次(在这我们不考虑b,只关心后缀串的所有前缀),然后就是经典的容斥场景,减去至少出现k+1次的,分别往左往右扩展一位,再加上多减去的那部分
#include <cstdio>
#include <cstring>
#include <algorithm>
// Argument packs for recursing into the left / right child of a segment-tree
// node; they expect l, r, mid and rt to be in scope at the call site.
#define lson l, mid, rt << 1
#define rson mid + 1, r, rt << 1 | 1
using namespace std;

// Capacity of the per-position arrays (the string is stored 1-based).
int const MAX = 1e5 + 5;

char s[MAX];                 // input string, characters live in s[1..n]
int n, m, k;                 // string length, radix-sort key upper bound, required occurrence count
int sa[MAX], height[MAX];    // sa[r]: start of the rank-r suffix; height[r]: LCP of ranks r-1 and r
int rk[MAX], tp[MAX], tax[MAX];  // rank per position, scratch ordering/old ranks, radix buckets

// Minimum segment tree over height[1..n].  Nodes are addressed as rt, 2*rt and
// 2*rt+1 starting from rt = 1, so node ids can approach 4*n; the array needs
// four times the leaf capacity to stay in bounds.
int seg[MAX << 2];
// Tells whether positions a and b carry the same (first key, second key) pair
// in the rank table r, where the second key sits k slots further on.
// The second key is only inspected when the first keys already match.
bool cmp(int* r, int a, int b, int k) {
    if (r[a] != r[b]) {
        return false;
    }
    return r[a + k] == r[b + k];
}
// Counting sort of the suffixes listed in tp[1..n] by their key rk[.],
// writing the resulting order into sa[1..n].  Keys are assumed to lie in
// [0, m].  Suffixes are placed from the back of tp so that equal keys keep
// the relative order they had in tp.
void radix_sort() {
    // Empty every bucket a key can fall into.
    for (int key = 0; key <= m; ++key) {
        tax[key] = 0;
    }
    // Histogram of keys.
    for (int idx = 1; idx <= n; ++idx) {
        const int key = rk[tp[idx]];
        ++tax[key];
    }
    // Prefix sums: tax[key] becomes the last slot owned by that key.
    for (int key = 1; key <= m; ++key) {
        tax[key] += tax[key - 1];
    }
    // Hand out slots from the end of each bucket.
    for (int idx = n; idx >= 1; --idx) {
        const int suffix = tp[idx];
        int& slot = tax[rk[suffix]];
        sa[slot] = suffix;
        --slot;
    }
}
// Builds the suffix array of s[1..n] by prefix doubling with radix sort.
//
// On return sa[r] is the 1-based start of the suffix with rank r and rk[p] is
// the rank of the suffix starting at p.  tp, tax and m are scratch state
// shared with radix_sort().
void get_sa() {
    // cmp() may look one slot past the string (index n + 1) and relies on
    // finding 0 there.  rk and tp trade contents every round, so both
    // sentinels must be cleared; otherwise a value left behind by an earlier,
    // longer test case can make two different suffixes compare equal.
    rk[n + 1] = tp[n + 1] = 0;

    // Start the key bound from scratch instead of inheriting the previous
    // test case's value.
    m = 0;
    for (int i = 1; i <= n; i++) {
        rk[i] = s[i];
        tp[i] = i;
        m = max(m, (int)s[i]);
    }
    radix_sort();

    // Each round sorts by the first 2*j characters; p counts distinct ranks
    // and the loop stops once every suffix has its own rank.
    for (int j = 1, p = 0; p < n; j <<= 1, m = p) {
        p = 0;
        // Suffixes shorter than j + 1 have an empty second key: they go first.
        for (int i = n - j + 1; i <= n; i++) {
            tp[++p] = i;
        }
        // The rest are ordered by the rank of the suffix j positions later.
        for (int i = 1; i <= n; i++) {
            if (sa[i] > j) {
                tp[++p] = sa[i] - j;
            }
        }
        radix_sort();

        // Keep the old ranks in tp and rebuild rk.  Only slots 1..n carry
        // data (slot n + 1 is 0 in both arrays), so swapping that range is
        // enough and avoids touching all MAX elements each round.
        swap_ranges(rk + 1, rk + n + 1, tp + 1);
        rk[sa[1]] = p = 1;
        for (int i = 2; i <= n; i++) {
            rk[sa[i]] = cmp(tp, sa[i], sa[i - 1], j) ? p : ++p;
        }
    }
}
void get_height() {
for (int i = 1, j = 0; i <= n; i++) {
if (j) {
j--;
}
int prevPos = sa[rk[i] - 1];
while (i + j <= n && prevPos + j <= n && s[i + j] == s[prevPos + j]) {
j++;
}
height[rk[i]] = j;
}
}
// Recomputes node rt of the segment tree as the smaller of its two children.
void pushup(int rt) {
    const int leftChild = rt * 2;
    const int rightChild = leftChild + 1;
    seg[rt] = seg[rightChild] < seg[leftChild] ? seg[rightChild] : seg[leftChild];
}
// Builds the minimum segment tree for height[l..r] rooted at node rt.
// Leaves copy height[] directly; inner nodes are combined by pushup().
void build(int l, int r, int rt) {
    if (l != r) {
        const int mid = (l + r) / 2;
        build(l, mid, rt * 2);
        build(mid + 1, r, rt * 2 + 1);
        pushup(rt);
        return;
    }
    seg[rt] = height[l];
}
// Returns the minimum of height[L..R], where node rt covers [l, r].
// The running result starts at n, which acts as the "no value yet" bound.
int query(int L, int R, int l, int r, int rt) {
    const bool fullyCovered = (l >= L) && (r <= R);
    if (fullyCovered) {
        return seg[rt];
    }

    const int mid = (l + r) / 2;
    int best = n;
    if (mid >= L) {
        const int fromLeft = query(L, R, l, mid, rt * 2);
        if (fromLeft < best) {
            best = fromLeft;
        }
    }
    if (R > mid) {
        const int fromRight = query(L, R, mid + 1, r, rt * 2 + 1);
        if (fromRight < best) {
            best = fromRight;
        }
    }
    return best;
}
// Reads T test cases (an integer k followed by a string s) and prints, for
// each one, the number of distinct substrings of s that occur exactly k times.
int main() {
    int T;
    if (scanf("%d", &T) != 1) {
        return 0;
    }
    while (T--) {
        // height[n + 1] is read below and rk[n + 1] is read while sorting, so
        // neither may keep values from a previous, longer test case.
        memset(height, 0, sizeof(height));
        memset(rk, 0, sizeof(rk));
        if (scanf("%d%s", &k, s + 1) != 2) {
            break;  // truncated input: stop instead of working on stale data
        }
        n = static_cast<int>(strlen(s + 1));
        get_sa();
        get_height();
        build(1, n, 1);

        // Up to n * (n + 1) / 2 substrings can qualify (about 5e9 for
        // n = 1e5), which does not fit in a 32-bit int.
        long long ans = 0;
        if (k == 1) {
            // A prefix of the rank-i suffix is unique exactly when it is
            // longer than the LCP with both neighbouring suffixes.
            for (int i = 1; i <= n; i++) {
                ans += max(0, n - sa[i] + 1 - max(height[i], height[i + 1]));
            }
        } else {
            // Slide a window over k consecutive ranks i .. i + k - 1.  The
            // bound is written as i <= n - k + 1 so that a huge k cannot
            // overflow the comparison.
            for (int i = 1; i <= n - k + 1; i++) {
                // Prefixes shared by the whole window occur at least k times.
                ans += query(i + 1, i + k - 1, 1, n, 1);
                // Drop those that also reach the suffix before the window.
                ans -= query(i, i + k - 1, 1, n, 1);
                if (i + k <= n) {
                    // Drop those that also reach the suffix after the window,
                    // then add back the ones removed twice.
                    ans -= query(i + 1, i + k, 1, n, 1);
                    ans += query(i, i + k, 1, n, 1);
                }
            }
        }
        printf("%lld\n", ans);
    }
    return 0;
}