Time Limit: 5000MS | Memory Limit: 65536K | |
Total Submissions: 8358 | Accepted: 2761 |
Description
A substring of a string T is defined as: $T(i, k) = T_i T_{i+1} \cdots T_{i+k-1}$, where $1 \le i \le i+k-1 \le |T|$.
Given two strings A, B and one integer K, we define S, a set of triples (i, j, k): $S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}$.
You are to give the value of |S| for specific A, B and K.
Input
The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.
1 ≤ |A|, |B| ≤ 10^5
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.
Output
For each case, output an integer |S|.
Sample Input
2 aababaa abaabaa 1 xx xx 0
Sample Output
22 5
Source
题意:给出两个字符串SA, SB和一个数K,求两个字符串的长度不小于k的公共子串个数。
思路:两个字符串中间用一个没出现过的字符连接,之后求出height数组,根据K将后缀分组,对每一组判断每个后缀属于SA串还是SB串,对每个SA串,求前面的SB串与它的LCP长度,同样,对每个SB串,求前面的SA串与它的LCP长度。对于两个后缀,设它们的LCP值为L,则这两个后缀能产生L-K+1个公共子串。而求每个SA串与前面的SB串能产生多少公共子串可以用单调栈O(len)的实现:栈中维护每个后缀的height值,以及从起点扫描到该位置时,有多少个后缀的height值大于等于当前后缀的height值,于是每次扫描到一个后缀,将其加入单调栈中(需要弹出height值大于等于它的元素,并把这些元素的个数合并到新入栈的元素上),计算该后缀的cnt值。
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <algorithm>
#include <map>
#include <stack>
using namespace std;
// Typed constants instead of object-like macros: they obey scoping rules and
// cannot silently rewrite an unrelated identifier named N or M.

// Sentinel value pushed by cal() for a suffix that has just been seen; it is
// assumed to be larger than any real height value (the problem statement
// bounds each input string by 10^5 characters).
const int INF = 110000;
// Capacity of every working array below.
const int N = 201000;
// Not referenced by the visible code; kept so existing users still compile.
const int M = 201000;

// k    - minimum common-substring length of the current test case.
// n    - number of symbols stored in str.
// m    - radix (number of distinct keys) used by the counting sorts in getSa().
// len1 - length of the first input string, len2 - length of the second one.
int k, n, m, len1, len2;

// Input buffer for one string read by main().
char s[N];

// Combined symbol sequence the suffix array is built over.
int str[N];
// rank[i]: sort key / final rank of the suffix starting at i (see getSa()).
int rank[N];
// sa[r]: start position of the suffix with rank r.
int sa[N];
// Counting-sort buckets used by getSa().
int bucket[N];
// Snapshot of rank taken before each re-ranking step in getSa().
int trank[N];
// Suffixes ordered by their second key during one doubling round of getSa().
int sa2[N];
// height[r]: common-prefix length of the suffixes sa[r-1] and sa[r] (see getHeight()).
int height[N];
// One entry of the monotonic stack used by cal(): a height value paired with
// the number of suffixes that are currently accounted for at that value.
class Node
{
public:
    int val, cnt;

    // Stores both fields exactly as given.
    Node(int _val, int _cnt) : val(_val), cnt(_cnt) {}
};
// Monotonic stack shared by all calls to cal(); cal() empties it before use.
stack<Node> sta;
// Tells whether the suffixes starting at p1 and p2 carry the same pair of
// keys (trank at the position, trank `len` positions further on).
// Returns 1 when both pairs match and both shifted positions lie inside
// [0, n); returns 0 otherwise.
int cmp(int p1, int p2, int len)
{
    // A shifted position outside the sequence never compares equal.
    if (p1 + len >= n || p2 + len >= n) return 0;
    if (trank[p1] != trank[p2]) return 0;
    return trank[p1 + len] == trank[p2 + len] ? 1 : 0;
}
void getSa()
{
for(int i = 0; i < m; i++) bucket[i] = 0;
for(int i = 0; i < n; i++) bucket[rank[i] = str[i]]++;
for(int i = 1; i < m; i++) bucket[i] += bucket[i-1];
for(int i = n-1; i >= 0; i--) sa[--bucket[rank[i]]] = i;
for(int j = 1, p = 0; p < n; j <<= 1, m = p)
{
p = 0;
for(int i = n-j; i < n; i++) sa2[p++] = i;
for(int i = 0; i < n; i++)
if(sa[i] >= j) sa2[p++] = sa[i]-j;
for(int i = 0; i < m; i++) bucket[i] = 0;
for(int i = 0; i < n; i++) bucket[rank[i]]++;
for(int i = 1; i < m; i++) bucket[i] += bucket[i-1];
for(int i = n-1; i >= 0; i--) sa[--bucket[rank[sa2[i]]]] = sa2[i];
for(int i = 0; i < n; i++) trank[i] = rank[i];
p = 0; rank[sa[0]] = p++;
for(int i = 1; i < n; i++)
rank[sa[i]] = cmp(sa[i], sa[i-1], j) ? p-1 : p++;
}
}
void getHeight()
{
int h = 0;
height[0] = 0;
for(int i = 0; i < n; i++)
{
if(rank[i] == 0) continue;
if(h) h--;
int pre = sa[rank[i]-1];
for(; pre+h < n && i+h < n && str[i+h] == str[pre+h]; h++);
height[rank[i]] = h;
}
}
// Classifies the suffix stored at sa[pos] by where it starts:
//   1 - it starts before index len1 (inside the first string),
//   2 - it starts after index len1 (inside the second string),
//   0 - it starts exactly at index len1 (the separator position).
inline int get(int pos)
{
    const int start = sa[pos];
    if (start < len1) return 1;
    if (start > len1) return 2;
    return 0;
}
long long cal(int l, int r, int flag)
{
while(!sta.empty()) sta.pop();
long long res = 0, sum = 0;
if(get(l) != flag)
{
sta.push(Node(INF, 1));
sum += INF-k+1;
}
for(int i = l+1; i <= r; i++)
{
int cnt = 0;
while(!sta.empty() && sta.top().val >= height[i])
{
sum -= (sta.top().val-k+1)*sta.top().cnt;
cnt += sta.top().cnt;
sum += (height[i]-k+1)*sta.top().cnt;
sta.pop();
}
if(cnt)
sta.push(Node(height[i], cnt));
if(get(i) != flag)
{
sum += INF-k+1;
sta.push(Node(INF, 1));
}
if(get(i) == flag)
{
res += sum;
}
}
return res;
}
// Reads test cases until K == 0 or the input ends. For each case it joins the
// two strings with a 0 separator, builds the suffix array and height array,
// splits the suffix array into maximal runs whose heights are all >= k, and
// sums cal() over every run for both directions (flag 1 and flag 2).
int main()
{
    //freopen("C:\\Users\\zfh\\Desktop\\in.txt", "r", stdin);

    // Require a successful conversion so malformed input ends the loop.
    while (scanf("%d", &k) == 1 && k)
    {
        n = 0;
        m = 300;  // radix for the first counting sort; must exceed every symbol in str

        if (scanf(" %s", s) != 1) break;
        len1 = strlen(s);
        for (int i = 0; i < len1; i++) str[n++] = s[i];

        str[n++] = 0;  // separator between the two strings, stored at index len1

        if (scanf(" %s", s) != 1) break;
        len2 = strlen(s);
        for (int i = 0; i < len2; i++) str[n++] = s[i];

        // Written one past the logical end (n is not advanced). The visible
        // routines bound every access by n, so they do not read this value.
        str[n] = 1;

        getSa();
        getHeight();

        // [sp, tp] is the current run of suffix-array positions; a height
        // below k closes the run and starts a new one at i.
        int sp = 0, tp = 0;
        long long ans = 0;
        for (int i = 0; i < n; i++)
        {
            if (height[i] >= k)
            {
                tp = i;
            }
            else
            {
                ans += cal(sp, tp, 1);
                ans += cal(sp, tp, 2);
                sp = tp = i;
            }
        }
        // Flush the last run.
        ans += cal(sp, tp, 1);
        ans += cal(sp, tp, 2);

        // Stream output instead of the msvcrt-only "%I64d" conversion, so the
        // 64-bit answer prints correctly on every C library.
        std::cout << ans << '\n';
    }
    return 0;
}