借鉴了别人的思路:http://edward-mj.com/archives/1632
求串A和串B偏序匹配的所有位置。偏序匹配就是保持大小关系的离散化后两个串一模一样。
$1 \le |A|, |B| \le 10^5$,$1 \le A_i, B_i \le 25$
昨天网络赛的B题,我们当时随便弄了个 $O(25 \cdot (|A|+|B|))$ 的hash就过了,后来在Vani那听说有 $O(n \log k)$($k$ 为字符集大小)的做法,想了一下总算明白了。记录一下。
其实满足以下两条性质的匹配关系都能KMP:
- 匹配的前缀性质。也就是说:若 $A_{1..n}$ 和 $B_{1..n}$ 匹配,那么对于任意 $i = 1..n$,有 $A_{1..i}$ 和 $B_{1..i}$ 匹配。
- 偏序匹配符合传递性。假设用 $\approx$ 表示两个串符合偏序匹配,那么 $A \approx B,\ B \approx C \Rightarrow A \approx C$。
既然能KMP了,关键就在于判断他们什么时候能往后面加一个字符。
考虑两个相同长度的串 $A_{1..n}$、$B_{1..n}$,他们已经能够构成偏序匹配,那么假设现在新增了一个元素,构成了 $A_{1..n+1}$、$B_{1..n+1}$,那么他们同样能构成偏序匹配的充要条件是什么呢?
仔细想想不难YY到:
$\mathrm{count}(A_i < A_{n+1}) = \mathrm{count}(B_i < B_{n+1}),\ i = 1..n$
$\mathrm{count}(A_i = A_{n+1}) = \mathrm{count}(B_i = B_{n+1}),\ i = 1..n$
$\mathrm{count}(A_i > A_{n+1}) = \mathrm{count}(B_i > B_{n+1}),\ i = 1..n$
因为 $n$ 是固定的,三个计数之和恒等于 $n$,前两个条件成立时第三个条件自动成立,所以只关注前两个条件即可。
然后这东西可以用树状数组维护。
KMP维护的实际是两个指针j和i,表示 $A_{j..i}$ 和 $B_{1..i-j+1}$ 匹配,本质上i和j都是递增的,所以i增加的时候把a[i]放进树状数组(BIT),j增加的时候把a[j]从树状数组中删除即可。
而模式串因为只和前缀比较,所以可以预处理出low和eq数组
时间复杂度 $O(n \log_2 k)$,空间复杂度 $O(n)$。
#include <set>
#include <map>
#include <cmath>
#include <queue>
#include <stack>
#include <string>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// Shorthand for the 64-bit signed integer type.
typedef long long LL;
// Pi, obtained as acos(-1).
const double PI = acos(-1.0);
// Returns the larger of a and b; when they compare equal, b is returned.
template <class T>
inline T MAX(T a, T b)
{
    return (a > b) ? a : b;
}
// Returns the smaller of a and b; when they compare equal, b is returned.
template <class T>
inline T MIN(T a, T b)
{
    return (a < b) ? a : b;
}
// Template constants; N, M, MOD, dir and INF are not referenced by the
// functions of this program.
const int N = 111;
const int M = 11111;
const LL MOD = 1000000007LL;
const int dir[4][2] = {1, 0, -1, 0, 0, -1, 0, 1};
const int INF = 0x3f3f3f3f;
// last: index of the oldest element still counted in the Fenwick tree
//       (check() advances it while removing elements).
// n, m, k: the three integers read by main(); n values are read into a,
//       m values into b, and k is the upper index bound used by add().
int last, n, m, k;
// f: Fenwick tree (BIT) indexed by element value; a and b: the two input
// sequences, stored 1-based.
int f[33], a[111111], b[111111];
// next: KMP failure table filled by kmp(); low[i] / eq[i]: counts of earlier
// elements of b that are smaller than / equal to b[i], filled by init();
// mac: not referenced by the functions of this program.
int next[111111], low[111111], eq[111111], mac[111111];
// Fenwick-tree prefix query: sums f over the chain of indices obtained by
// repeatedly stripping the lowest set bit of x. Returns 0 for x == 0.
int get(int x)
{
    int total = 0;
    while (x != 0)
    {
        total += f[x];
        x -= x & (-x); // strip the lowest set bit
    }
    return total;
}
// Fenwick-tree point update: adds v to the count of value x.
//
// x is expected to lie in 1..k; a non-positive x is ignored (stepping by the
// lowest set bit would never advance from 0).
//
// Fix: the step used to be written `i = i + i & (-i)`, which parses as
// `(i + i) & (-i)` because `+` binds tighter than `&`. That visits the wrong
// nodes (e.g. 5 -> 10 instead of 5 -> 6), so later prefix queries missed
// values that had been inserted.
void add(int x, int v)
{
    for (int i = x; i > 0 && i <= k; i += i & (-i))
    {
        f[i] += v;
    }
}
// Precomputes, for every position of the pattern b, how many earlier pattern
// elements are strictly smaller (low) and how many are equal (eq), using the
// Fenwick tree f as a running multiset of the prefix.
void init()
{
    memset(f, 0, sizeof(f));
    low[0] = 0;
    eq[0] = 0;
    for (int pos = 1; pos <= m; ++pos)
    {
        const int value = b[pos];
        const int smaller = get(value - 1);
        low[pos] = smaller;
        eq[pos] = get(value) - smaller;
        add(value, 1); // b[pos] becomes part of the prefix for later positions
    }
}
bool check(int *a, int lim, int i, int j)
{
while (last < lim) {add(a[last++], -1);}
int l = get(a[i] - 1), e = get(a[i]) - l;
return l == low[j] && e == eq[j];
}
void kmp()
{
memset(f, 0, sizeof(f));
next[1] = 0;
add(b[1], 1);
int ans = 0, j = 0;
last = 1;
for (int i = 2; i <= m; ++i)
{
while (j && !check(b, i - j, i, j + 1)) j = next[j];
if (!j || check(b, i - j, i, j + 1)) j++;
next[i] = j;
add(b[i], 1);
}
// for (int i = 1; i <= m; ++i)
// printf("%d ", next[i]);
// printf("\n");
memset(f, 0, sizeof(f));
last = 1; j = 0;
for (int i = 1; i <= n; ++i)
{
while (j && !check(a, i - j, i , j + 1)) j = next[j];
if (!j || check(a, i - j, i, j + 1)) j++;
if (j == m)
{
ans++;
j = 0;
}
add(a[i], 1);
}
printf("%d\n", ans);
}
// Reads test cases until the input is exhausted. Each case is "n m k"
// followed by n values for a and m values for b (both stored 1-based), then
// runs init() and kmp(), which prints the answer.
//
// The loop requires all three header integers to be read: comparing against
// EOF alone would spin forever on malformed input, because scanf then returns
// a value in 0..2 without consuming the offending characters.
int main()
{
    while (scanf("%d%d%d", &n, &m, &k) == 3)
    {
        for (int i = 1; i <= n; ++i)
            scanf("%d", &a[i]);
        for (int i = 1; i <= m; ++i)
            scanf("%d", &b[i]);
        init();
        kmp();
    }
    return 0;
}
HASH的做法
因为 K 比较小,可以对每一种数值分别计算其出现位置的 HASH 值,再按数值从大到小依次比较两个串对应的 HASH 值是否相等。
时间复杂度 $O(NK)$
#include <set>
#include <map>
#include <cmath>
#include <queue>
#include <stack>
#include <string>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
// Shorthand for the 64-bit signed integer type.
typedef long long LL;
// Pi, obtained as acos(-1).
const double PI = acos(-1.0);
// Picks the greater of the two arguments (the second one on a tie).
template <class T>
inline T MAX(T a, T b)
{
    return (a > b) ? a : b;
}
// Picks the lesser of the two arguments (the second one on a tie).
template <class T>
inline T MIN(T a, T b)
{
    return (a < b) ? a : b;
}
// N is the capacity of the global input arrays declared below.
const int N = 111111;
// M, MOD, dir and INF are template constants that this program's main() does
// not reference.
const int M = 11111;
const LL MOD = 1000000007LL;
const int dir[4][2] = {1, 0, -1, 0, 0, -1, 0, 1};
const int INF = 0x3f3f3f3f;
// Per-value position hashes: ah[v] for the current text window, bh[v] for the
// pattern. They are unsigned so that the polynomial hash wraps modulo 2^32
// with defined behaviour (signed int overflow is undefined).
// NOTE(review): indices are value - 1, so values and k are assumed to stay
// within 1..33 — the article states values are at most 25.
unsigned int ah[33], bh[33];
// Text and pattern, stored 0-based.
int a[N], b[N];
// Text length, pattern length and number of distinct values, read per case.
int n, m, k;
// Base of the polynomial position hash.
int base = 29;
// Not used by main(); kept so the set of globals is unchanged.
bool c[N];

// Returns true when the non-empty value classes of the window and of the
// pattern, walked from the largest value downwards, have pairwise equal
// position hashes and both run out at the same time. A class whose hash is 0
// is treated as empty.
static bool sameOrderShape(const unsigned int *win, const unsigned int *pat, int classes)
{
    int t1 = classes - 1, t2 = classes - 1;
    while (true)
    {
        while (t1 >= 0 && win[t1] == 0) --t1;
        while (t2 >= 0 && pat[t2] == 0) --t2;
        if (t1 < 0 || t2 < 0)
            return t1 < 0 && t2 < 0;
        if (win[t1] != pat[t2])
            return false;
        --t1;
        --t2;
    }
}

// Reads cases "n m k", then n text values and m pattern values, and prints the
// number of non-overlapping windows of the text whose order shape matches the
// pattern, chosen greedily from left to right.
//
// The loop requires all three header integers to be read; comparing against
// EOF alone would spin forever on malformed input.
int main()
{
    while (scanf("%d%d%d", &n, &m, &k) == 3)
    {
        for (int i = 0; i < n; ++i)
            scanf("%d", &a[i]);
        for (int i = 0; i < m; ++i)
            scanf("%d", &b[i]);

        // Weight of the oldest position inside a window of length m.
        unsigned int topWeight = 1;
        for (int i = 0; i < m - 1; ++i)
            topWeight *= base;

        memset(ah, 0, sizeof(ah));
        memset(bh, 0, sizeof(bh));

        // Hash of the pattern: shift every class, then mark the new position.
        for (int i = 0; i < m; ++i)
        {
            for (int j = 0; j < k; ++j)
                bh[j] *= base;
            bh[b[i] - 1]++;
        }

        int nextFree = -1; // first start index not covered by a counted match
        int cnt = 0;
        for (int i = 0; i < n; ++i)
        {
            for (int j = 0; j < k; ++j)
                ah[j] *= base;
            ah[a[i] - 1]++;
            if (i >= m - 1)
            {
                const int start = i - m + 1;
                if (start >= nextFree && sameOrderShape(ah, bh, k))
                {
                    ++cnt;
                    nextFree = start + m;
                }
                // Slide the window: drop the contribution of its oldest element.
                ah[a[start] - 1] -= topWeight;
            }
        }
        printf("%d\n", cnt);
    }
    return 0;
}