在东北集训期间学了好多玄妙的东西…
Trie树构建一波——
#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;
// Trie storage. Node 0 (`root`) is the root. `tail` is the running node
// count; it starts at 1 so that the root is already included, and every
// newly created node receives the index ++tail.
// son[p][c] is the child of node p along letter c, or 0 when there is none.
int root, tail = 1, son[100005][27];
// Input buffer holding one word.
char s[65];

/**
 * Inserts the NUL-terminated word `s` into the trie, creating a node for
 * every prefix that is not stored yet.
 *
 * Letters are mapped to columns with `c - 'A'`, so the word is expected to
 * consist of upper-case letters only (no range check is performed).
 */
void get_trie ( char *s ) {
    int cur = root;
    for ( const char *p = s; *p != '\0'; ++p ) {
        int &child = son[cur][*p - 'A'];
        if ( child == 0 ) child = ++tail;   // first time this prefix shows up
        cur = child;
    }
}
/**
 * Reads whitespace-separated words from standard input until extraction
 * fails, inserts each one into the trie and prints the number of nodes.
 */
int main ( ) {
    while ( true ) {
        // Cap every extraction at the buffer size. Before C++20 an unbounded
        // `cin >> char*` writes past `s` when a token is longer than 64
        // characters; with the width set, such a token is read in pieces
        // instead. The width resets after each extraction, so set it again.
        cin.width ( sizeof ( s ) );
        if ( !( cin >> s ) ) break;
        get_trie ( s );
    }
    printf ( "%d", tail ); // the number of nodes in the trie
    return 0;
}
在Trie树上构建fail指针(相当于KMP中的next数组),表示失配后应跳往哪个节点继续匹配,这样就得到了AC自动机,然后就可以像KMP一样让文本串在上面尽情匹配啦!
#include<iostream>
#include<cstdio>
#include<queue>
#include<cstring>
using namespace std;
// w: buffer for one pattern; t: buffer for the text that gets searched.
char w[55], t[1000005];
// Automaton storage. Node 0 (`root`) is the root; `tail` is the index of the
// most recently created node.
//   son[p][c]  child of node p on letter c (0 = none)
//   ed[p]      how many inserted patterns end at node p
//   fail[p]    failure link of node p
int root, son[500005][27], ed[500005], fail[500005], tail;
// Work queue for the breadth-first pass that builds the failure links.
queue < int > q;

/**
 * Inserts the NUL-terminated pattern `s` into the trie and bumps the
 * end-of-pattern counter of the node it finishes on.
 *
 * Letters are mapped with `c - 'a'`, so lower-case input is expected
 * (no range check is performed).
 */
void get_trie ( char * s ) {
    int cur = root;
    for ( const char *p = s; *p != '\0'; ++p ) {
        int &child = son[cur][*p - 'a'];
        if ( !child ) child = ++ tail;   // allocate the missing node
        cur = child;
    }
    ed[cur] ++;
}
/**
 * Computes the failure links with a breadth-first walk over the trie.
 *
 * While walking, every missing transition son[p][c] is overwritten with the
 * transition of p's failure node, so later lookups never have to follow
 * failure links just to find the next state.
 */
void get_fail ( ) {
    // Children of the root fail back to the root and seed the queue.
    for ( int c = 0; c < 26; c ++ ) {
        int child = son[root][c];
        if ( child ) {
            fail[child] = root;
            q.push ( child );
        }
    }
    while ( !q.empty ( ) ) {
        int cur = q.front ( );
        q.pop ( );
        for ( int c = 0; c < 26; c ++ ) {
            int fallback = son[fail[cur]][c];
            if ( son[cur][c] ) {
                fail[son[cur][c]] = fallback;
                q.push ( son[cur][c] );
            } else {
                son[cur][c] = fallback;   // borrow the failure node's transition
            }
        }
    }
}
int query ( char *s ) {
int len = strlen ( s );
int nd = root, ans = 0;
for ( int i = 0; i < len; i ++ ) {
int v = s[i] - 'a';
nd = son[nd][v];
int t = nd;
while ( t && ~ed[t] ) {
ans += ed[t]; ed[t] = -1;
t = fail[t];
}
}
return ans;
}
/**
 * Driver: reads the number of test cases; for each case reads n patterns and
 * one text, builds the automaton and prints the value returned by query().
 *
 * Every read is checked so that empty or truncated input ends the program
 * instead of looping on uninitialised counters or stale buffers.
 */
int main ( ) {
    int T;
    if ( scanf ( "%d", &T ) != 1 ) return 0;
    while ( T -- ) {
        int n;
        // Reset the automaton left over from the previous test case.
        tail = 0;
        memset ( ed, 0, sizeof ( ed ) );
        memset ( fail, 0, sizeof ( fail ) );
        memset ( son, 0, sizeof ( son ) );
        if ( scanf ( "%d", &n ) != 1 ) return 0;
        for ( int k = 1; k <= n; k ++ ) {
            if ( scanf ( "%s", w ) != 1 ) return 0;
            get_trie ( w );
        }
        if ( scanf ( "%s", t ) != 1 ) return 0;
        get_fail ( );
        int ans = query ( t );
        printf ( "%d\n", ans );
    }
    return 0;
}
然后还有一些奇妙的应用:
1、输出在文本串中出现次数最多的模板串:
#include<iostream>
#include<cstdio>
#include<queue>
#include<cstring>
using namespace std;
// w[k]: the k-th pattern (1-based); t: buffer for the text that gets searched.
char w[155][75], t[1000005];
// Automaton storage. Node 0 (`root`) is the root; `tail` is the index of the
// most recently created node.
//   son[p][c]  child of node p on letter c (0 = none)
//   fail[p]    failure link of node p
//   num[p]     index of the pattern that ends at node p (0 = none)
//   tim[k]     occurrence tally for pattern k
//   ed[]       declared but not referenced by this program
int root, son[500005][27], ed[500005], fail[500005], tail, tim[155], num[500005];
// Work queue for the breadth-first pass that builds the failure links.
queue < int > q;

/**
 * Inserts the NUL-terminated pattern `s` into the trie and records `qaq` as
 * the pattern index of the node it ends on. A later pattern ending on the
 * same node overwrites the earlier index.
 *
 * Letters are mapped with `c - 'a'`, so lower-case input is expected
 * (no range check is performed).
 */
void get_trie ( char * s, int qaq ) {
    int cur = root;
    for ( const char *p = s; *p != '\0'; ++p ) {
        int &child = son[cur][*p - 'a'];
        if ( !child ) child = ++ tail;   // allocate the missing node
        cur = child;
    }
    num[cur] = qaq;
}
/**
 * Builds the failure links level by level (breadth-first) and replaces each
 * absent transition son[p][c] by the transition of p's failure node.
 */
void get_fail ( ) {
    // First level: every existing child of the root fails to the root.
    for ( int letter = 0; letter < 26; ++letter ) {
        const int child = son[root][letter];
        if ( !child ) continue;
        fail[child] = root;
        q.push ( child );
    }
    while ( !q.empty ( ) ) {
        const int cur = q.front ( );
        q.pop ( );
        for ( int letter = 0; letter < 26; ++letter ) {
            const int fallback = son[fail[cur]][letter];
            const int child = son[cur][letter];
            if ( child ) {
                fail[child] = fallback;
                q.push ( child );
            } else {
                son[cur][letter] = fallback;   // shortcut through the failure node
            }
        }
    }
}
/**
 * Feeds the NUL-terminated text `s` through the automaton and, for every
 * position, walks the failure chain of the current state, adding one to
 * tim[num[node]] for each node on the chain.
 *
 * Nodes on which no pattern ends have num == 0, so their hits accumulate in
 * tim[0]; main only reads tim[1..n].
 */
void query ( char *s ) {
    int state = root;
    for ( const char *p = s; *p != '\0'; ++p ) {
        state = son[state][*p - 'a'];
        // `hit` rather than `t`, so the global text buffer is not shadowed.
        for ( int hit = state; hit; hit = fail[hit] )
            tim[num[hit]] += 1;
    }
}
/**
 * Driver: processes data sets until a pattern count of 0 (or a failed read).
 * For each set it reads n patterns and one text, tallies occurrences and
 * prints the highest tally followed by every pattern that reaches it, in
 * input order.
 */
int main ( ) {
    int n;
    while ( scanf ( "%d", &n ) == 1 && n != 0 ) {
        // Start from an empty automaton for this data set.
        tail = 0;
        memset ( son, 0, sizeof ( son ) );
        memset ( fail, 0, sizeof ( fail ) );
        memset ( num, 0, sizeof ( num ) );
        memset ( tim, 0, sizeof ( tim ) );

        for ( int k = 1; k <= n; ++k ) {
            scanf ( "%s", w[k] );
            get_trie ( w[k], k );
        }
        scanf ( "%s", t );
        get_fail ( );
        query ( t );

        int best = 0;
        for ( int k = 1; k <= n; ++k ) {
            if ( best < tim[k] ) best = tim[k];
        }
        printf ( "%d\n", best );
        for ( int k = 1; k <= n; ++k ) {
            if ( tim[k] == best ) printf ( "%s\n", w[k] );
        }
    }
    return 0;
}
2、多文本串多模板串计数:
#include<iostream>
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;
// Number of strings stored in a[].
int n;
// Work queue for the breadth-first pass that builds the failure links.
queue < int > q;
// qwq: buffer holding the string currently being inserted into the trie;
// a[i]: the i-th string that is later run through the automaton (1-based).
char qwq[1000005], a[10005][21];
// Automaton storage. Node 0 (`root`) is the root; `ntot` is the index of the
// most recently created node.
//   son[p][c]  child of node p on letter c (0 = none)
//   end[p]     how many inserted strings end at node p (-1 once harvested)
//   fail[p]    failure link of node p
//   las[i]     node on which the i-th inserted string ends
//   ans[p]     count accumulated for node p by query()
// NOTE: because of `using namespace std;`, the unqualified name `end` is
// ambiguous with std::end on C++11 and later; refer to this array as `::end`.
int root, son[500005][26], end[500005], fail[500005], ntot, las[10005], ans[500005];

/**
 * Inserts the string currently held in `qwq` into the trie, bumps the
 * end-of-string counter of its final node and remembers that node as the
 * end node of string number `qaq`.
 *
 * Letters are mapped with `c - 'a'`, so lower-case input is expected
 * (no range check is performed).
 */
void insert ( int qaq ) {
    int len = strlen ( qwq );
    int nd = root;
    for ( int i = 0; i < len; i ++ ) {
        int v = qwq[i]-'a';
        if ( !son[nd][v] ) son[nd][v] = ++ntot;
        nd = son[nd][v];
    }
    // Qualified on purpose: plain `end` would not compile (clash with std::end).
    ::end[nd] ++;
    las[qaq] = nd;
}
/**
 * Breadth-first construction of the failure links. Missing transitions
 * son[p][c] are filled in with the transition of p's failure node as the
 * walk proceeds.
 */
void get_fail ( ) {
    // Seed the queue with the root's existing children; they fail to the root.
    for ( int c = 0; c < 26; ++c ) {
        if ( son[root][c] == 0 ) continue;
        fail[son[root][c]] = root;
        q.push ( son[root][c] );
    }
    while ( !q.empty ( ) ) {
        int cur = q.front ( );
        q.pop ( );
        for ( int c = 0; c < 26; ++c ) {
            int viaFail = son[fail[cur]][c];
            if ( son[cur][c] != 0 ) {
                fail[son[cur][c]] = viaFail;
                q.push ( son[cur][c] );
            } else {
                son[cur][c] = viaFail;   // reuse the failure node's transition
            }
        }
    }
}
void query ( char *s ) {
int len = strlen ( s );
int nd = root;
for ( int i = 0; i < len; i ++ ) {
nd = son[nd][s[i]-'a'];
int t = nd;
while ( t && end[t] != -1 ) {
ans[t] += end[t], end[t] = -1;
t = fail[t];
}
}
}
/**
 * Driver: reads n strings into a[], then m strings that are inserted into
 * the trie, builds the failure links, runs every a[i] through the automaton
 * and prints ans[] of the end node of each inserted string, one per line.
 *
 * Every read is checked so that empty or truncated input ends the program
 * instead of using an uninitialised count or a stale buffer.
 */
int main ( ) {
    if ( scanf ( "%d", &n ) != 1 ) return 0;
    for ( int i = 1; i <= n; i ++ ) {
        if ( scanf ( "%s", a[i] ) != 1 ) return 0;
    }
    int m;
    if ( scanf ( "%d", &m ) != 1 ) return 0;
    for ( int i = 1; i <= m; i ++ ) {
        if ( scanf ( "%s", qwq ) != 1 ) return 0;
        insert ( i );
    }
    get_fail ( );
    for ( int i = 1; i <= n; i ++ )
        query ( a[i] );
    for ( int i = 1; i <= m; i ++ )
        printf ( "%d\n", ans[las[i]] );
    return 0;
}
继续努力。