Description
某人读论文,一篇论文是由许多单词组成。但他发现一个单词会在论文中出现很多次,现在想知道每个单词分别在论文中出现多少次。
Input
第一行一个整数N,表示有多少个单词,接下来N行每行一个单词。每个单词由小写字母组成,N<=200,单词长度不超过10^6
Output
输出N个整数,第i行的数字表示第i个单词在文章中出现了多少次。
Sample Input
3
a
aa
aaa
Sample Output
6
3
1
题意差评,没写到底是每个字符串长10^6还是总长10^6..
这道题有很多做法:
1)用AC自动机嘛.. 就是统计一个子树和就好了(口胡)
2)用SAM做嘛.. 只要统计一个Right数组就可以了啊..
3)SAM建后缀树,这个请见blog:http://blog.csdn.net/werkeytom_ftd/article/details/51154465
这个不是我的,但我很快会去学..
然后这道题对多串有两种处理方法:(1)中间加一个字符 (2)广义SAM
插字符:
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <algorithm>
using namespace std;
// Capacity shared by the text buffer and by every per-state array.
const int Maxn = 2500010;

// Automaton storage, indexed by state number; state 0 is the root.
int F[Maxn];        // suffix link (parent) of each state; main sets F[0] = -1
int d[Maxn];        // length assigned to each state when it is created or cloned
int ch[Maxn][27];   // transitions; slots 0..25 are 'a'..'z', slot 26 is the separator 'a'+26
int tot;            // index of the most recently allocated state
int now;            // state the next add() call extends from

// Input buffer: all words back to back, each followed by one separator.
char s[Maxn];
int len;            // number of characters currently stored in s
int n;              // number of words

// Counting sort by d[], plus the per-state occurrence counters.
int Rsort[Maxn];    // bucket counts / prefix sums keyed by d[]
int rk[Maxn];       // states 1..tot arranged in non-decreasing order of d[]
int G[Maxn];        // 1 for states created by add(), 0 for clones; summed along F in main
// Splits the state reached from p via symbol c by creating a clone of it.
//
// The clone gets length d[p]+1, the same outgoing transitions and the same
// suffix link as the original target; the original target is then re-linked
// below the clone. Every state on the suffix-link chain starting at p whose
// c-transition still points at the original target is redirected to the clone.
//
// p : state whose c-transition must be split
// c : transition symbol index
// Returns the index of the newly created clone.
int copy ( int p, int c ){
    const int target = ch[p][c];
    const int clone = ++tot;

    d[clone] = d[p] + 1;
    // Duplicate the whole transition row of the original target.
    memcpy ( ch[clone], ch[target], sizeof ( ch[clone] ) );

    F[clone] = F[target];
    F[target] = clone;

    // Walk up the suffix links (-1 marks "above the root") and redirect.
    for ( ; p != -1 && ch[p][c] == target; p = F[p] ) ch[p][c] = clone;

    return clone;
}
// Extends the automaton from the current state `now` by one symbol c and
// leaves `now` on the state that represents the extended string.
//
// c : transition symbol index (0..26 in this program)
void add ( int c ){
    const int next = ch[now][c];

    // The transition already exists: reuse it, cloning first when the target
    // is longer than d[now]+1. No counter is touched on this path.
    // NOTE(review): main() feeds one concatenated string and never resets
    // `now`, so this path looks unreachable in this program -- confirm.
    if ( next != 0 ){
        if ( d[next] != d[now] + 1 ) copy ( now, c );
        now = ch[now][c];
        return;
    }

    // Otherwise allocate a brand-new state that counts one end position.
    const int fresh = ++tot;
    d[fresh] = d[now] + 1;
    G[fresh] = 1;

    int p = now;
    now = fresh;

    // Give every suffix-link ancestor lacking a c-transition one to `fresh`.
    for ( ; p != -1 && ch[p][c] == 0; p = F[p] ) ch[p][c] = fresh;

    // Ran off the top of the chain: the suffix link is the root.
    if ( p == -1 ){
        F[fresh] = 0;
        return;
    }

    // Link to the existing target when its length fits, else to a clone of it.
    const int q = ch[p][c];
    F[fresh] = ( d[p] + 1 == d[q] ) ? q : copy ( p, c );
}
int main (){
int i, j, k;
scanf ( "%d", &n );
len = 0;
for ( i = 1; i <= n; i ++ ){
scanf ( "%s", s+len );
len = strlen (s);
s[len++] = 'a'+26;
}
F[0] = -1;
for ( i = 0; i < len; i ++ ){
add (s[i]-'a');
}
for ( i = 1; i <= tot; i ++ ) Rsort[d[i]] ++;
for ( i = 1; i <= len; i ++ ) Rsort[i] += Rsort[i-1];
for ( i = tot; i >= 1; i -- ) rk[Rsort[d[i]]--] = i;
for ( i = tot; i >= 1; i -- ){
G[F[rk[i]]] += G[rk[i]];
}
j = 0;
for ( i = 1; i <= n; i ++ ){
if ( s[j] == 'a'+26 ) j ++;
now = 0;
while ( s[j] != 'a'+26 ){
int c = s[j]-'a';
now = ch[now][c];
j ++;
}
printf ( "%d\n", G[now] );
}
return 0;
}
广义SAM:
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <algorithm>
using namespace std;
// Capacity shared by the text buffer and by every per-state array.
const int Maxn = 2500010;

// Automaton storage, indexed by state number; state 0 is the root.
int F[Maxn];        // suffix link (parent) of each state; main sets F[0] = -1
int d[Maxn];        // length assigned to each state when it is created or cloned
int ch[Maxn][26];   // transitions for 'a'..'z'
int tot;            // index of the most recently allocated state
int now;            // state the next add() call extends from

// Input buffer: all words back to back, each followed by one '$'.
char s[Maxn];
int len;            // number of characters currently stored in s
int n;              // number of words

// Counting sort by d[], plus the per-state occurrence counters.
int Rsort[Maxn];    // bucket counts / prefix sums keyed by d[]
int rk[Maxn];       // states 1..tot arranged in non-decreasing order of d[]
int G[Maxn];        // end positions recorded per state by add(); summed along F in main
// Splits the state reached from p via symbol c by creating a clone of it.
//
// The clone gets length d[p]+1, the same outgoing transitions and the same
// suffix link as the original target; the original target is then re-linked
// below the clone. Every state on the suffix-link chain starting at p whose
// c-transition still points at the original target is redirected to the clone.
//
// p : state whose c-transition must be split
// c : transition symbol index
// Returns the index of the newly created clone.
int copy ( int p, int c ){
    const int target = ch[p][c];
    const int clone = ++tot;

    d[clone] = d[p] + 1;
    // Duplicate the whole transition row of the original target.
    memcpy ( ch[clone], ch[target], sizeof ( ch[clone] ) );

    F[clone] = F[target];
    F[target] = clone;

    // Walk up the suffix links (-1 marks "above the root") and redirect.
    for ( ; p != -1 && ch[p][c] == target; p = F[p] ) ch[p][c] = clone;

    return clone;
}
// Extends the automaton from the current state `now` by one symbol c,
// leaves `now` on the state that represents the extended string, and
// records one more end position on that state.
//
// c : transition symbol index (0..25)
void add ( int c ){
    const int next = ch[now][c];

    // The transition already exists (main() restarts from the root for every
    // word): reuse it, cloning first when the target is longer than d[now]+1,
    // and count one more end position on the state we land on.
    if ( next != 0 ){
        if ( d[next] != d[now] + 1 ) copy ( now, c );
        now = ch[now][c];
        G[now] ++;
        return;
    }

    // Otherwise allocate a brand-new state that counts one end position.
    const int fresh = ++tot;
    d[fresh] = d[now] + 1;
    G[fresh] = 1;

    int p = now;
    now = fresh;

    // Give every suffix-link ancestor lacking a c-transition one to `fresh`.
    for ( ; p != -1 && ch[p][c] == 0; p = F[p] ) ch[p][c] = fresh;

    // Ran off the top of the chain: the suffix link is the root.
    if ( p == -1 ){
        F[fresh] = 0;
        return;
    }

    // Link to the existing target when its length fits, else to a clone of it.
    const int q = ch[p][c];
    F[fresh] = ( d[p] + 1 == d[q] ) ? q : copy ( p, c );
}
int main (){
int i, j, k;
scanf ( "%d", &n );
len = 0;
for ( i = 1; i <= n; i ++ ){
scanf ( "%s", s+len );
len = strlen (s);
s[len++] = '$';
}
F[0] = -1;
for ( i = 0; i < len; i ++ ){
if ( s[i] != '$' ) add (s[i]-'a');
else now = 0;
}
for ( i = 1; i <= tot; i ++ ) Rsort[d[i]] ++;
for ( i = 1; i <= len; i ++ ) Rsort[i] += Rsort[i-1];
for ( i = tot; i >= 1; i -- ) rk[Rsort[d[i]]--] = i;
for ( i = tot; i >= 1; i -- ){
G[F[rk[i]]] += G[rk[i]];
}
j = 0;
for ( i = 1; i <= n; i ++ ){
if ( s[j] == '$' ) j ++;
now = 0;
while ( s[j] != '$' ){
int c = s[j]-'a';
now = ch[now][c];
j ++;
}
printf ( "%d\n", G[now] );
}
return 0;
}
广义SAM:652ms..
插字符做法:1020ms..