AC自动机

AC自动机

AC自动机是KMP和Trie的结合,主要处理多模板串匹配问题。下面推荐一个博客,有助于学习AC自动机。

这里还有一个Kuangbin开的比赛,大家也可以做一下,加深对算法的理解。

下面是比赛中的题目,采用了notonlysuccess的模板。
HDU 2222 Keywords Search
题意:最裸的模板题,给定一些模板串以及一个文本串,要在文本串中找有多少个模板串。

/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;
const int maxnode = 500100;
const int charset = 26;
/**
 * Aho-Corasick automaton over the lowercase letters 'a'..'z', stored in
 * fixed-size arrays (node 0 is the root, at most maxnode nodes).
 *
 * Call order: init() once; then, for every pattern set, reset(), Insert()
 * for each pattern, Construct(), and finally query().
 * The caller must keep the total number of trie nodes below maxnode;
 * Insert() does not check the bound.
 */
struct ACAutomaton {
    int ch[maxnode][charset];  // trie edges; a complete goto table after Construct()
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // BFS queue storage used by Construct()
    int val[maxnode];          // summed weight of the patterns ending at each node
    int sz;                    // number of nodes currently in use
    int ID[128];               // character code -> child index, -1 when not in the alphabet

    // Maps a character to its child index, or -1 for anything outside the
    // alphabet. Going through unsigned char keeps negative char values from
    // indexing in front of ID.
    int childIndex(char c) const {
        const unsigned char uc = static_cast<unsigned char>(c);
        return uc < 128 ? ID[uc] : -1;
    }

    // One-time setup: builds the character table ('a'->0 ... 'z'->25) and marks
    // every other code as "not in the alphabet".
    void init() {
        fail[0] = 0;
        for (int i = 0; i < 128; i++) ID[i] = -1;
        for (int i = 0; i < charset; i++) ID[i + 'a'] = i;
    }

    // Empties the trie; must be called before a new pattern set is inserted.
    void reset() {
        memset(ch[0], 0, sizeof(ch[0]));
        sz = 1;
    }

    // Inserts pattern s with weight key. Inserting the same pattern again
    // accumulates the weights. A pattern containing a character outside the
    // alphabet can never be matched by query(), so it is ignored entirely.
    void Insert(char *s, int key) {
        for (const char *p = s; *p; p++) {
            if (childIndex(*p) < 0) return;
        }
        int u = 0;
        for ( ; *s; s++) {
            const int c = childIndex(*s);
            if (!ch[u][c]) {
                // Rows are cleared lazily, when a node is first allocated.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] += key;
    }

    // Builds the failure links breadth-first and fills every missing edge with
    // the transition of the failure state, so query() never has to backtrack.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                fail[ch[0][i]] = 0;
                *e ++ = ch[0][i];
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e ++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Returns the summed weight of all inserted patterns that occur in s.
    // Every pattern is counted once: the weight of a matched node is cleared,
    // so a second query on the same automaton does not count it again.
    // A character outside the alphabet sends the automaton back to the root.
    int query(char *s) {
        int ans = 0, u = 0;
        for ( ; *s; s++) {
            const int c = childIndex(*s);
            if (c < 0) {
                u = 0;
                continue;
            }
            u = ch[u][c];
            for (int tmp = u; tmp; tmp = fail[tmp]) {
                ans += val[tmp];
                val[tmp] = 0;
            }
        }
        return ans;
    }
}AC;
char str[1000100];  // holds one pattern at a time, then the text to search

// HDU 2222 driver: for each of t test cases reads n patterns and one text and
// prints how many of the patterns occur in the text.
// Every read is checked so that truncated or malformed input ends the program
// instead of looping on uninitialised counters.
int main() {
    AC.init();
    int t;
    if (scanf("%d", &t) != 1) return 0;
    while (t-- > 0) {
        int n;
        if (scanf("%d", &n) != 1) break;
        AC.reset();
        for (int i = 0; i < n; i++) {
            if (scanf("%s", str) != 1) break;
            AC.Insert(str, 1);
        }
        AC.Construct();
        if (scanf("%s", str) != 1) break;
        printf("%d\n", AC.query(str));
    }
    return 0;
}


HDU 2896 病毒侵袭
题意:有N个病毒,M个文本串,问每个文本串出现了多少个病毒,分别是哪些?一共有多少个文本串出现了病毒?
思路:这道题的病毒特征码可以包含所有可见的ASCII字符,因此字符集大小取128。

/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;
const int maxnode = 100100;
const int charset = 128;
/**
 * Aho-Corasick automaton over the 7-bit character codes 0..127, stored in
 * fixed-size arrays (node 0 is the root, at most maxnode nodes).
 *
 * Call order: init() once, reset(), Insert() for each pattern, Construct(),
 * then query() for each text. Insert() does not check the maxnode bound.
 */
struct ACAutomaton {
    int ch[maxnode][charset];  // trie edges; a complete goto table after Construct()
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // BFS queue storage used by Construct()
    int val[maxnode];          // id of the pattern ending at each node, 0 for none
    int sz;                    // number of nodes currently in use
    int ID[128];               // character code -> child index (identity mapping)

    // Maps a character to its child index, or -1 when its code is outside
    // 0..charset-1. Going through unsigned char keeps negative char values
    // from indexing in front of ID.
    int childIndex(char c) const {
        const unsigned char uc = static_cast<unsigned char>(c);
        return uc < charset ? ID[uc] : -1;
    }

    // One-time setup of the (identity) character table.
    void init() {
        fail[0] = 0;
        for (int i = 0; i < charset; i++) ID[i] = i;
    }

    // Empties the trie; must be called before patterns are inserted.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts pattern s and tags its final node with the non-zero id key.
    // Inserting the same pattern again overwrites the earlier id. A pattern
    // containing a character outside the alphabet can never be matched by
    // query(), so it is ignored entirely.
    void Insert(char *s, int key) {
        for (const char *p = s; *p; p++) {
            if (childIndex(*p) < 0) return;
        }
        int u = 0;
        for ( ; *s; s++) {
            const int c = childIndex(*s);
            if (!ch[u][c]) {
                // Rows are cleared lazily, when a node is first allocated.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // Builds the failure links breadth-first and fills every missing edge with
    // the transition of the failure state, so query() never has to backtrack.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Scans text s and collects the ids of all patterns occurring in it.
    // When at least one pattern is found, prints "web <id>:" followed by the
    // pattern ids in ascending order and increments tot; otherwise prints
    // nothing and leaves tot unchanged.
    // A character outside the alphabet sends the automaton back to the root.
    void query(char *s, int &tot, int id) {
        int u = 0;
        std::set<int> found;
        for (; *s; s++) {
            const int c = childIndex(*s);
            if (c < 0) {
                u = 0;
                continue;
            }
            u = ch[u][c];
            for (int tmp = u; tmp; tmp = fail[tmp]) {
                if (val[tmp]) found.insert(val[tmp]);
            }
        }
        if (!found.empty()) {
            printf("web %d:", id);
            for (std::set<int>::iterator it = found.begin(); it != found.end(); ++it) printf(" %d", *it);
            putchar('\n');
            tot++;
        }
    }
}AC;
char buf[210], str[10100];  // buf: one virus signature; str: one website source

// HDU 2896 driver: reads n virus signatures (numbered from 1), then m website
// sources; AC.query() reports each infected website, and the number of
// infected websites is printed at the end.
// Every read is checked so that truncated or malformed input cannot leave the
// counters uninitialised.
int main () {
    int n, m, tot = 0;
    if (scanf("%d", &n) != 1) return 0;
    AC.init();
    AC.reset();
    for (int i = 0; i < n; i++) {
        if (scanf("%s", buf) != 1) break;
        AC.Insert(buf, i + 1);
    }
    AC.Construct();
    if (scanf("%d", &m) != 1) m = 0;  // no website count: treat as zero websites
    for (int i = 0; i < m; i++) {
        if (scanf("%s", str) != 1) break;
        AC.query(str, tot, i + 1);
    }
    printf("total: %d\n", tot);
    return 0;
}

HDU 3065 病毒侵袭持续中
题意:有N个病毒,一个文本串,问文本串中每个病毒出现了多少次
思路:也是基础的模板题,注意输入包含多组测试数据。

/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;
const int maxnode = 50010;
const int charset = 128;
int cnt[1100];  // cnt[id]: occurrences of pattern id counted by ACAutomaton::query()
/**
 * Aho-Corasick automaton over the 7-bit character codes 0..127, stored in
 * fixed-size arrays (node 0 is the root, at most maxnode nodes).
 *
 * Call order: init() once; then, for every pattern set, reset(), Insert()
 * for each pattern, Construct(), and query(). Insert() does not check the
 * maxnode bound, and pattern ids must be valid indices into cnt.
 */
struct ACAutomaton {
    int ch[maxnode][charset];  // trie edges; a complete goto table after Construct()
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // BFS queue storage used by Construct()
    int val[maxnode];          // id of the pattern ending at each node, 0 for none
    int sz;                    // number of nodes currently in use
    int ID[128];               // character code -> child index (identity mapping)

    // Maps a character to its child index, or -1 when its code is outside
    // 0..charset-1. Going through unsigned char keeps negative char values
    // from indexing in front of ID.
    int childIndex(char c) const {
        const unsigned char uc = static_cast<unsigned char>(c);
        return uc < charset ? ID[uc] : -1;
    }

    // One-time setup of the (identity) character table.
    void init() {
        fail[0] = 0;
        for (int i = 0; i < charset; i++) ID[i] = i;
    }

    // Empties the trie; must be called before a new pattern set is inserted.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts pattern s and tags its final node with the non-zero id key.
    // Inserting the same pattern again overwrites the earlier id. A pattern
    // containing a character outside the alphabet can never be matched by
    // query(), so it is ignored entirely.
    void Insert(char *s, int key) {
        for (const char *p = s; *p; p++) {
            if (childIndex(*p) < 0) return;
        }
        int u = 0;
        for ( ; *s; s++) {
            const int c = childIndex(*s);
            if (!ch[u][c]) {
                // Rows are cleared lazily, when a node is first allocated.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // Builds the failure links breadth-first and fills every missing edge with
    // the transition of the failure state, so query() never has to backtrack.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Scans text s and adds one to cnt[id] for every occurrence (overlapping
    // ones included) of the pattern tagged id. cnt is not cleared here.
    // A character outside the alphabet sends the automaton back to the root.
    void query(char *s) {
        int u = 0;
        for (; *s; s++) {
            const int c = childIndex(*s);
            if (c < 0) {
                u = 0;
                continue;
            }
            u = ch[u][c];
            for (int tmp = u; tmp; tmp = fail[tmp]) {
                if (val[tmp]) cnt[val[tmp]]++;
            }
        }
    }
} AC;
char buf[1100][55], str[2000100];  // buf: the virus signatures; str: the text to scan

// HDU 3065 driver: for each test case reads n virus signatures and one text,
// then prints "<signature>: <count>" for every signature that occurs at least
// once, in input order.
// The outer loop runs only while a count is actually parsed: comparing the
// scanf result with 1 stops on EOF and also on a matching failure, where
// scanf returns 0 and a "~scanf(...)" test would loop forever.
int main () {
    AC.init();
    int n;
    while (scanf("%d", &n) == 1) {
        AC.reset();
        for (int i = 0; i < n; i++) {
            if (scanf("%s", buf[i]) != 1) {
                n = i;  // input ended early: keep only the signatures read so far
                break;
            }
            AC.Insert(buf[i], i + 1);
            cnt[i + 1] = 0;
        }
        AC.Construct();
        if (scanf("%s", str) != 1) break;
        AC.query(str);
        for (int i = 1; i <= n; i++) if (cnt[i]) printf("%s: %d\n", buf[i - 1], cnt[i]);
    }
    return 0;
}

ZOJ 3430 Detect the Virus
题意:有一种编码方式,将输进来的字符转化为二进制,然后6个为一组,不足补零,得到一个新的数字,每个数字对应一个字符(见题面)。现在给你已经编码过的n个病毒,和m个编码过的文本串,问每个文本串各包含多少种病毒。
思路:解码之后,每个字节可能取0~255共256种值(包括0),所以不能用以'\0'结尾的C字符串表示,需要用unsigned char数组并显式记录长度。解码之后就是裸的AC自动机。
/*
ID: wuqi9395@126.com
PROG:
LANG: C++
*/
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<cmath>
#include<cstdio>
#include<vector>
#include<string>
#include<fstream>
#include<cstring>
#include<ctype.h>
#include<iostream>
#include<algorithm>
#define INF (1<<30)
#define PI acos(-1.0)
#define mem(a, b) memset(a, b, sizeof(a))
#define rep(i, n) for (int i = 0; i < n; i++)
#define debug puts("===============")
typedef long long ll;
using namespace std;
const int maxpattern = 520;               // query() can track pattern ids 1..maxpattern-1
// Node pool: the original budget of 64 bytes per pattern, for every pattern id
// query() can track, plus one slot for the root. The previous 510 * 64 was
// smaller than that and left no room for the root.
const int maxnode = maxpattern * 64 + 1;
const int charset = 256;
/**
 * Aho-Corasick automaton over arbitrary bytes (0..255), stored in fixed-size
 * arrays (node 0 is the root, at most maxnode nodes). Patterns and texts are
 * passed as unsigned char arrays with an explicit length, so they may contain
 * zero bytes.
 *
 * Call order: init() once; then, for every pattern set, reset(), Insert()
 * for each pattern, Construct(), and query() for each text. Insert() does
 * not check the maxnode bound.
 */
struct ACAutomaton {
    int ch[maxnode][charset];  // trie edges; a complete goto table after Construct()
    int fail[maxnode];         // failure link of each node
    int Q[maxnode];            // BFS queue storage used by Construct()
    int val[maxnode];          // id of the pattern ending at each node, 0 for none
    int sz;                    // number of nodes currently in use
    int ID[256];               // unused: bytes index ch directly; kept for layout compatibility

    // One-time setup; only the root's failure link needs initialising.
    void init() {
        fail[0] = 0;
    }

    // Empties the trie; must be called before a new pattern set is inserted.
    void reset() {
        sz = 1;
        memset(ch[0], 0, sizeof(ch[0]));
    }

    // Inserts the first len bytes of s and tags the final node with the id key.
    // Inserting the same pattern again overwrites the earlier id.
    void Insert(unsigned char s[], int key, int len) {
        int u = 0;
        for (int i = 0; i < len; i++) {
            const int c = s[i];
            if (!ch[u][c]) {
                // Rows are cleared lazily, when a node is first allocated.
                memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u] = key;
    }

    // Builds the failure links breadth-first and fills every missing edge with
    // the transition of the failure state, so query() never has to backtrack.
    void Construct () {
        int *s = Q, *e = Q;
        for (int i = 0; i < charset; i++) {
            if (ch[0][i]) {
                *e++ = ch[0][i];
                fail[ch[0][i]] = 0;
            }
        }
        while(s != e) {
            int u = *s++;
            for (int i = 0; i < charset; i++) {
                int &v = ch[u][i];
                if (v) {
                    *e++ = v;
                    fail[v] = ch[fail[u]][i];
                } else {
                    v = ch[fail[u]][i];
                }
            }
        }
    }

    // Prints, on a line of its own, how many distinct pattern ids occur in the
    // first len bytes of s. Ids outside 1..maxpattern-1 cannot be tracked and
    // are skipped instead of indexing past the seen[] table.
    void query(unsigned char s[], int len) {
        int u = 0, ans = 0;
        bool seen[maxpattern] = {0};
        for (int i = 0; i < len; i++) {
            u = ch[u][s[i]];
            for (int tmp = u; tmp; tmp = fail[tmp]) {
                const int id = val[tmp];
                if (id > 0 && id < maxpattern && !seen[id]) {
                    seen[id] = true;
                    ans++;
                }
            }
        }
        printf("%d\n", ans);
    }
} AC;
char s[4000];            // one encoded input token, as read by main
unsigned char g[4000];   // 6-bit values of the token's symbols, written by get()
unsigned char now[4000]; // decoded bytes, written by change()

// Translates the first len symbols of s into their 6-bit values and stores
// them in g: 'A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
// '+' -> 62, and every other character -> 63. g[len] is set to 0, so one
// value past the end can be read safely.
void get(char *s, int len) {
    for (int pos = 0; pos < len; ++pos) {
        const char sym = s[pos];
        unsigned char sextet = 63;
        if ('A' <= sym && sym <= 'Z') {
            sextet = sym - 'A';
        } else if ('a' <= sym && sym <= 'z') {
            sextet = sym - 'a' + 26;
        } else if ('0' <= sym && sym <= '9') {
            sextet = sym - '0' + 52;
        } else if (sym == '+') {
            sextet = 62;
        }
        g[pos] = sextet;
    }
    g[len] = 0;
}
// Packs len 6-bit values from g into 8-bit bytes stored in now and returns the
// number of bytes written. Each group of four values yields up to three bytes;
// the second and third byte of a group are produced only when the values they
// end on lie inside the first len entries. g[len] must be readable, because
// the first byte of a group always reads the value after the group's start.
int change(unsigned char g[], int len) {
    int produced = 0;
    int pos = 0;
    while (pos < len) {
        const unsigned char first = g[pos];
        const unsigned char second = g[pos + 1];
        now[produced++] = (first << 2) | (second >> 4);
        if (pos + 2 < len) {
            now[produced++] = (second << 4) | (g[pos + 2] >> 2);
        }
        if (pos + 3 < len) {
            now[produced++] = (g[pos + 2] << 6) | g[pos + 3];
        }
        pos += 4;
    }
    return produced;
}
// Reads one encoded token from stdin into s, strips its trailing '=' padding
// and decodes it into now. Returns the number of decoded bytes, or -1 when no
// token could be read. The "len > 0" guard keeps a token made only of '='
// from reading in front of the buffer.
static int readDecoded() {
    if (scanf("%s", s) != 1) return -1;
    int len = static_cast<int>(strlen(s));
    while (len > 0 && s[len - 1] == '=') len--;
    get(s, len);
    return change(g, len);
}

// ZOJ 3430 driver: for each test case reads n encoded virus samples (ids start
// at 1) and m encoded files; AC.query() prints one count per file, and a blank
// line follows every test case.
// The outer loop runs only while a sample count is actually parsed: comparing
// the scanf result with 1 stops on EOF and also on a matching failure, where
// scanf returns 0 and a "~scanf(...)" test would loop forever.
int main () {
    AC.init();
    int n;
    while (scanf("%d", &n) == 1) {
        AC.reset();
        for (int i = 0; i < n; i++) {
            const int cnt = readDecoded();
            if (cnt < 0) break;
            AC.Insert(now, i + 1, cnt);
        }
        AC.Construct();
        int m = 0;
        if (scanf("%d", &m) != 1) m = 0;  // no file count: treat as zero files
        while (m-- > 0) {
            const int cnt = readDecoded();
            if (cnt < 0) break;
            AC.query(now, cnt);
        }
        putchar('\n');
    }
    return 0;
}

POJ 2778 DNA Sequence
题意:DNA的序列由ACTG四个字母组成,现在给定m个不可行的序列。问随机构成的长度为n的序列中,有多少种序列是可行的(只要包含一个不可行序列便不可行)。个数很大,对100000取模。 
思路:AC自动机 + DP  解题报告

HDU 2243 考研路茫茫――单词情结
题意:给定一些词根,如果一个单词包含有词根,则认为是有效的。现在问长度不超过L的单词里面,有多少有效的单词?

ZOJ 2619 Generator
题意:给定一个数N,代表可以选前N个字母。然后给定一个仅有前N个字母组成的字符串,问从空串开始构造,每次可以在已有基础上从前N个字母中挑选一个加在后面,问构造的字符串的长度期望是多少?


持续更新中

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值