HDU 4787 GRE Words Revenge（在线AC自动机）

最新推荐文章于 2019-02-18 22:58:22 发布

Luffy531

最新推荐文章于 2019-02-18 22:58:22 发布

阅读量1.5k

点赞数 1

分类专栏： ACM算法　文章标签： AC自动机　在线AC自动机

本文链接：https://blog.csdn.net/u014357885/article/details/48351371

版权

ACM算法专栏收录该内容

106 篇文章 0 订阅

订阅专栏

题目链接：http://acm.hdu.edu.cn/showproblem.php?pid=4787

题意：Coach Pang学习英语单词，总共有n个操作，2种操作。每行读入一个字符串。

如果字符串以+开头，此为单词（即模式串，重复加入的同一个单词只算一个）
如果字符串以?开头，此为文章（即文本串，查询在此之前加入的所有单词在文本串中出现的总次数）

需要注意的是，输入的字符串（单词和文章）都是被加密过的：将字符串看作一个环，循环左移 L 位即可还原，其中 L 为上一次询问的答案（还没有询问时为0）。具体看输入即可。

思路：如果只建一个AC自动机，那么每次插入单词之后、下一次查询之前，都需要对整个AC自动机重新求一遍失配边，这样复杂度太高，最坏的情况就是插入和查询交替出现。

作出的优化是：考虑到会不断加入新单词、需要较为频繁地调用getfail()，于是对已有的单词和新加入的单词分别建2个AC自动机（多个也可以？）。已有单词所在的AC自动机ac平时不需要重新getfail()，只对新加入单词所在的AC自动机buf调用getfail()函数，然后将2个AC自动机的find()函数的结果相加即得到最终答案。这样做的好处在于，每次getfail()时避免了大量重复地求一些已知的失配边（不能完全说是已知吧，合并后某些结点的失配边会改变，但并不会影响最终答案）。当buf的节点数超过一定量（如何控制？代码中取阈值TH = 2000）时，就将buf的字典树合并到ac的字典树中，再对ac求一次getfail()，然后清空buf，继续添加。据说复杂度为O(L * sqrt(L))。

参考大牛题解：http://blog.csdn.net/no__stop/article/details/16823479

代码

#include <iostream>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <algorithm>
#include <string>
#include <vector>
#include <map>
#include <queue>
#include <stack>

using namespace std;

// Argument-list shorthands for the left/right half of a range [l, r] split at m,
// with the child index derived from rt (rt*2 and rt*2+1).
// Neither macro is used by the code visible in this file.
#define lson l, m, rt << 1
#define rson m + 1, r, rt << 1 | 1
// Integer division of x by y rounded up (for positive operands).
// NOTE: defined after <math.h> is included, so from here on the name `ceil`
// followed by "(" expands to this two-argument macro instead of the library function.
#define ceil(x, y) (((x) + (y) - 1) / (y))

// Compile-time limits and tuning knobs.
const int SIZE = 2;             // alphabet size: characters are mapped with c - '0'
const int N = 1e5 + 10;         // node capacity of each automaton
const int M = 1e3 + 10;         // not used by the code visible in this file
const int INF = 0x7f7f7f7f;     // not used by the code visible in this file
const int MAX_WORD = 5e6 + 10;  // capacity of the shared input buffer `str`
const double EPS = 1e-9;        // not used by the code visible in this file
const int MOD = 2015;           // not used by the code visible in this file
const int TH = 2e3;             // main merges buf into ac once buf.sz exceeds this

// Aho-Corasick automaton over the alphabet {'0', '1'} stored in fixed arrays.
//
// Usage contract: after any change to the set of words (insert(), or writing
// ch/ed directly as the merge routine does) call getfail() before find().
// find() relies on the failure links and the per-node match counts that
// getfail() computes.
//
// Every string handed to insert()/find() carries one leading operation
// character in s[0]; the payload starts at s[1].
struct AC {
    int sz;           // number of nodes in use; node 0 is the root
    int ch[N][SIZE];  // trie edges; 0 means "no child"
    bool ed[N];       // ed[u] is true when a word ends exactly at node u
    int f[N];         // failure link of every node (0 for the root and its children)
    int cnt[N];       // words ending on the failure chain of u, u included, root excluded

    // Allocates a fresh, cleared node and returns its index.
    // There is no capacity check: callers must keep the node count below N.
    int newnode() {
        memset(ch[sz], 0, sizeof(ch[sz]));
        ed[sz] = false;
        f[sz] = 0;
        cnt[sz] = 0;
        return sz++;
    }

    // Resets the automaton to a lone root node.
    void init() {
        sz = 0;
        newnode();
    }

    // Adds the word s[1..] to the trie. Inserting the same word twice is
    // harmless because ed[] is a flag, not a counter.
    void insert(char *s) {
        int u = 0;
        for (int i = 1; s[i]; i++) {
            int v = s[i] - '0';
            if (!ch[u][v])
                ch[u][v] = newnode();
            u = ch[u][v];
        }
        ed[u] = true;
    }

    // Rebuilds failure links breadth-first and, in the same pass, the number of
    // words that end at each node or at any node reachable through its failure
    // chain. BFS order guarantees f[v] (strictly shallower than v) already has
    // its final cnt when v is reached.
    void getfail() {
        std::queue<int> q;
        cnt[0] = 0;  // the root is never counted as a match
        for (int i = 0; i < SIZE; i++) {
            int c = ch[0][i];
            if (c) {
                // Depth-1 nodes keep f == 0, the value newnode() gave them.
                cnt[c] = ed[c] ? 1 : 0;
                q.push(c);
            }
        }

        while (!q.empty()) {
            int r = q.front();
            q.pop();
            for (int i = 0; i < SIZE; i++) {
                int v = ch[r][i];
                if (v) {
                    q.push(v);
                    int u = f[r];
                    while (u && !ch[u][i]) u = f[u];
                    f[v] = ch[u][i];
                    cnt[v] = cnt[f[v]] + (ed[v] ? 1 : 0);
                }
            }
        }
    }

    // Returns the total number of occurrences of stored words inside s[1..]
    // (every end position of every matching word counts once).
    // Uses the precomputed cnt[] instead of walking the failure chain at each
    // text position, so the cost is linear in the text length.
    int find(char *s) {
        int t = 0;
        int u = 0;
        for (int i = 1; s[i]; i++) {
            int v = s[i] - '0';
            while (u && !ch[u][v]) u = f[u];
            u = ch[u][v];
            t += cnt[u];
        }
        return t;
    }
}ac, buf;  // two automata: ac holds the bulk of the words, buf the recently added few

// Answer of the most recent query; main reduces it modulo the payload length
// to obtain the rotation applied to the next input line.
int ans;
// Shared input buffer: str[0] is the operation character ('+' or '?'),
// the payload starts at str[1].
char str[MAX_WORD];

// Puts both automata back into their empty state (root node only).
// The two resets do not depend on each other.
void init() {
    buf.init();
    ac.init();
}

// Merges the subtree of buf rooted at r2 into the subtree of ac rooted at r1:
// every edge present in buf is created in ac when missing, and word-end flags
// are OR-ed into the matching ac nodes. The flags of r1/r2 themselves are not
// touched, only those of their descendants.
//
// The traversal keeps its own stack on the heap instead of recursing, because
// the depth equals the length of the longest word stored in buf and a
// recursive walk that deep can exhaust the call stack. Children are visited in
// the same pre-order (branch 0 first) as a recursive walk, so nodes are
// created in ac in the same order.
//
// ac's failure links are NOT updated here; the caller has to run ac.getfail().
void dfs(int r1, int r2) {
    std::vector<int> ac_path(1, r1);     // ac node at each level of the current path
    std::vector<int> buf_path(1, r2);    // matching buf node at each level
    std::vector<int> next_branch(1, 0);  // next child index to examine at each level

    while (!next_branch.empty()) {
        const int i = next_branch.back();
        if (i == SIZE) {
            // All children of this level handled: return to the parent.
            ac_path.pop_back();
            buf_path.pop_back();
            next_branch.pop_back();
            continue;
        }
        next_branch.back() = i + 1;

        const int buf_child = buf.ch[buf_path.back()][i];
        if (!buf_child)
            continue;

        const int parent = ac_path.back();
        if (!ac.ch[parent][i])
            ac.ch[parent][i] = ac.newnode();
        const int ac_child = ac.ch[parent][i];
        ac.ed[ac_child] |= buf.ed[buf_child];

        // Descend into the child before looking at the next sibling.
        ac_path.push_back(ac_child);
        buf_path.push_back(buf_child);
        next_branch.push_back(0);
    }
}

// Moves everything stored in buf into ac and leaves buf empty.
void join() {
    // Copy buf's trie, node by node, into ac's trie.
    dfs(0, 0);
    // ac has new nodes and word ends, so its failure links are rebuilt.
    ac.getfail();
    // buf starts over with just a root; this does not touch ac.
    buf.init();
}

// Exchanges two ranges of the global buffer `str`, given by inclusive index
// bounds [is, ie] and [js, je], by repeatedly swapping equal-length pieces.
// When the ranges are adjacent (ie + 1 == js) the net effect is a left
// rotation of str[is..je] that brings str[js] to position is.
// Does nothing when either range is empty.
//
// Written as a loop rather than tail recursion: with a very short first range
// and a long second one, the number of rounds grows linearly with the length
// of the string, which is too deep for the call stack when the compiler does
// not eliminate the tail call.
void _swap(int is, int ie, int js, int je) {
    for (;;) {
        const int leni = ie - is + 1;
        const int lenj = je - js + 1;
        if (leni <= 0 || lenj <= 0)
            return;

        const int common = std::min(leni, lenj);
        for (int i = 0; i < common; i++)
            std::swap(str[is + i], str[js + i]);

        if (leni > lenj) {
            // The second range is now in place at the front; what is left of
            // the first range still has to be exchanged with [js, je].
            is += lenj;
        } else {
            // The first range now sits at [js, js + leni - 1]; exchange it
            // with the remainder of the second range that follows it.
            const int rest = js + leni;
            is = js;
            ie = rest - 1;
            js = rest;
        }
    }
}

int main() {        
    int t_case;
    scanf("%d", &t_case);
    for (int i_case = 1; i_case <= t_case; i_case++) {
        int n;
        scanf("%d", &n);
        init();
        ans = 0;
        printf("Case #%d:\n", i_case);
        for (int i = 0; i < n; i++) {
            scanf("%s", str);
            int len = strlen(str);
            int k = ans % (len - 1);
            _swap(1, k, k + 1, len - 1);
            if (str[0] == '+') {
                buf.insert(str);
                buf.getfail();
                if (buf.sz > TH)//超出阈值,则合并
                    join();
            }
            else {
                ans = ac.find(str) + buf.find(str);
                printf("%d\n", ans);
            }
        }
    }
    return 0;
}