HDU 2222 -- AC自动机

题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=2222
题目大意:给你一些子串,问有多少个出现在了主串里
思路:多串匹配问题,直接用AC自动机来做

AC代码:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;

const int maxn = 1000000 + 5;   // size of the text buffer s
const int maxl = 50*10005;      // node pool size: bound on the total length of all patterns
const int sigma_size = 26;      // alphabet size (lowercase 'a'..'z')
int n;                          // pattern count read by main() for the current test case
char t[55], s[maxn];            // t holds one pattern, s holds the text to scan

/**
 * Aho-Corasick automaton over lowercase letters, stored in fixed-size arrays.
 *
 * Usage: clear(), insert() every pattern, getfail() once, then find().
 * Node 0 is the root, and 0 also doubles as the "no node" marker.
 * Inputs must consist only of 'a'..'z'; nothing is bounds-checked, and the
 * total pattern length must stay below maxl.
 */
struct AC
{
    int ch[maxl][sigma_size];   // ch[u][c]: node reached from u on letter c (0 = missing before getfail())
    int val[maxl];              // number of inserted patterns that end exactly at this node
    int fail[maxl];             // failure link of each node
    int last[maxl];             // suffix link: nearest node on the failure chain where a pattern ends (0 = none)
    int sz;                     // number of nodes in use

    /** Resets the automaton to a lone root node. */
    void clear(){
        std::memset(ch[0], 0, sizeof(ch[0]));
        // Reset the root's bookkeeping too, so nothing leaks between test cases.
        val[0] = fail[0] = last[0] = 0;
        sz = 1;
    }

    /** Maps a lowercase letter to its column in ch. */
    int idx(char x){return x-'a';}

    /**
     * Adds the NUL-terminated pattern s to the trie.
     * Inserting the same pattern twice counts it twice.
     * Must be called before getfail(), which overwrites missing edges.
     */
    void insert(const char *s)
    {
        int u = 0;
        for(int i = 0; s[i] != '\0'; i++){
            int c = idx(s[i]);
            if(!ch[u][c]){
                // Allocate a fresh node; slots may hold data from an earlier test case.
                std::memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz++;
            }
            u = ch[u][c];
        }
        val[u]++;
    }

    /**
     * Computes fail[] and last[] in BFS order and replaces every missing edge
     * with the transition of the failure state, so find() can follow ch[][]
     * directly.
     */
    void getfail()
    {
        std::queue<int> q;
        fail[0] = 0;
        last[0] = 0;
        for(int i = 0; i < sigma_size; i++){    // depth-1 nodes fail to the root
            int u = ch[0][i];
            if(u){
                fail[u] = 0;
                last[u] = 0;
                q.push(u);
            }
        }
        while(!q.empty()){
            int r = q.front();
            q.pop();
            for(int i = 0; i < sigma_size; i++){
                int u = ch[r][i];
                if(!u){
                    ch[r][i] = ch[fail[r]][i];  // fill in the missing edge
                    continue;
                }
                q.push(u);
                // fail[r] is shallower than r, so BFS has already completed its
                // edges; its transition on i is the failure state of u.
                fail[u] = ch[fail[r]][i];
                last[u] = val[fail[u]] ? fail[u] : last[fail[u]];
            }
        }
    }

    /**
     * Scans the NUL-terminated text s and returns the sum of val over all
     * pattern nodes matched somewhere in it.
     * Destructive: each matched node's val is zeroed, so every inserted
     * pattern is counted at most once and a repeated call returns 0 for them.
     */
    int find(const char *s)
    {
        int u = 0, cnt = 0;
        for(int i = 0; s[i] != '\0'; i++){
            u = ch[u][idx(s[i])];
            int temp = val[u] ? u : last[u];
            // A node whose val is already 0 was consumed by an earlier walk that
            // also consumed the rest of its chain, so the walk can stop there.
            while(temp && val[temp]){
                cnt += val[temp];
                val[temp] = 0;
                temp = last[temp];
            }
        }
        return cnt;
    }
}tree;

int main()
{
    int T;
    cin >> T;
    while(T--){
        scanf("%d", &n);
        tree.clear();
        while(n--){
            scanf("%s", t);
            tree.insert(t);
        }
        tree.getfail();
        scanf("%s", s);
        int ans = tree.find(s);
        cout << ans << '\n';
    }
    return 0;
}

关于AC自动机:
将所有模式串建成一个大的状态转移图,由Trie+失配边组成
下面是 GeeksforGeeks 网站上的 AC 自动机代码。使用时发现,数据较大时该代码容易出现内存越界(数组访问越界),但该代码的注释较好(不过我看不懂英文)。
Input:

    text = "ahishers"    
    arr[] = {"he", "she", "hers", "his"}

Output:

   Word his appears from 1 to 3
   Word he appears from 4 to 5
   Word she appears from 3 to 5
   Word hers appears from 4 to 7
// C++ program for implementation of Aho Corasick algorithm
// for string matching
using namespace std;
#include <bits/stdc++.h>

// Max number of states in the matching machine.
// Must be at least (total length of all keywords + 1): one state per trie
// character plus the root. buildMatchingMachine() does not check this.
const int MAXS = 500;

// Number of characters in the input alphabet ('a'..'z').
const int MAXC = 26;

// OUTPUT FUNCTION IS IMPLEMENTED USING out[]
// Bit i in this mask is one if the word with index i
// appears when the machine enters this state.
int out[MAXS];

// FAILURE FUNCTION IS IMPLEMENTED USING f[]
// f[0] stays -1; every other reachable state gets a valid state index.
int f[MAXS];

// GOTO FUNCTION (OR TRIE) IS IMPLEMENTED USING g[][]
// -1 means "no transition"; the root has a transition for every character.
int g[MAXS][MAXC];

// Builds the string matching machine (goto, failure and output functions).
// arr - array of words made of 'a'..'z'. The index of each keyword matters:
//       "out[state] & (1 << i)" is non-zero if word[i] ends at that state.
// k   - number of words in arr. Because out[] is an int bitmask, k must not
//       exceed the number of usable bits in an int (31 on common platforms).
// Returns the number of states that the built machine has.
// States are numbered 0 up to the return value - 1, inclusive.
int buildMatchingMachine(std::string arr[], int k)
{
    // No word ends anywhere yet, and no transition exists yet.
    std::memset(out, 0, sizeof out);
    std::memset(g, -1, sizeof g);

    // Initially, we just have the root state 0.
    int states = 1;

    // Build the trie: fill g[][] by inserting every word.
    for (int i = 0; i < k; ++i)
    {
        const std::string &word = arr[i];
        int currentState = 0;

        for (std::string::size_type j = 0; j < word.size(); ++j)
        {
            const int ch = word[j] - 'a';

            // Create a new state if there is no edge for ch yet.
            if (g[currentState][ch] == -1)
                g[currentState][ch] = states++;

            currentState = g[currentState][ch];
        }

        // Word i ends at this state.
        out[currentState] |= (1 << i);
    }

    // Characters without an edge from the root loop back to the root, so
    // following failure links always terminates at state 0.
    for (int ch = 0; ch < MAXC; ++ch)
        if (g[0][ch] == -1)
            g[0][ch] = 0;

    // Now build the failure function in breadth-first order.
    std::memset(f, -1, sizeof f);
    std::queue<int> q;

    // Every depth-1 state fails to the root.
    for (int ch = 0; ch < MAXC; ++ch)
    {
        if (g[0][ch] != 0)
        {
            f[g[0][ch]] = 0;
            q.push(g[0][ch]);
        }
    }

    while (!q.empty())
    {
        const int state = q.front();
        q.pop();

        // ch must stay strictly below MAXC: g[state][MAXC] would alias the
        // first column of the next row and corrupt f[] / out[].
        for (int ch = 0; ch < MAXC; ++ch)
        {
            const int next = g[state][ch];
            if (next == -1)
                continue;

            // Walk the failure chain of 'state' until some state has an
            // edge on ch; the root always has one.
            int failure = f[state];
            while (g[failure][ch] == -1)
                failure = f[failure];
            failure = g[failure][ch];

            f[next] = failure;

            // Every word ending at the failure state also ends here.
            out[next] |= out[failure];

            q.push(next);
        }
    }

    return states;
}

// Computes the state reached from currentState after consuming nextInput.
// currentState - a valid state index of the built machine.
// nextInput    - the next text character; it is mapped to a column of g[][]
//                by subtracting 'a'.
// Follows failure links (f[]) until a state with a goto edge for the
// character is found, then returns the target of that edge.
int findNextState(int currentState, char nextInput)
{
    const int symbol = nextInput - 'a';

    int state = currentState;
    for (; g[state][symbol] == -1; state = f[state])
    {
        // keep falling back while the goto function is undefined here
    }

    return g[state][symbol];
}

// Reports every occurrence of every word of arr[0..k-1] inside text.
// The machine is rebuilt from arr on each call; the text is then fed
// through it one character at a time, and for each state whose output mask
// is non-zero one line per matching word is printed to cout.
void searchWords(string arr[], int k, string text)
{
    // Preprocess the patterns into goto, failure and output functions.
    buildMatchingMachine(arr, k);

    int state = 0;
    int pos = 0;
    while (pos < text.size())
    {
        state = findNextState(state, text[pos]);

        const int matched = out[state];
        if (matched != 0)
        {
            // Bit w of the mask says whether word w ends at this position.
            for (int w = 0; w < k; ++w)
            {
                if (matched & (1 << w))
                {
                    cout << "Word " << arr[w] << " appears from "
                         << pos - arr[w].size() + 1 << " to " << pos << endl;
                }
            }
        }

        ++pos;
    }
}

// Driver program to test above
int main()
{
    string arr[] = {"he", "she", "hers", "his"};
    string text = "ahishers";
    int k = sizeof(arr)/sizeof(arr[0]);

    searchWords(arr, k, text);

    return 0;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值