AC自动机

在东北集训期间学了好多玄妙的东西…

Trie树构建一波——

#include<iostream>
#include<cstdio>
#include<cstring>
using namespace std;

// Trie storage. son[node][c] holds the index of the child of `node` reached by
// letter index c, or 0 when no such child exists. `root` is the root's index
// (zero-initialised, as all globals are); `tail` is the counter get_trie
// pre-increments to number each newly created node.
int root, tail = 1, son[100005][27];
// Buffer that main reads one input word into.
char s[65];

// Insert the NUL-terminated word `s` into the trie.
// Each character is mapped to a child slot by its offset from 'A'. Existing
// edges are followed; a missing edge gets a fresh node numbered ++tail.
void get_trie ( char *s ) {
    const int n = strlen ( s );
    int cur = root;
    for ( int pos = 0; pos < n; ++ pos ) {
        const int c = s[pos] - 'A';
        // A freshly created node has no children, so once one edge is missing
        // every later character also takes this branch.
        if ( son[cur][c] == 0 ) son[cur][c] = ++ tail;
        cur = son[cur][c];
    }
}

// Read whitespace-separated words from standard input until extraction fails,
// insert each one into the trie, then print the node counter.
int main ( ) {
    for ( ; cin >> s; )
        get_trie ( s );
    // tail is the value of the node counter after all insertions; the original
    // author describes it as the number of nodes in the trie.
    printf ( "%d", tail );
    return 0;
}

在Trie树上为每个节点构建fail指针(相当于KMP中的next数组),表示失配后应跳往哪个节点继续匹配;然后就可以像KMP一样,让文本串在自动机上尽情匹配啦!

#include<iostream>
#include<cstdio>
#include<queue>
#include<cstring>
using namespace std;

// w: buffer for one pattern word; t: buffer for the text that is searched.
char w[55], t[1000005];
// Automaton storage.
//   son[node][c] : transition from `node` on letter index c (0 means "no child"
//                  until get_fail fills the missing transitions)
//   ed[node]     : number of inserted words ending at `node`; query overwrites
//                  it with -1 once the node has been counted
//   fail[node]   : failure link of `node`
//   tail         : index of the most recently created node
//   root         : index of the root (zero-initialised)
int root, son[500005][27], ed[500005], fail[500005], tail;
// Work queue used by get_fail for its breadth-first traversal.
queue < int > q;

void get_trie ( char * s ) {
    int len = strlen ( s );
    int nd = root;
    for ( int i = 0; i < len; i ++ ) {
        int v = s[i]-'a';
        if ( son[nd][v] ) {
            nd = son[nd][v];
        } else {
            son[nd][v] = ++ tail;
            nd = son[nd][v];
        }
    }
    ed[nd] += 1;
}

// Compute failure links breadth-first over the trie.
// While doing so, every missing transition son[node][c] is replaced by the
// transition of the node's failure target, so after this call son[][] can be
// followed directly without checking for absent children.
void get_fail ( ) {
    // Children of the root fail back to the root and seed the queue.
    for ( int c = 0; c < 26; ++ c ) {
        const int child = son[root][c];
        if ( child == 0 ) continue;
        fail[child] = root;
        q.push ( child );
    }
    while ( !q.empty ( ) ) {
        const int cur = q.front ( );
        q.pop ( );
        const int fallback = fail[cur];
        for ( int c = 0; c < 26; ++ c ) {
            int &to = son[cur][c];
            if ( to != 0 ) {
                // Real child: its failure link is where the parent's failure
                // target goes on the same letter.
                fail[to] = son[fallback][c];
                q.push ( to );
            } else {
                // Missing child: borrow the failure target's transition.
                to = son[fallback][c];
            }
        }
    }
}

int query ( char *s ) {
    int len = strlen ( s );
    int nd = root, ans = 0;
    for ( int i = 0; i < len; i ++ ) {
        int v = s[i] - 'a';
        nd = son[nd][v];
        int t = nd;
        while ( t && ~ed[t] ) {
            ans += ed[t]; ed[t] = -1;
            t = fail[t];
        }
    }
    return ans;
}

// For each test case: reset the automaton, read the pattern words, read the
// text, build the failure links and print the value returned by query.
int main ( ) {
    int cases;
    scanf ( "%d", &cases );
    while ( cases -- ) {
        // Clear all state left over from the previous test case.
        memset ( son, 0, sizeof ( son ) );
        memset ( fail, 0, sizeof ( fail ) );
        memset ( ed, 0, sizeof ( ed ) );
        tail = 0;

        int words;
        scanf ( "%d", &words );
        for ( int remaining = words; remaining > 0; -- remaining ) {
            scanf ( "%s", w );
            get_trie ( w );
        }

        scanf ( "%s", t );
        get_fail ( );
        printf ( "%d\n", query ( t ) );
    }
    return 0;
}
然后还有一些奇妙的应用:

1、输出在文本串中出现次数最多的模式串:

#include<iostream>
#include<cstdio>
#include<queue>
#include<cstring>
using namespace std;

// w[k]: the k-th pattern word as read by main; t: the text that is searched.
char w[155][75], t[1000005];
// Automaton storage.
//   son[node][c] : transition from `node` on letter index c
//   fail[node]   : failure link of `node`
//   tail         : index of the most recently created node
//   num[node]    : pattern number stored by get_trie at the node where that
//                  pattern ends (stays 0 where nothing was stored)
//   tim[k]       : counter incremented by query for pattern number k
//   ed           : declared here but not referenced by the functions of this program
int root, son[500005][27], ed[500005], fail[500005], tail, tim[155], num[500005];
// Work queue used by get_fail for its breadth-first traversal.
queue < int > q;

// Insert the NUL-terminated lowercase word `s` into the trie and record `qaq`
// as the pattern number of the node where the word ends.
void get_trie ( char * s, int qaq ) {
    int cur = root;
    for ( const char *p = s; *p != '\0'; ++ p ) {
        const int c = *p - 'a';
        int to = son[cur][c];
        if ( to == 0 ) {
            // Edge missing: allocate the next free node index for it.
            to = ++ tail;
            son[cur][c] = to;
        }
        cur = to;
    }
    num[cur] = qaq;
}

// Compute failure links with a breadth-first pass over the trie, and replace
// every missing transition by the corresponding transition of the node's
// failure target so that son[][] is total afterwards.
void get_fail ( ) {
    // Depth-1 nodes: failure link is the root.
    for ( int letter = 0; letter < 26; ++ letter ) {
        const int first = son[root][letter];
        if ( first ) {
            fail[first] = root;
            q.push ( first );
        }
    }
    while ( !q.empty ( ) ) {
        const int node = q.front ( );
        q.pop ( );
        const int back = fail[node];
        for ( int letter = 0; letter < 26; ++ letter ) {
            const int viaFail = son[back][letter];
            const int child = son[node][letter];
            if ( child ) {
                fail[child] = viaFail;
                q.push ( child );
            } else {
                son[node][letter] = viaFail;
            }
        }
    }
}

// Run the text `s` through the automaton. After each character, walk the
// failure chain from the current state up to (but excluding) node 0 and
// increment tim[num[node]] for every node on the way.
// Nodes that never had a pattern number stored have num[node] == 0, so their
// visits accumulate in tim[0].
// (The unused local `ans` of the original has been removed.)
void query ( char *s ) {
    int len = strlen ( s );
    int nd = root;
    for ( int i = 0; i < len; i ++ ) {
        nd = son[nd][s[i] - 'a'];
        for ( int t = nd; t; t = fail[t] )
            tim[num[t]] += 1;
    }
}

// Process data sets until a set with n == 0 (or end of input) is read.
// For each set: rebuild the automaton from n patterns, scan the text, then
// print the highest counter value followed by every pattern that reaches it,
// in input order.
int main ( ) {
    int n;
    while ( scanf ( "%d", &n ) == 1 && n != 0 ) {
        // Clear all state left over from the previous data set.
        memset ( son, 0, sizeof ( son ) );
        memset ( tim, 0, sizeof ( tim ) );
        memset ( num, 0, sizeof ( num ) );
        memset ( fail, 0, sizeof ( fail ) );
        tail = 0;

        for ( int id = 1; id <= n; ++ id ) {
            scanf ( "%s", w[id] );
            get_trie ( w[id], id );
        }
        scanf ( "%s", t );
        get_fail ( );
        query ( t );

        int best = 0;
        for ( int id = 1; id <= n; ++ id )
            best = tim[id] > best ? tim[id] : best;
        printf ( "%d\n", best );

        for ( int id = 1; id <= n; ++ id ) {
            if ( tim[id] != best ) continue;
            printf ( "%s\n", w[id] );
        }
    }
    return 0;
}

2、多文本串、多模式串计数:

#include<iostream>
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;

// Number of strings main reads into a[] and later passes to query.
int n;
// Work queue used by get_fail for its breadth-first traversal.
queue < int > q;
// qwq: buffer holding the string currently being inserted; a[i]: the i-th
// string that main passes to query.
char qwq[1000005], a[10005][21];
// Automaton storage.
//   son[node][c] : transition from `node` on letter index c
//   end[node]    : number of inserted strings ending at `node`; query sets it
//                  to -1 once the node has been counted
//   fail[node]   : failure link of `node`
//   ntot         : index of the most recently created node
//   las[i]       : node at which the i-th inserted string ends
//   ans[node]    : value accumulated for `node` by query
// NOTE(review): with `using namespace std;` in effect, the unqualified name
// `end` can collide with std::end in C++11 and later; `::end` names this array
// unambiguously.
int root, son[500005][26], end[500005], fail[500005], ntot, las[10005], ans[500005];

void insert ( int qaq ) {
    int len = strlen ( qwq );
    int nd = root;
    for ( int i = 0; i < len; i ++ ) {
        int v = qwq[i]-'a';
        if ( !son[nd][v] ) son[nd][v] = ++ntot;
        nd = son[nd][v];
    }
    end[nd] ++;
    las[qaq] = nd;
}

// Build the failure links breadth-first. Missing transitions are overwritten
// with the transition of the failure target, so that son[][] can afterwards
// be followed for any letter from any node.
void get_fail ( ) {
    // Seed the queue with the root's existing children; they fail to the root.
    for ( int c = 0; c < 26; ++ c ) {
        const int kid = son[root][c];
        if ( kid == 0 ) continue;
        fail[kid] = root;
        q.push ( kid );
    }
    while ( !q.empty ( ) ) {
        const int u = q.front ( );
        q.pop ( );
        for ( int c = 0; c < 26; ++ c ) {
            const int inherited = son[fail[u]][c];
            if ( son[u][c] == 0 ) {
                son[u][c] = inherited;
                continue;
            }
            fail[son[u][c]] = inherited;
            q.push ( son[u][c] );
        }
    }
}

void query ( char *s ) {
    int len = strlen ( s );
    int nd = root;
    for ( int i = 0; i < len; i ++ ) {
        nd = son[nd][s[i]-'a'];
        int t = nd;
        while ( t && end[t] != -1 ) {
            ans[t] += end[t], end[t] = -1;
            t = fail[t];
        }
    }
}

// Read n strings into a[], then m strings that are inserted into the trie one
// by one. After building the failure links, run every a[i] through the
// automaton and print, for each inserted string, the value accumulated at the
// node where it ends.
int main ( ) {
    scanf ( "%d", &n );
    for ( int idx = 1; idx <= n; ++ idx )
        scanf ( "%s", a[idx] );

    int m;
    scanf ( "%d", &m );
    for ( int id = 1; id <= m; ++ id ) {
        scanf ( "%s", qwq );
        insert ( id );
    }

    get_fail ( );

    for ( int idx = 1; idx <= n; ++ idx ) {
        query ( a[idx] );
    }
    for ( int id = 1; id <= m; ++ id ) {
        printf ( "%d\n", ans[las[id]] );
    }
    return 0;
}

继续努力。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
AC自动机是一种用于字符串匹配的算法,它可以高效地在一段文本中查找多个预定义的模式。在Python中,它的实现可以使用多种库,其中包括ac自动机python和ahocorasick-python。ac自动机python是一个对标准的AC自动机算法进行了完善和优化的实现,适用于主流的Python发行版,包括Python2和Python3。它提供了更准确的结果,并且可以通过pip进行安装,具体的安装方法可以参考官方文档或者使用pip install命令进行安装。ahocorasick-python是另一个实现AC自动机的库,它也可以用于Python2和Python3。你可以通过官方网站或者GitHub源码获取更多关于该库的信息和安装指南。对于AC自动机的使用,一个常见的例子是在一段包含m个字符的文章中查找n个单词出现的次数。要了解AC自动机,需要有关于模式树(字典树)Trie和KMP模式匹配算法的基础知识。AC自动机的算法包括三个步骤:构造一棵Trie树,构造失败指针和模式匹配过程。在构造好AC自动机后,可以使用它来快速地在文本中查找预定义的模式,并统计它们的出现次数。[1][2][3]

引用:
- [1] [ahocorasick-python:AC自动机python的实现,并进行了优化。主要修复了查询不准确的问题](https://download.csdn.net/download/weixin_42122986/18825869)
- [2][3] [Python实现多模匹配——AC自动机](https://blog.csdn.net/zichen_ziqi/article/details/104246446)

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值