Keywords Search(AC自动机入门题)

Keywords Search

Time Limit : 2000/1000ms (Java/Other)   Memory Limit : 131072/131072K (Java/Other)

Total Submission(s) : 20   Accepted Submission(s) : 17

Problem Description

In modern times, search engines such as Google and Baidu have come into everybody's life.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description; when users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image whose description matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should report how many of the keywords are matched.

 

Input

The first line contains one integer, the number of test cases that follow. Each case starts with an integer N, the number of keywords, followed by N keywords (N <= 10000). Each keyword contains only the characters 'a'-'z' and is at most 50 characters long. The last line of each case is the description, which is at most 1000000 characters long.

 

Output

Print how many keywords are contained in the description.

 

Sample Input

1

5

she

he

say

shr

her

yasherhs

 

Sample Output

3

解题思路:使用AC自动机模板

AC代码:

#include<iostream>
#include<string>
#include<string.h>
#include<stdio.h>
#include<queue>
using namespace std;
// One node of the keyword trie that doubles as an Aho-Corasick automaton state.
// `root` (declared with the struct) is the entry node of the trie for the
// current test case; main allocates it and deleteTrie releases it.
struct Trie
{
    // Number of inserted keywords that end exactly at this node (create
    // increments it). find_in_AC_automaton overwrites it with -1 once the
    // node has been counted, so each node contributes at most once.
    int values;
    // child[c] is the node reached by letter 'a' + c, or NULL if absent.
    Trie *child[26];
    // Failure link filled in by build_AC_automaton; NULL until then, and it
    // stays NULL for the root.
    Trie *fail;
    // Start as an empty node. `child()` value-initializes the array, giving
    // real null pointers without memset-ing pointer storage (the old
    // memset(child, NULL, ...) passed a pointer constant as the fill byte).
    Trie() : values(0), child(), fail(NULL)
    {
    }
}*root;
// Running count of matched keywords for the current test case: main resets it
// to 0, find_in_AC_automaton adds to it, and main prints it.
int ans;
// Input buffer for a single keyword ("patten" is a misspelling of "pattern").
// The problem statement caps keywords at 50 characters, so 60 leaves room for
// the terminating '\0'.
char patten[60];
// Input buffer for the description being searched. The problem statement caps
// it at 1000000 characters; the extra 10 bytes cover the terminating '\0'.
char text[1000010];
// Insert the NUL-terminated keyword `s` into the trie rooted at `root`,
// creating missing nodes along the way, and bump the keyword counter of the
// node where the keyword ends. Inserting the same keyword twice therefore
// leaves values == 2 on that node.
//
// A keyword containing any character outside 'a'-'z' is ignored entirely:
// such a character would index child[] out of bounds, so it is rejected
// before the trie is touched and no partial path is left behind.
void create(char s[])                   // build the trie
{
    for (const char *c = s; *c != '\0'; ++c)
    {
        if (*c < 'a' || *c > 'z')
            return;
    }

    Trie *x = root;
    // Walk to the terminator instead of calling strlen() in the loop
    // condition, which rescanned the string on every iteration.
    for (const char *c = s; *c != '\0'; ++c)
    {
        int d = *c - 'a';
        if (x->child[d] == NULL)
        {
            x->child[d] = new Trie;
        }
        x = x->child[d];
    }
    x->values++;
}
// Free the subtree rooted at `x` (children first, then `x` itself).
// A NULL argument is a no-op, which also lets the loop recurse into every
// child slot without testing it first.
void deleteTrie(Trie *x)                // release the trie
{
    if (x != NULL)
    {
        Trie **slot = x->child;
        Trie **const stop = slot + 26;
        while (slot != stop)
        {
            deleteTrie(*slot);
            ++slot;
        }
        delete x;
    }
}
void build_AC_automaton()               //构建AC自动机
{
    Trie *p;                            
    p=root;
    queue<Trie*> qu;                    //使用广搜实现
    qu.push(p);
    while(!qu.empty())
    {
        p=qu.front();                   
        qu.pop();
        for(int i=0;i<26;i++)
        {
            if(p->child[i]!=NULL)
            {
                if(p==root)
                {
                    p->child[i]->fail=root;
                }
                else
                {
                    Trie *node=p->fail;         
                    while(node!=NULL)       //查找当前节点的fail指针是否为空
                    {
                        if(node->child[i]!=NULL)    //判断当前节点和fail指针指向的孩子节点是否相等
                        {
                            p->child[i]->fail=node->child[i]; //相等令p的孩子节点fail指针指向p节点指向的fail指针的孩子节点
                            break;
                        }
                        node=node->fail;
                    }
                    if(node==NULL)
                        p->child[i]->fail=root;
                }
                qu.push(p->child[i]);
            }
        }
    }
}
// Scan the global `text` through the automaton rooted at `root` and add to
// the global `ans` the keyword counters of every node reached (directly or
// through fail links). A counted node has its `values` set to -1 so it is
// never counted again; this consumes the counters, so the automaton can only
// be searched once.
//
// Characters outside 'a'-'z' have no transition in the automaton and would
// index child[] out of bounds, so they reset the match state to the root
// (no keyword can span such a character).
void find_in_AC_automaton()         // search phase
{
    Trie *p = root;
    for (const char *c = text; *c != '\0'; ++c)
    {
        if (*c < 'a' || *c > 'z')
        {
            p = root;
        }
        else
        {
            int id = *c - 'a';
            // Fall back along fail links until a transition on this letter
            // exists or the root is reached.
            while (p->child[id] == NULL && p != root)
                p = p->fail;
            p = p->child[id];
            if (p == NULL)
                p = root;
        }

        // Collect every keyword ending here: walk the fail chain, stopping
        // early at the first node that was already counted (-1), since
        // everything beyond it was counted in the same earlier walk.
        for (Trie *temp = p; temp != NULL && temp->values != -1; temp = temp->fail)
        {
            ans += temp->values;
            temp->values = -1;
        }
    }
}
// Read the number of test cases; for each case read N keywords and the
// description, build the automaton, and print how many keywords occur in the
// description. Processing stops quietly at the first malformed or truncated
// input instead of running on uninitialized values.
int main()
{
    int t = 0;
    if (scanf("%d", &t) != 1)
        return 0;
    while (t-- > 0)                     // "> 0" keeps a negative count from looping
    {
        int n = 0;
        if (scanf("%d", &n) != 1)
            break;
        root = new Trie;
        bool complete = true;
        for (int i = 0; i < n; i++)
        {
            // Field width = sizeof(patten) - 1, so an over-long token cannot
            // overflow the 60-byte buffer.
            if (scanf("%59s", patten) != 1)
            {
                complete = false;
                break;
            }
            create(patten);
        }
        // Field width = sizeof(text) - 1 for the 1000010-byte buffer.
        if (complete && scanf("%1000009s", text) == 1)
        {
            build_AC_automaton();
            ans = 0;
            find_in_AC_automaton();
            printf("%d\n", ans);
        }
        else
        {
            complete = false;
        }
        deleteTrie(root);
        root = NULL;                    // do not leave a dangling global
        if (!complete)
            break;
    }
    return 0;
}

 

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是AC自动机Java模板U301874的代码实现: ```java import java.io.*; import java.util.*; public class Main { static final int MAXN = 100010, MAXM = 200010; static final int INF = 0x3f3f3f3f; static int n, m, cnt; static int[] trie = new int[MAXN * 30], idx = new int[MAXN * 30]; static int[] fail = new int[MAXN * 30], vis = new int[MAXN * 30]; static int[] head = new int[MAXN], nxt = new int[MAXM], ver = new int[MAXM], tot; static int[] deg = new int[MAXN]; static char[][] str = new char[MAXN][30]; static Map<Character, Integer> map = new HashMap<>(); static int add(char[] s) { int p = 0; for (int i = 0; s[i] != '\0'; i++) { char c = s[i]; if (!map.containsKey(c)) { map.put(c, ++cnt); } int u = map.get(c); if (trie[p] == 0) { trie[p] = ++tot; } p = trie[p]; idx[p] = u; } return p; } static void build() { Queue<Integer> q = new LinkedList<>(); for (int i = 1; i <= cnt; i++) { int u = map.get(str[i][0]); if (trie[0] == 0) { trie[0] = ++tot; } int p = trie[0]; idx[p] = 0; if (trie[p + u] == 0) { trie[p + u] = ++tot; } fail[p + u] = p; q.offer(p + u); } while (!q.isEmpty()) { int u = q.poll(); for (int i = head[idx[u]]; i != 0; i = nxt[i]) { int v = ver[i]; int p = fail[u], q = 0; while (p != 0 && trie[p + v] == 0) { p = fail[p]; } if (trie[p + v] != 0) { q = trie[p + v]; } fail[u + v] = q; q.offer(u + v); } } } static void addEdge(int u, int v) { ver[++tot] = v; nxt[tot] = head[u]; head[u] = tot; } static void topo() { Queue<Integer> q = new LinkedList<>(); for (int i = 1; i <= tot; i++) { if (deg[i] == 0) { q.offer(i); } } while (!q.isEmpty()) { int u = q.poll(); vis[u] = 1; for (int i = head[u]; i != 0; i = nxt[i]) { int v = ver[i]; deg[v]--; if (deg[v] == 0) { q.offer(v); } } } } public static void main(String[] args) { Scanner in = new Scanner(System.in); n = in.nextInt(); for (int i = 1; i <= n; i++) { String s = in.next(); str[i] = s.toCharArray(); add(str[i]); } m = in.nextInt(); for (int i = 1; i <= m; i++) { String s = in.next(); int len = s.length(); int p = 0; for (int j = 
0; j < len; j++) { char c = s.charAt(j); if (!map.containsKey(c)) { break; } int u = map.get(c); if (trie[p + u] == 0) { break; } p = trie[p + u]; deg[p]++; addEdge(p, p + u); } } build(); topo(); for (int i = 1; i <= n; i++) { int p = 0; for (int j = 0; str[i][j] != '\0'; j++) { p = trie[p + map.get(str[i][j])]; if (vis[p] == 1) { System.out.println("YES"); break; } } if (vis[p] == 0) { System.out.println("NO"); } } } } ``` 该解释:给定 $n$ 个模式串和 $m$ 个文本串,问每个模式串是否存在于文本串中。其中,模式串和文本串都只包含小写字母。 AC自动机是一种可以高效匹配多个模式串的数据结构。该需要使用AC自动机进行多模式串匹配。 代码实现中,使用一个trie树存储所有模式串,每个节点记录了下一层的字符和对应的子节点编号。同时,使用一个map记录每个字符对应的编号,以便于在trie树中查找。 在trie树构建完成后,使用广度优先搜索构建fail指针。搜索过程中,对于每个节点 $u$,依次查找其父亲节点 $p$ 直到根节点,若 $p$ 的子节点 $v$ 与 $u$ 的子节点 $w$ 匹配,则令 $u$ 的fail指针指向 $p+v$ 节点。如果 $p+v$ 节点不存在,则继续向根节点搜索。 在fail指针构建完成后,对于每个文本串,从根节点开始依次匹配每个字符,直到匹配完成或者无法匹配。如果最终匹配的节点已经被访问,则说明该模式串存在于文本串中。 时间复杂度为 $O(\sum |P|+|T|)$,其中 $\sum |P|$ 表示所有模式串的长度之和,$|T|$ 表示所有文本串的长度之和。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值