hdu--6096--String

最新推荐文章于 2021-04-07 13:35:19 发布

职业炮灰

最新推荐文章于 2021-04-07 13:35:19 发布

阅读量340

点赞数

分类专栏：算法_杭电多校联合

本文链接：https://blog.csdn.net/king_cannon_fodder/article/details/77159847

版权

算法_杭电多校联合同时被 2 个专栏收录

22 篇文章 0 订阅

订阅专栏

算法_字典树

3 篇文章 0 订阅

订阅专栏

String

Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 524288/524288 K (Java/Others)
Total Submission(s): 746 Accepted Submission(s): 236

Problem Description

Bob has a dictionary with N words in it.
Now there is a list of words in which the middle part of the word has continuous letters disappeared. The middle part does not include the first and last character.
We only know the prefix and suffix of each word, and the number of characters missing is uncertain, it could be 0. But the prefix and suffix of each word can not overlap.
For each word in the list, Bob wants to determine which word is in the dictionary by prefix and suffix.
There are probably many answers. You just have to figure out how many words may be the answer.

Input

The first line of the input gives the number of test cases T; T test cases follow.
Each test case contains two integer N and Q, The number of words in the dictionary, and the number of words in the list.
Next N lines, each line has a string Wi, representing the ith word in the dictionary ($0 < |W_i| \le 100000$).
Next Q lines, each line has two strings Pi, Si, representing the prefix and suffix of the ith word in the list ($0 < |P_i|, |S_i| \le 100000$, $0 < |P_i| + |S_i| \le 100000$).
All of the above characters are lowercase letters.
The dictionary does not contain the same words.

Limits

T≤5

0<N,Q≤100000

$\sum (|S_i| + |P_i|) \le 500000$

$\sum |W_i| \le 500000$

Output

For each test case, output Q lines, an integer per line, represents the answer to each word in the list.

Sample Input

  
  
   
   1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f

Sample Output

2
1
1
0

题目大意：

首先给你若干个字符串，然后再给你若干个字符串的前缀和后缀，分别算出以给出的前缀和后缀作为前缀和后缀的字符串有多少个；

解题思路：

这道题可以用字典树来做。因为需要同时查询字符串的前缀和后缀，所以要先对字符串做处理，再进行建树和查询：对于长度为 n 的串 s，按照第 0 位、第 n-1 位、第 1 位、第 n-2 位……这样的顺序交错地插入字典树。

查询的时候用同样的方法，如果前缀和后缀的字母个数不一样，就用 * 来补齐。字典树需要维护的信息有 3 个：一是经过每个节点的字符串个数，二是经过这个节点的各个字符串的长度，三是这个节点的后继节点。查询的时候要注意，如果出现了 *，则这个点的所有后继节点都可能符合要求，都要进行处理。具体的查询方法是用两个队列 queue<int> q[2]; 交替使用实现的，感觉用得有点小巧妙：树建好之后，首先 0 入队，从根节点开始查询；如果当前这个点的后继节点中存在我们这次需要查询的点，即代码中的 nxt[now][id1] != -1，我们就把这个后继节点放到另一个队列中，等待处理。当这个队列里面需要查询的点全部查询完毕，即队列为空的时候，下一波需要查询的点就存储在那个不为空的队列里面；在查询当前队列里面元素的时候，用同样的方法把查询到的符合条件、需要继续查询的后继节点放到另一个队列中，等这一波处理完后再进行处理，如此循环往复。因为查询的字符串的长度为偶数，所以最后的结果存储在 q[0] 这个队列中。还需要注意的是前缀和后缀出现重合的情况，比如字符串 aba，查询的前缀为 ab，后缀为 ba，这种情况是不符合要求的；排除这种情况的方法是比较原字符串的长度和要查询的前缀与后缀的长度之和，原字符串的长度不能小于后者。

详见代码：

    C++ Code  
 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

#include<bits/stdc++.h>
#define N 500005
using namespace std;

/// Query prefix, as read from the input.
char s1[N];
/// Query suffix, as read from the input.
char s2[N];
/// Dictionary word, as read from the input.
char s[N];
/// Interleaved form (front char, back char, front char, ...) of a word or
/// query; this is the string that is inserted into / matched against the trie.
char st[ 2 * N];
/// num[x]: how many inserted keys pass through trie node x.
int num[ 2 * N];
/// nxt[x][c]: child of node x along letter 'a' + c, or -1 when there is none.
int nxt[ 2 * N][ 26];
/// T[x]: original lengths of the words whose keys pass through node x.
vector< int > T[ 2 * N];
/// Index that the next allocated trie node will receive.
int L;
/// Index of the trie root.
int root;
/// Number of dictionary words in the current test case.
int n;
/// Number of queries in the current test case.
int q;
/// Allocates a fresh trie node.
///
/// Takes the next free index from the global counter L, marks all 26 child
/// slots of that node as absent (-1) and zeroes its pass-through counter.
///
/// @return index of the newly initialised node.
int nownode()
{
    const int fresh = L++;
    int *row = nxt[fresh];
    for (int *slot = row; slot != row + 26; ++slot)
        *slot = -1;
    num[fresh] = 0;
    return fresh;
}

/// Inserts one key into the trie.
///
/// Walks the trie from the root along the characters of @p s, creating
/// missing nodes on the way. Every node reached (the root itself excluded)
/// records @p len in its length list and has its pass-through counter bumped.
///
/// @param s   NUL-terminated key made of lowercase letters.
/// @param len value stored at each visited node (the caller passes the
///            length of the original, non-interleaved word).
void build( char *s, int len)
{
    int cur = root;
    for (const char *p = s; *p != '\0'; ++p)
    {
        int &child = nxt[cur][*p - 'a'];
        if (child == -1)
            child = nownode();
        cur = child;
        num[cur]++;
        T[cur].push_back(len);
    }
}
/// Sorts the length list of every node in the subtree rooted at @p x
/// (ascending), so that later queries can binary-search it.
///
/// The traversal uses an explicit stack instead of recursion: the trie is as
/// deep as the longest inserted key, and a recursive walk over a key of a few
/// hundred thousand characters can exhaust the call stack.
///
/// @param x index of the subtree root.
void dfs( int x)
{
    std::vector<int> pending(1, x);
    while (!pending.empty())
    {
        const int node = pending.back();
        pending.pop_back();
        std::sort(T[node].begin(), T[node].end());
        for (int c = 0; c < 26; c++)
        {
            if (nxt[node][c] != -1)
                pending.push_back(nxt[node][c]);
        }
    }
}
/// Reads the test cases from stdin and prints one answer per query.
///
/// Per test case:
///   1. every dictionary word w is turned into the interleaved key
///      w[0] w[len-1] w[1] w[len-2] ... (2 * len characters) and inserted
///      into the trie together with its original length;
///   2. the per-node length lists are sorted;
///   3. every query (prefix, suffix) is interleaved the same way, padding the
///      shorter side with '*', and matched level by level against the trie,
///      where '*' follows every existing child;
///   4. at each node reached, only words whose length is at least
///      |prefix| + |suffix| are counted, which rejects matches where prefix
///      and suffix would overlap.
///
/// Stops quietly if the input ends early or is malformed.
///
/// @return 0.
int main()
{
    int t;
    if (scanf( "%d", &t) != 1)
        return 0;

    // Frontiers of the level-by-level trie walk, reused across all queries.
    std::vector<int> frontier, next_frontier;

    while (t--)
    {
        // Only nodes [0, L) were used by the previous test case, so only
        // their length lists can be non-empty.
        for (int node = 0; node < L; node++)
            T[node].clear();
        L = 0;
        root = nownode();

        if (scanf( "%d %d", &n, &q) != 2)
            return 0;

        for (int w = 1; w <= n; w++)
        {
            if (scanf( "%s", s) != 1)
                return 0;
            const int len = static_cast<int>(std::strlen(s));
            for (int k = 0; k < len; k++)
            {
                st[k * 2] = s[k];
                st[k * 2 + 1] = s[len - 1 - k];
            }
            st[len * 2] = '\0';
            build(st, len);
        }
        dfs(root); // sort the length lists for the binary searches below

        for (int query = 1; query <= q; query++)
        {
            if (scanf( "%s %s", s1, s2) != 2)
                return 0;
            const int len1 = static_cast<int>(std::strlen(s1));
            const int len2 = static_cast<int>(std::strlen(s2));
            const int need = len1 + len2; // minimum length of a non-overlapping match
            const int half = std::max(len1, len2);

            // Interleave prefix with reversed suffix; '*' pads the shorter one.
            for (int j = 0; j < half; j++)
            {
                st[j * 2] = (j < len1) ? s1[j] : '*';
                st[j * 2 + 1] = (j < len2) ? s2[len2 - 1 - j] : '*';
            }
            const int plen = half * 2;
            st[plen] = '\0';

            frontier.assign(1, root);
            // Once the frontier is empty no node can be reached any more.
            for (int j = 0; j < plen && !frontier.empty(); j++)
            {
                const char ch = st[j];
                next_frontier.clear();
                for (std::size_t f = 0; f < frontier.size(); f++)
                {
                    const int node = frontier[f];
                    if (ch == '*')
                    {
                        for (int k = 0; k < 26; k++)
                        {
                            if (nxt[node][k] != -1)
                                next_frontier.push_back(nxt[node][k]);
                        }
                    }
                    else
                    {
                        const int child = nxt[node][ch - 'a'];
                        if (child != -1)
                            next_frontier.push_back(child);
                    }
                }
                frontier.swap(next_frontier);
            }

            int ans = 0;
            for (std::size_t f = 0; f < frontier.size(); f++)
            {
                const std::vector<int> &lens = T[frontier[f]];
                // lower_bound finds the first length >= need; everything from
                // there to the end is a word long enough to hold both parts.
                ans += static_cast<int>(lens.end() - std::lower_bound(lens.begin(), lens.end(), need));
            }
            printf( "%d\n", ans);
        }
    }
    return 0;
}