HDU4117-GRE Words(AC自动机+DFS序+区间修改线段树)-CSDN博客

本文链接：https://blog.csdn.net/u013306830/article/details/77586562

GRE Words

Time Limit: 30000/15000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 4506 Accepted Submission(s): 435

Problem Description

Recently George is preparing for the Graduate Record Examinations (GRE for short). Obviously the most important thing is reciting the words.
Now George is working on a word list containing N words.
He has so poor a memory that it is too hard for him to remember all of the words on the list. But he does find a way to help him to remember. He finds that if a sequence of words has a property that for all pairs of neighboring words, the previous one is a substring of the next one, then the sequence of words is easy to remember.
So he decides to eliminate some words from the word list first to make the list easier for him. Meantime, he doesn’t want to miss the important words. He gives each word an importance, which is represented by an integer ranging from -1000 to 1000, then he wants to know which words to eliminate to maximize the sum of the importance of remaining words. Negative importance just means that George thought it useless and is a waste of time to recite the word.
Note that although he can eliminate any number of words from the word list, he can never change the order between words. In another word, the order of words appeared on the word list is consistent with the order in the input. In addition, a word may have different meanings, so it can appear on the list more than once, and it may have different importance in each occurrence.

Input

The first line contains an integer T(1 <= T <= 50), indicating the number of test cases.
Each test case contains several lines.
The first line contains an integer N (1 <= N <= 2 * 10^4), indicating the number of words.
Then N lines follows, each contains a string Si and an integer Wi, representing the word and its importance. Si contains only lowercase letters.
You can assume that the total length of all words will not exceed 3 * 10^5.

Output

For each test case in the input, print one line: “Case #X: Y”, where X is the test case number (starting with 1) and Y is the largest importance of the remaining sequence of words.

Sample Input

1
5
a 1
ab 2
abb 3
baba 5
abbab 8

Sample Output

Case #1: 14

Source

2011 Asia ChengDu Regional Contest

chenyongfu | We have carefully selected several similar problems for you: 4111 4119 4114 4115 4118

题意：

给定n个字符串，要求按顺序取一些字符串，满足相邻两个中前一个字符串是后一个字符串的子串，要求使得取出的权值和最大。

题目类似一般的DP问题，像最长上升子序列，只不过把“上升”的要求改成了“前一个串是后一个串的子串”，权重也发生了改变，
不过没关系，DP方程依旧符合：
$f[i]=\max(0,\max\{f[j]\})+w[i]$ ，其中 $j<i$ 且第 $j$ 个串是第 $i$ 个串的子串（不存在这样的 $j$ 时取 0）
判断子串关系我们可以通过AC自动机来寻找。

最朴素的想法：
预处理出AC自动机，然后对于第 $i$ 个串的每个点，每次都去爬fail，找到以那些点为结尾的最大 $f[j]$ 即可，尽管时限有15s，但是hdu的数据还是会TLE。

转RMQ：
最长上升子序列可以用权值线段树或者权值树状数组优化，这题也是如此，考虑AC自动机每次爬fail其实走的是固定的几条链，那么我们可以对fail反向连边，得到一棵fail树

$addedge(fail[k],k)$

求出这棵树的DFS序，对每个结点 $i$ 记录进入时间 $in_i$ 和子树内的最大时间戳 $out_i$。
那么对于第 $i$ 个点来讲，它的DP值的贡献范围就是所有以 root→$i$ 这个字符串为后缀的结点（任何包含该串的单词，都有某个前缀落在这样的结点上），
在fail树中，这些结点都位于以 $i$ 为根的子树内，
而这正是 $[in_i, out_i]$ 这段区间，所以每次DP我们要做的只有两步：
1. 对于第 $i$ 个串的每个字符，单点查询走到的结点上能取得的最大DP值。
2. 用新的DP值更新一段区间的最大DP值。
而这正是经典线段树问题

下面给出hduAC的代码

#include<cstdio>
#include<cstring>
#include<algorithm>
#include<iostream>
#include<queue>
using namespace std;
typedef long long LL;
// Capacity used for the text buffer, the trie nodes and every per-node array.
const int maxn=3e5+5;

// cnt:  index of the most recently allocated trie node (newnode() pre-increments it).
// root: index of the automaton root.
// n:    number of words in the current test case.
// kase: running test-case counter used in the "Case #X" output.
// w[i]: importance of the i-th word (1-based).
// pos[i]: offset in s just past the end of the i-th word; pos[0] is 0.
int cnt,root,n,kase,w[20000+5],pos[20000+5];
// INDEX: DFS timestamp counter; in[u]/out[u]: first and last timestamp in u's subtree.
int INDEX,in[maxn],out[maxn];
// All words of a test case stored back to back (each word starts at pos[i-1]).
char s[maxn];
// fail[u]: failure link of automaton node u.
int fail[maxn];

// S[k]:   maximum stored in segment-tree node k.
// tag[k]: pending "raise to at least this value" lazy tag of node k (0 = nothing pending).
// MAX, L, R: implicit arguments of ask()/add(): the value to apply and the query/update range.
int S[maxn*4],tag[maxn*4],MAX,L,R;
// st[x]: index of the first edge leaving x, or -1; tot: number of edges stored so far.
int st[maxn],tot;

// One directed edge stored in a forward-linked adjacency list.
struct edge
{
  int v;    // destination vertex of the edge
  int nxt;  // index of the next edge leaving the same source (-1 terminates the list)
}v[maxn*2];

// Prepends the directed edge x -> y to x's adjacency list.
//
// The edge is written into slot `tot` of the edge pool and becomes the new
// list head for x. Plain member assignment is used instead of the C99
// compound literal `(edge){...}`, which is only a compiler extension in C++.
inline void addedge(int x,int y)
{
  v[tot].v=y;
  v[tot].nxt=st[x];  // old head becomes the successor of the new edge
  st[x]=tot++;
}

// One trie / automaton state.
struct node
{
  int nxt[26];  // transition for each lowercase letter (0 means "absent" before build())
  int cnt;      // number of inserted words that end at this state
}T[maxn];

inline int newnode()
{
  cnt++;
  memset(T[cnt].nxt,0,sizeof(T[cnt].nxt));
  T[cnt].cnt=0;
  fail[cnt]=0;
  return cnt;
}

// Inserts the NUL-terminated lowercase word `s` into the trie, creating
// missing nodes on the way, and counts one more word ending at the last node.
inline void insert(char *s)
{
  int cur=root;
  for(const char *p=s;*p;++p)
  {
    int &slot=T[cur].nxt[*p-'a'];
    if(slot==0)slot=newnode();
    cur=slot;
  }
  ++T[cur].cnt;
}

queue<int>Q;
// Breadth-first construction of the failure links.
//
// For every dequeued non-root node the edge fail[node] -> node is added, so
// the adjacency lists end up holding the reversed failure links. Missing
// transitions are filled in with the transition of the failure state.
inline void build()
{
  Q.push(root);
  while(!Q.empty())
  {
    const int cur=Q.front();
    Q.pop();

    const bool isRoot=(cur==root);
    if(!isRoot)addedge(fail[cur],cur);

    const int fallback=fail[cur];
    for(int c=0;c<26;++c)
    {
      int &child=T[cur].nxt[c];
      const int viaFail=T[fallback].nxt[c];
      if(child==0)
      {
        // No real child: borrow the transition of the failure state.
        child=viaFail;
        continue;
      }
      // Children of the root keep the failure link set by newnode().
      if(!isRoot)fail[child]=viaFail;
      Q.push(child);
    }
  }
}

inline void DFS(int u,int fa)
{
  in[u]=++INDEX;
  for(int i=st[u];~i;i=v[i].nxt)
  if(v[i].v!=fa)DFS(v[i].v,u);
  out[u]=INDEX;
}

// Recomputes the maximum of segment-tree node k from its two children.
inline void update(int k)
{
  const int lc=k<<1;
  const int rc=lc|1;
  S[k]=(S[lc]<S[rc])?S[rc]:S[lc];
}

// Pushes the pending tag of node k to both children: each child's tag and
// stored maximum are raised to at least that value, then k's tag is cleared.
// A tag of 0 means nothing is pending.
inline void down(int k)
{
  const int pending=tag[k];
  if(pending==0)return;

  const int lc=k<<1;
  const int rc=lc|1;
  for(int child=lc;child<=rc;++child)
  {
    if(tag[child]<pending)tag[child]=pending;
    if(S[child]<pending)S[child]=pending;
  }
  tag[k]=0;
}

// Returns the maximum over the global range [L, R] within node k, which
// covers [l, r]. The result is never below 0.
inline int ask(int l,int r,int k)
{
  if(l>=L&&r<=R)return S[k];

  down(k);
  const int mid=(l+r)>>1;
  int best=0;
  if(mid>=L)
  {
    const int left=ask(l,mid,k<<1);
    if(left>best)best=left;
  }
  if(mid<R)
  {
    const int right=ask(mid+1,r,k<<1|1);
    if(right>best)best=right;
  }
  return best;
}

// Raises every position of the global range [L, R] to at least the global
// MAX, working inside node k, which covers [l, r].
inline void add(int l,int r,int k)
{
  const bool covered=(l>=L&&r<=R);
  if(!covered)
  {
    down(k);
    const int mid=(l+r)>>1;
    if(mid>=L)add(l,mid,k<<1);
    if(mid<R)add(mid+1,r,k<<1|1);
    update(k);
    return;
  }

  // Fully covered: record the value here and leave it as a lazy tag.
  if(S[k]<MAX)S[k]=MAX;
  if(tag[k]<MAX)tag[k]=MAX;
}

int main()
{
  int t;
  scanf("%d",&t);
  while(t--)
  { 
    scanf("%d",&n);
    cnt=-1;
    root=newnode();
    tot=0;
    memset(st,-1,sizeof(st));

    pos[0]=0;
    for(int i=1;i<=n;++i)
    {
      scanf("%s%d",s+pos[i-1],w+i);
      pos[i]=pos[i-1]+strlen(s+pos[i-1]);
      insert(s+pos[i-1]);
    }

    build();

    INDEX=0;
    DFS(root,-1);

    int ans=0;
    memset(S,0,sizeof(S));
    memset(tag,0,sizeof(tag));

    for(int i=1;i<=n;++i)
    {
      MAX=0;
      int p=root;
      for(int j=pos[i-1];j<pos[i];++j)
      {
          p=T[p].nxt[s[j]-'a'];

          L=in[p];R=in[p];
          int res=ask(1,INDEX,1);
          MAX=max(res,MAX);

      }
      MAX=MAX+w[i];
      ans=max(ans,MAX);
      L=in[p];R=out[p];
      add(1,INDEX,1);
    }

    printf("Case #%d: %d\n",++kase,ans);
  }
  return 0;
}