HDU - 4117 GRE Words (AC自动机+Fail树+线段树+DFS序+DP)

GRE Words


Problem Description
Recently George is preparing for the Graduate Record Examinations (GRE for short). Obviously the most important thing is reciting the words.
Now George is working on a word list containing N words.
He has so poor a memory that it is too hard for him to remember all of the words on the list. But he does find a way to help him to remember. He finds that if a sequence of words has a property that for all pairs of neighboring words, the previous one is a substring of the next one, then the sequence of words is easy to remember.
So he decides to eliminate some words from the word list first to make the list easier for him. Meantime, he doesn't want to miss the important words. He gives each word an importance, which is represented by an integer ranging from -1000 to 1000, then he wants to know which words to eliminate to maximize the sum of the importance of remaining words. Negative importance just means that George thought it useless and is a waste of time to recite the word.
Note that although he can eliminate any number of words from the word list, he can never change the order of the words. In other words, the order of the words remaining on the list is consistent with their order in the input. In addition, a word may have different meanings, so it can appear on the list more than once, and it may have a different importance in each occurrence.
 

Input
The first line contains an integer T(1 <= T <= 50), indicating the number of test cases.
Each test case contains several lines.
The first line contains an integer N (1 <= N <= 2 * 10^4), indicating the number of words.
Then N lines follow, each containing a string Si and an integer Wi, representing the word and its importance. Si contains only lowercase letters.
You can assume that the total length of all words will not exceed 3 * 10^5.
 

Output
For each test case in the input, print one line: "Case #X: Y", where X is the test case number (starting with 1) and Y is the largest importance of the remaining sequence of words.
 

Sample Input
1
5
a 1
ab 2
abb 3
baba 5
abbab 8
 

Sample Output
Case #1: 14
 


题意:给你连续的很多串字符串,要你删掉某些字符串,使得剩下的所有字符串,前一个都是后一个的子串,并且剩下的字符串的权值和要最大。


解题思路:做完这道题,我是崩溃的!现在真的真的真的知道Fail指针是什么了……其实这道题的DP还是很好推的,只用到了简单的DP思想。整道题都能自己推出来,但是打完代码……疯狂MLE(超内存),醉了……优化到了极限,连懒惰标记都省了,还是MLE……神奇的是,百度到了一份理论上比我占用更多内存的代码,却刚好卡过了,我百思不得其解……就当自己做出来了好了……


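思路就是先把所有字符串建一棵trie树,然后连上fail指针变成AC自动机。画个图很容易想到:若结点u在fail树上是结点v的祖先,则u代表的串是v代表的串的后缀;因此单词A是单词B的子串,当且仅当B的某个前缀对应的结点落在A的结尾结点的fail树子树内。所以把fail指针反向,建出fail树,然后按输入顺序一个一个字符串地处理并做DP:dp[i]=val[i]+max{dp[j] | j<i 且 Sj 是 Si 的子串},同时维护一个最大值作为答案。这里不能每次都暴力更新整棵子树的值,所以用线段树维护;要在树上用线段树,就先在fail树上求个dfs序,把每棵子树变成一段连续区间,做区间取max、单点查询。然后就很暴力了…………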


我的超内存(MLE)代码……

#include<iostream>
#include<deque>
#include<memory.h>
#include<stdio.h>
#include<map>
#include<string.h>
#include<algorithm>
#include<vector>
#include<math.h>
#include<stack>
#include<queue>
#include<set>
using namespace std;
//Not referenced by the code visible here.
typedef long long int ll;
//Capacity of the node-indexed arrays and of the word buffer; the problem
//statement bounds the total length of all words by 3*10^5.
#define MAXN 300005
//Number of child slots per trie node (second dimension of nxt).
#define  NEXTNUM 26
//Character subtracted from each letter to obtain its child-slot index.
#define  FC 'a'
//Not referenced by the code visible here.
#define  INF 0x3f3f3f3f

int nxt[MAXN][NEXTNUM];//trie children; build_fail() also fills empty slots with fallback transitions
int fail[MAXN];//fail pointer of each node
int cnt;//index that the next newnode() call will hand out

//Fail tree: Edge[u] lists the nodes whose fail pointer is u.
vector<int> Edge[MAXN];

//Appends v2 to the child list of v1 in the fail tree.
void insert_edge(int v1,int v2){
    vector<int> &children=Edge[v1];
    children.push_back(v2);
}

//DFS order of the fail tree.
//ln[v] is the preorder number of v, rn[v] the largest preorder number inside
//v's subtree, so the subtree of v is exactly the index range [ln[v], rn[v]].
int ln[MAXN];
int rn[MAXN];
int SEG;//last preorder number handed out; callers reset it to 0 before dfs()

//Numbers the subtree rooted at s in preorder, filling ln[] and rn[], and
//empties each node's child list once that node is finished.
//Implemented with an explicit stack: the fail tree can be as deep as the
//longest word, which is too deep for plain recursion on a small call stack.
void dfs(int s){
    vector<int> path;      //nodes on the current root-to-node path
    vector<size_t> cursor; //cursor[k]: index of the next unvisited child of path[k]
    ln[s]=++SEG;
    path.push_back(s);
    cursor.push_back(0);
    while(!path.empty()){
        int u=path.back();
        if(cursor.back()<Edge[u].size()){
            //Descend into the next child; it receives its number on entry.
            int child=Edge[u][cursor.back()++];
            ln[child]=++SEG;
            path.push_back(child);
            cursor.push_back(0);
        }else{
            //All children done: close the interval and release the list.
            rn[u]=SEG;
            Edge[u].clear();
            path.pop_back();
            cursor.pop_back();
        }
    }
}

//Aho-Corasick automaton section

//Allocates the next trie node: clears its child slots and fail pointer,
//then returns its index and advances cnt.
int newnode(){
    const int id=cnt++;
    memset(nxt[id],0,sizeof(nxt[id]));
    fail[id]=0;
    return id;
}

void insert(char s[],int len){
    int p=0;
    for(int i=0;i<len;i++){
        int x=s[i]-FC;
        if(nxt[p][x]==0){
            nxt[p][x]=newnode();
        }
        p=nxt[p][x];
    }
    //num[p]++;
}

int ans=0;//not read by the code visible here; main() declares its own local ans

//Breadth-first pass over the trie that
//  - sets fail[] for every node below depth 1 (depth-1 nodes keep the 0 that
//    newnode() gave them),
//  - replaces every missing child slot with the fallback transition, and
//  - records each fail pointer as a fail-tree edge fail[v] -> v.
void build_fail(){
    queue<int> que;
    que.push(0);
    while(!que.empty()){
        int temp=que.front();
        que.pop();
        if(temp)
            insert_edge(fail[temp],temp);
        for(int i=0;i<NEXTNUM;i++){
            int v=nxt[temp][i];
            if(v==0){
                //No real child: borrow the transition of the fail node, which
                //was completed earlier because it is shallower in BFS order.
                nxt[temp][i]=nxt[fail[temp]][i];
                continue;
            }
            //Only real children get a fail pointer; writing it for v==0 would
            //overwrite fail[0], the root's own fail pointer.
            if(temp!=0)
                fail[v]=nxt[fail[temp]][i];
            que.push(v);
        }
    }
}

//int AC_Match(char s[],int len){
//    int p=0;
//    int count=0;
//    for(int i=0;i<len;i++){
//        int x=s[i]-FC;
//        p=nxt[p][x];
//        for(int f=p;f;f=fail[f])
//            count+=num[f];
//    }
//    return count;
//}


//Segment tree section
//tree[rt] is a "raise to at least" tag for the whole interval of node rt.
//update() never clears tags; query() pushes them down along the path it walks.
int tree[MAXN<<2];

//Raises every position in [L,R] to at least C.
//  L,R : target range (must lie inside [l,r])
//  C   : candidate value
//  l,r : interval covered by node rt
//  rt  : current node index (root is 1)
void update(int L,int R,int C,int l,int r,int rt){
    if(L==l&&r==R){
        //Keep the larger tag. A plain assignment would let a later, smaller
        //candidate erase a better value already stored for this interval.
        tree[rt]=max(tree[rt],C);
        return;
    }
    int m=(l+r)>>1;
    if(R<=m){
        update(L,R,C,l,m,rt<<1);
    }else if(L>m){
        update(L,R,C,m+1,r,rt<<1|1);
    }else{
        //Range straddles the midpoint: split it between both children.
        update(L,m,C,l,m,rt<<1);
        update(m+1,R,C,m+1,r,rt<<1|1);
    }
}

//Point query: returns the value stored for position L, never less than 0 on
//the non-leaf path. l,r is the interval of node rt (root call: 1,SEG,1).
int query(int L,int l,int r,int rt){
    if(l==L&&r==L)
        return tree[rt];

    //Push this node's tag into both children instead of keeping a separate
    //lazy array (saves memory).
    const int lc=rt<<1;
    const int rc=lc|1;
    if(tree[lc]<tree[rt])
        tree[lc]=tree[rt];
    if(tree[rc]<tree[rt])
        tree[rc]=tree[rt];

    const int mid=(l+r)>>1;
    const int sub=(L<=mid)?query(L,l,mid,lc):query(L,mid+1,r,rc);
    return sub>0?sub:0;
}

//All words of the current test case stored back to back; main() reads word i
//directly into b+pos[i].
char b[MAXN];
int pos[20005];//pos[i] is the offset of word i inside b (offsets instead of separate strings, to save memory)
int val[20005];//val[i] is the weight (importance) of word i

//Reads all test cases. For each one it builds the trie, the fail links and the
//DFS order of the fail tree, then runs the DP over the words in input order
//and prints "Case #X: Y".
int main(){

    int t;
    if(scanf("%d",&t)!=1)
        return 0;
    for(int qqq=1;qqq<=t;qqq++){
        int N;
        if(scanf("%d",&N)!=1)
            break;
        SEG=0;
        cnt=0;
        pos[0]=0;
        newnode();//node 0 is the trie root
        for(int i=0;i<N;i++){
            if(scanf("%s%d",b+pos[i],&val[i])!=2){
                //Truncated input: work with the words read so far.
                N=i;
                break;
            }
            int len=strlen(b+pos[i]);
            pos[i+1]=pos[i]+len;
            insert(b+pos[i],len);
        }
        build_fail();
        dfs(0);
        memset(tree,0,sizeof(tree));
        int ans=0;
        //Words are 0..N-1. Iterating up to N would read pos[N+1] and val[N],
        //which are left over from an earlier test case.
        for(int i=0;i<N;i++){
            int p=0;
            int pre=0;
            for(int j=pos[i];j<pos[i+1];j++){
                int v=val[i]*(j==pos[i+1]-1);//the weight counts only at the last character of the word
                p=nxt[p][b[j]-FC];
                int tmp=query(ln[p],1,SEG,1);
                //DP step, similar to maximum subarray: best chain value seen
                //at any prefix so far, plus the weight at the word's end.
                tmp=max(pre,tmp)+v;
                ans=max(ans,tmp);
                //query() never returns less than 0, so only positive values
                //are worth storing.
                if(tmp>0)
                    update(ln[p],rn[p],tmp,1,SEG,1);
                pre=tmp;
            }
        }
        printf("Case #%d: %d\n",qqq,ans);
    }
    return 0;
}



大神的AC代码!看数组的个数……比我多好几倍啊……为什么就AC了?求解释…………

#include <cstring>
#include <cstdio>
// Larger of two ints. A function instead of the unparenthesised macro: the
// macro mis-evaluates inside larger expressions (2*max(1,3) became 3) and
// evaluated its arguments twice, e.g. calling query() twice in solve().
static inline int max(int a,int b){ return a>b?a:b; }
// Capacity used for the node-indexed arrays and the word buffer.
const int mn=300005;
// One automaton node: parent index, fail link and 26 child slots.
struct Trie{
	int father,fail,next[26];
	// Resets the node to an empty state (all fields zero).
	void init(){
		fail=0;
		father=0;
		for (int c=0;c<26;++c)
			next[c]=0;
	}
}tree[mn];
// Adjacency lists stored as linked edge chains: fst[a] is the newest edge
// leaving a, nxt[e] the edge added before e from the same source, y[e] the
// target of edge e. Edge numbers start at 1; 0 terminates a chain.
struct Xls{
	int size,y[mn],nxt[mn],fst[mn];
	// Empties every list.
	void set(){
		for (int v=0;v<mn;++v)
			fst[v]=0;
		size=0;
	}
	// Prepends an edge a -> b to the list of a.
	void add(int a,int b){
		const int e=++size;
		y[e]=b;
		nxt[e]=fst[a];
		fst[a]=e;
	}
}g;
// Segment tree node over DFS-order positions.
//   l, r : interval covered by the node (set by buildseg)
//   val  : value maintained with max by update/pushdown; query reads it at leaves
//   add  : pending "raise to at least" tag, 0 when nothing is pending
struct Segtree{
	int val,add,l,r;
}segtree[mn*4];
// All words of the current test case stored back to back.
char str[mn];
// pos[i]: offset in str just past word i (pos[0] is 0), so word i occupies [pos[i-1], pos[i]).
// n: number of words; cnt: highest trie node index in use; val[i]: weight of word i.
// in[v]/out[v]: DFS entry number of v and the largest entry number inside v's subtree.
int pos[mn],n,cnt,val[21000],in[mn],out[mn];
// Inserts the NUL-terminated word ch into the trie rooted at node 1, creating
// missing nodes and recording each visited child's parent.
void build(char ch[]){
	int node=1;
	for (const char *p=ch;*p;++p){
		int &child=tree[node].next[*p-'a'];
		if (child==0){
			tree[++cnt].init();
			child=cnt;
		}
		tree[child].father=node;
		node=child;
	}
}
// BFS queue of node indices (1-based positions).
int q[310000];
// Computes fail links breadth-first from root 1, fills every missing child
// slot with the fallback transition, then rebuilds g so that it holds one
// edge fail[v] -> v for every node v in 1..cnt.
void build_AC(){
	int head=0,tail=0;
	tree[0].fail=1;
	q[++tail]=1;
	while (head<tail){
		const int cur=q[++head];
		const int f=tree[cur].fail;
		for (int c=0;c<26;c++){
			const int child=tree[cur].next[c];
			if (child){
				// Follow fail links until a node with a c-transition is found.
				int k=f;
				while (!tree[k].next[c])
					k=tree[k].fail;
				tree[child].fail=tree[k].next[c];
				q[++tail]=child;
			}else{
				tree[cur].next[c]=tree[f].next[c];
			}
		}
	}
	g.set();
	for (int v=1;v<=cnt;v++)
		g.add(tree[v].fail,v);
}
// Last DFS entry number handed out; main() resets it to 0 before dfs().
int count_dfs;
// Numbers the fail-tree subtree of rt in preorder: in[rt] is rt's own number,
// out[rt] the largest number inside its subtree.
void dfs(int rt){
	in[rt]=++count_dfs;
	// Recurse into the child vertex g.y[e], not the edge number e. The two
	// coincide only because build_AC adds the edge for vertex i as edge i.
	for (int e=g.fst[rt];e;e=g.nxt[e])
		dfs(g.y[e]);
	out[rt]=count_dfs;
}
void buildseg(int rt,int le,int ri){
	segtree[rt].val=segtree[rt].add=0;
	segtree[rt].l=le;segtree[rt].r=ri;
	if (le==ri)
		return;
	buildseg(rt<<1,le,(le+ri)>>1);
	buildseg(rt<<1|1,((le+ri)>>1)+1,ri);
}
// Moves the pending tag of rt into both children (raising their val and add
// to at least the tag) and clears it on rt. Does nothing when the tag is 0.
void pushdown(int rt){
	const int tag=segtree[rt].add;
	if (!tag)
		return;
	for (int c=rt<<1;c<=(rt<<1|1);++c){
		if (segtree[c].add<tag)
			segtree[c].add=tag;
		if (segtree[c].val<tag)
			segtree[c].val=tag;
	}
	segtree[rt].add=0;
}
// Point query: walks from rt down to the leaf covering position x, pushing
// pending tags down on the way, and returns that leaf's val.
int query(int rt,int x){
	while (segtree[rt].l!=segtree[rt].r){
		pushdown(rt);
		rt=(x<=segtree[rt<<1].r)?(rt<<1):(rt<<1|1);
	}
	return segtree[rt].val;
}
int XLE,XRI,Xval;
void update(int rt){
	if (segtree[rt].l>XRI||segtree[rt].r<XLE)
		return;
	if (XLE<=segtree[rt].l&&segtree[rt].r<=XRI){
		segtree[rt].val=max(segtree[rt].val,Xval);
		segtree[rt].add=max(segtree[rt].add,Xval);
		return;
	}
	pushdown(rt);
	update(rt<<1);
	update(rt<<1|1);
	segtree[rt].val=max(segtree[rt<<1].val,segtree[rt<<1|1].val);
}
int solve(){
	int rt,t,big,result=0;
	for (int i=1;i<=n;i++){
		rt=1;big=0;
		for (int j=pos[i-1];j<pos[i];j++){
			t=str[j]-'a';
			rt=tree[rt].next[t];
			big=max(big,query(1,in[rt]));
		}
		XLE=in[rt];XRI=out[rt];Xval=big+val[i];
		update(1);
		result=max(result,Xval);
	}
	return result;
}
// Per test case: reset the buffers and the trie (root 1, with helper node 0
// whose every transition leads to the root), read the words, build the
// automaton, DFS order and segment tree, then print the DP result.
int main(){
	int T;
	scanf("%d",&T);
	int Case=1;
	while (Case<=T){
		memset(str,0,sizeof str);
		memset(pos,0,sizeof pos);
		cnt=1;
		tree[0].init();
		tree[1].init();
		for (int c=0;c<26;c++)
			tree[0].next[c]=1;
		scanf("%d",&n);
		for (int w=1;w<=n;w++){
			char *word=str+pos[w-1];
			scanf("%s%d",word,&val[w]);
			build(word);
			pos[w]=pos[w-1]+strlen(word);
		}
		build_AC();
		count_dfs=0;
		dfs(1);
		buildseg(1,1,count_dfs);
		printf("Case #%d: %d\n",Case,solve());
		++Case;
	}
	return 0;
}





发布了402 篇原创文章 · 获赞 67 · 访问量 16万+
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 点我我会动 设计师: 上身试试

分享到微信朋友圈

×

扫一扫,手机浏览