NOIP模拟(10.22)T1 姓名匹配

9 篇文章 0 订阅
2 篇文章 0 订阅
  姓名匹配

题目背景:

10.22 NOIP模拟作业T1

分析:trie + 贪心

讲道理,我到现在都不知道这玩意儿的正确性在何方······后来经过thhyj dalao的提点,好像懂一点了,试着解释下,首先明确一件事情。

 

对于模式串a1,有两个串b1、b2,且b1、b2的最优模式串均为a1。若lcp(b1, a1) > lcp(b2, a1),那么a1选择b1肯定更优。我们来证明一下:假设存在另一个模式串a2,是与b1、b2匹配第二优的。若a2与b2的匹配长度为x,且x < lcp(a1, b2),那么显然a2与b1的匹配长度也为x;因为a1与b2的匹配长度小于a1与b1的匹配长度,所以lcp(a1, b1) + x > lcp(a1, b2) + x,结论成立。若a2与b1的匹配长度x满足lcp(b2, a1) < x < lcp(a1, b1)(前面说了a1是最优的),那么lcp(b2, a2) = lcp(b2, a1);因为x < lcp(a1, b1),所以lcp(a1, b1) + lcp(b2, a2) > lcp(a1, b2) + x,得证。

 

回到代码中的贪心:对于trie上的一个节点,若cnta > cntb,那么不管怎么说,这些匹配串b肯定都会选择同样经过这个节点的a串;若cnta < cntb,那么显然被选择的是和这些模式串a匹配长度最长的那些b串,满足上面的证明,所以这个贪心是对的。(感觉还是很不清晰······之后再想办法解释吧)

Source

/*
	created by scarlyw
*/
#include <iostream>
#include <cstdio>
#include <string>
#include <cstring>
#include <cmath>
#include <algorithm>
#include <cctype>
#include <set>
#include <map>
#include <vector>
#include <queue>

// Number of entries in the trie node pool `trie`.
const int MAXN = 800000;
// Size in bytes of the input buffer `s` (terminating NUL included).
const int MAXX = 60;
// Number of child slots per trie node; insert() indexes them by `ch - 'a'`.
const int ALP = 26;

// n: number of strings read per group; ans: the accumulated answer;
// cnt: index of the most recently allocated trie node (node 0 is the root).
int n, ans, cnt;
// Scratch buffer each input string is read into before it is inserted.
char s[MAXX];

// One trie node. Entry 0 of `trie` serves as the root; a child index of 0
// means "no child yet" (insert() allocates children starting from index 1).
struct node {
	// c[k]: index of the child reached through letter 'a' + k, or 0 if absent.
	int c[ALP];
	// cnt[g]: how many inserted strings of group g (0 or 1) pass through this node.
	int cnt[2];
} trie[MAXN];

// Walks `s` down the trie starting at the root, allocating any missing node
// on the way, and increments the per-group counter of every node reached
// (the root itself is never counted). `flag` selects the counter:
// false -> cnt[0], true -> cnt[1].
inline void insert(char *s, bool flag) {
	int cur = 0;
	for (const char *p = s; *p != '\0'; ++p) {
		int &child = trie[cur].c[*p - 'a'];
		if (child == 0) child = ++cnt;
		cur = child;
		++trie[cur].cnt[flag];
	}
}

inline void solve() {
	scanf("%d", &n);
	for (int i = 1; i <= n; ++i) scanf("%s", s), insert(s, 0);
	for (int i = 1; i <= n; ++i) scanf("%s", s), insert(s, 1);
	for (int i = 0; i <= cnt; ++i) 
		ans += std::min(trie[i].cnt[0], trie[i].cnt[1]);
	std::cout << ans;
}

// Program entry point; everything happens inside solve(). Falling off the end
// of main is equivalent to returning 0.
int main() {
	solve();
}

然后,听dalao表示,还有一种用后缀数组套平衡树的做法,听了一下感觉好理解些,但是码长大概要自行体会······

思想大概就是:将所有的串接在一起,然后扔去做后缀数组,求得height数组;再把所有串按照rank排序,并预处理排序后相邻两串之间的lcp。那么显然,如果一个匹配串和一个模式串在排序后是相邻的,那么一定会考虑先选。所以开一个平衡树,把满足相邻情况的串对放进去,然后每次找出lcp最大的一对,并在原数组中删去这两个串(这一步可以通过链表来完成);若因此产生了新的相邻对,再加入即可。这样选n次就搞定了······

(orz xehoth dalao)

Source

#include <bits/stdc++.h>

// Hand-rolled buffered I/O on top of fread/fwrite, plus a small stream-like
// wrapper object (`io`) whose destructor flushes pending output.
//
// End of input is reported as the char value -1 throughout; on platforms
// where char is signed, an input byte 0xFF is therefore indistinguishable
// from end of input.
namespace IO {

// Returns the next byte of stdin, or -1 once fread delivers nothing more.
// Input is pulled in chunks of IN_LEN bytes.
inline char read() {
    static const int IN_LEN = 1000000;
    static char buf[IN_LEN], *s, *t;
    if (s == t) {
        s = buf;
        t = buf + fread(buf, 1, IN_LEN, stdin);
    }
    return s == t ? -1 : *s++;
}

// <cctype> classifiers have undefined behaviour for negative arguments other
// than EOF, so plain chars are routed through unsigned char first.
inline bool isDigit(char c) { return isdigit(static_cast<unsigned char>(c)) != 0; }
inline bool isSpace(char c) { return isspace(static_cast<unsigned char>(c)) != 0; }

// Parses the next decimal integer into x. Every non-digit before the number
// is skipped; if a '-' occurs anywhere in that skipped run the result is
// negated. Returns false (leaving x untouched) if input ends before a digit.
template <typename T>
inline bool read(T &x) {
    char c = read();
    bool negative = false;
    for (; !isDigit(c); c = read()) {
        if (c == -1) return false;
        if (c == '-') negative = true;
    }
    for (x = 0; isDigit(c); c = read()) x = x * 10 + (c ^ '0');
    if (negative) x = -x;
    return true;
}

// Stores the next non-whitespace byte in c (or -1 at end of input).
inline void read(char &c) {
    do {
        c = read();
    } while (isSpace(c) && c != -1);
}

// Reads the next whitespace-delimited token into buf and NUL-terminates it.
// Returns the token length, or -1 (with buf set to "") at end of input.
// The caller must supply a buffer large enough for the token.
inline int read(char *buf) {
    int s = 0;
    char c;
    do {
        c = read();
    } while (isSpace(c) && c != -1);
    if (c == -1) {
        *buf = 0;
        return -1;
    }
    do {
        buf[s++] = c;
        c = read();
    } while (!isSpace(c) && c != -1);
    buf[s] = 0;
    return s;
}

const int OUT_LEN = 1000000;

// Output buffer and its write cursor.
char obuf[OUT_LEN], *oh = obuf;

// Writes everything buffered so far to stdout and rewinds the cursor, so
// calling flush() more than once never emits the same bytes twice.
inline void flush() {
    fwrite(obuf, 1, oh - obuf, stdout);
    oh = obuf;
}

// Appends one character, flushing first if the buffer is full.
inline void print(char c) {
    if (oh == obuf + OUT_LEN) flush();
    *oh++ = c;
}

// Prints an integer in decimal. Digits are extracted from the value as-is
// (taking the absolute value of each remainder) instead of negating x, so
// the most negative value of T is handled without signed overflow.
template <typename T>
inline void print(T x) {
    if (x == 0) {
        print('0');
        return;
    }
    if (x < 0) print('-');
    char digits[40];
    int cnt = 0;
    for (; x; x /= 10) {
        const int digit = static_cast<int>(x % 10);
        digits[cnt++] = static_cast<char>('0' + (digit < 0 ? -digit : digit));
    }
    while (cnt) print(digits[--cnt]);
}

// Prints a NUL-terminated string.
inline void print(const char *s) {
    for (; *s; s++) print(*s);
}

// Minimal iostream look-alike: >> forwards to read(), << to print().
struct InputOutputStream {
    template <typename T>
    inline InputOutputStream &operator>>(T &x) {
        read(x);
        return *this;
    }

    template <typename T>
    inline InputOutputStream &operator<<(const T &x) {
        print(x);
        return *this;
    }

    // Pending output is written when the object is destroyed.
    ~InputOutputStream() { flush(); }
} io;
}

/**
 * Iterative range-minimum segment tree over a 1-based array.
 * Storage: 1000000 * 4 ints, roughly 15 MiB with 4-byte int.
 */
namespace SegmentTree {

const int MAXN = 1000000;

// d holds the implicit binary tree (node k has children 2k and 2k+1); the
// leaf for array position i is d[i + M], where M is the power of two chosen
// by build().
int d[MAXN * 4], M;

inline int optMin(int a, int b) { return a < b ? a : b; }

// Builds the tree over a[1..n]. M becomes the smallest power of two that is
// at least n + 2, which leaves one unused leaf on each side of the data for
// the open-interval walk in query(). Leaves outside [1, n] are not written.
inline void build(const int n, const int *a) {
    for (M = 1; M < n + 2; M <<= 1)
        ;
    for (int i = 1; i <= n; i++) d[i + M] = a[i];
    for (int i = M - 1; i; i--)
        d[i] = optMin(d[i << 1], d[i << 1 | 1]);
}

// Returns the minimum of a[s..t] (inclusive, 1-based), or INT_MAX for an
// empty range (s > t). Without the guard the two cursors below would start
// out already crossed and never become siblings.
inline int query(int s, int t) {
    if (s > t) return INT_MAX;
    int ret = INT_MAX;
    // s and t are exclusive boundaries that climb towards the root; whenever a
    // boundary is the outer child of its parent, its sibling lies strictly
    // inside the range and contributes to the answer.
    for (s = s + M - 1, t = t + M + 1; s ^ t ^ 1; s >>= 1, t >>= 1) {
        if (~s & 1) ret = optMin(ret, d[s ^ 1]);
        if (t & 1) ret = optMin(ret, d[t ^ 1]);
    }
    return ret;
}
}

namespace {

// True when i is an LMS position in SA-IS terms: the flag t[i] is set while
// t[i - 1] is clear. Position 0 (and any negative index) never qualifies, and
// t is not touched in that case.
inline bool islms(const int i, const bool *t) {
    if (i <= 0) return false;
    if (!t[i]) return false;
    return !t[i - 1];
}

// Induced-sorting pass used by sais().
//
//   s      the text; every symbol must lie in [0, sigma)
//   sa     output array of `len` entries, fully overwritten
//   len    number of symbols in s
//   sz, p  the seed positions p[0..sz) that are dropped into their buckets
//          first
//   sigma  alphabet size
//   t      per-position type flags (true where sais() marked the suffix as
//          smaller than its right neighbour)
//   b, cb  scratch arrays of sigma ints each: symbol counts and bucket cursors
//
// The `register` specifiers of the original were dropped: the keyword is no
// longer valid in C++17 and never affected the result.
template <typename T>
inline void sort(T s, int *sa, const int len, const int sz, const int sigma,
                 bool *t, int *b, int *cb, int *p) {
    memset(b, 0, sizeof(int) * sigma);
    memset(sa, -1, sizeof(int) * len);
    // Symbol histogram, then cb[c] = one past the end of bucket c.
    for (int i = 0; i < len; i++) b[s[i]]++;
    cb[0] = b[0];
    for (int i = 1; i < sigma; i++) cb[i] = cb[i - 1] + b[i];
    // Seeds go to the tails of their buckets, last seed first.
    for (int i = sz - 1; i >= 0; i--) sa[--cb[s[p[i]]]] = p[i];
    // Switch cb to bucket starts.
    // NOTE(review): cb[0] is not rewritten here; this appears to rely on
    // symbol 0 being a unique sentinel that is always among the seeds, so the
    // loop above has already brought cb[0] down to 0 — confirm.
    for (int i = 1; i < sigma; i++) cb[i] = cb[i - 1] + b[i - 1];
    // Left-to-right scan: a predecessor whose flag is clear is appended at the
    // front of its bucket.
    for (int i = 0; i < len; i++)
        if (sa[i] > 0 && !t[sa[i] - 1]) sa[cb[s[sa[i] - 1]]++] = sa[i] - 1;
    // Back to bucket ends for the opposite scan.
    cb[0] = b[0];
    for (int i = 1; i < sigma; i++) cb[i] = cb[i - 1] + b[i];
    // Right-to-left scan: a predecessor whose flag is set is placed at the
    // back of its bucket.
    for (int i = len - 1; i >= 0; i--)
        if (sa[i] > 0 && t[sa[i] - 1]) sa[--cb[s[sa[i] - 1]]] = sa[i] - 1;
}

// Builds the suffix array of s[0..len) into sa, recursing on a reduced text
// when needed.
//
//   s      the text, symbols in [0, sigma)
//   sa     output, `len` entries; also used as workspace for the reduced text
//   t      type-flag workspace; the recursive call continues at t + len
//   b      bucket workspace handed to sort(); b and b + sigma are both used,
//          so it needs room for 2 * sigma ints
//   b1     workspace for the LMS position lists; the recursive call continues
//          at b1 + sz
//
// NOTE(review): the comparison loop further down has no explicit bound on
// x + j or p + j; it presumably relies on the last symbol being a unique
// smallest sentinel (the visible caller stores 0 at s[len - 1]) — confirm
// before calling this with other inputs.
//
// The `register` specifiers of the original were dropped: the keyword is no
// longer valid in C++17 and never affected the result.
template <typename T>
inline void sais(T s, int *sa, const int len, bool *t, int *b, int *b1,
                 const int sigma) {
    int i, j, x, p = -1, sz = 0, cnt = 0;
    int *cb = b + sigma;
    // t[i] is true when suffix i is smaller than suffix i + 1; the last
    // position is flagged true by definition.
    for (t[len - 1] = 1, i = len - 2; i >= 0; i--)
        t[i] = s[i] < s[i + 1] || (s[i] == s[i + 1] && t[i + 1]);
    // Collect the LMS positions (flag set, left neighbour's flag clear) in
    // text order.
    for (i = 1; i < len; i++)
        if (t[i] && !t[i - 1]) b1[sz++] = i;
    // First induced sort, seeded with the LMS positions in text order.
    sort(s, sa, len, sz, sigma, t, b, cb, b1);
    // Keep only the LMS entries, packed at the front in their sorted order.
    for (i = sz = 0; i < len; i++)
        if (islms(sa[i], t)) sa[sz++] = sa[i];
    for (i = sz; i < len; i++) sa[i] = -1;
    // Name the LMS substrings: walk them in sorted order and start a new name
    // whenever the current one differs (symbol or flag) from the previous one.
    // The name of the substring starting at x is parked at sa[sz + x / 2].
    for (i = 0; i < sz; i++) {
        for (x = sa[i], j = 0; j < len; j++) {
            if (p == -1 || s[x + j] != s[p + j] || t[x + j] != t[p + j]) {
                p = x, cnt++;
                break;
            } else if (j > 0 && (islms(x + j, t) || islms(p + j, t))) {
                break;
            }
        }
        sa[sz + (x >>= 1)] = cnt - 1;
    }
    // Pack the names, still in text order, against the end of sa; that tail
    // is the reduced text s1.
    for (i = j = len - 1; i >= sz; i--)
        if (sa[i] >= 0) sa[j--] = sa[i];
    int *s1 = sa + len - sz;
    int *b2 = b1 + sz;
    if (cnt < sz)
        // Some names repeat: sort the reduced text recursively.
        sais(s1, sa, sz, t + len, b, b1 + sz, cnt);
    else
        // All names are distinct, so each name is already its own rank.
        for (i = 0; i < sz; i++) sa[s1[i]] = i;
    // Translate the reduced-text order back into LMS positions of s and run
    // the final induced sort with those correctly ordered seeds.
    for (i = 0; i < sz; i++) b2[i] = b1[sa[i]];
    sort(s, sa, len, sz, sigma, t, b, cb, b2);
}

// Fills the rank and height arrays from a finished suffix array.
//
//   s   the text; s[n] is expected to hold a terminator that differs from
//       every other symbol, because the comparison loop below has no bounds
//       check of its own
//   n   text length without the terminator
//   sa  suffix array with n + 1 entries; sa[0] is the terminator's suffix
//   rk  output: rk[sa[i]] = i for i in 1..n
//   ht  output: ht[rk[i]] = length of the common prefix of suffix i and the
//       suffix ranked immediately before it
//
// Suffixes are visited in text order and k is carried over (minus one) from
// one suffix to the next, so the comparison does not restart from zero.
//
// The `register` specifiers of the original were dropped: the keyword is no
// longer valid in C++17 and never affected the result.
template <typename T>
inline void getHeight(T s, const int n, int *sa, int *rk, int *ht) {
    for (int i = 1; i <= n; i++) rk[sa[i]] = i;
    int k = 0;
    for (int i = 0; i < n; i++) {
        if (k) k--;
        const int j = sa[rk[i] - 1];
        while (s[j + k] == s[i + k]) k++;
        ht[rk[i]] = k;
    }
}

// NOTE(review): MAXM looks like the cap on the number of strings per group
// and MAX_LEN the cap on the total number of input characters — confirm
// against the problem limits.
const int MAXM = 100010;
const int MAX_LEN = 800010;
// Capacity of the arrays sized for the concatenated text: presumably all
// characters plus one separator per string (2 * MAXM) plus some slack.
const int MAXN = MAXM * 2 + MAX_LEN + 100;

// Owns the text `s` (n symbols, filled through operator[]) together with the
// arrays derived from it: suffix array `sa`, ranks `rk` and heights `ht`.
// `t` is the flag workspace handed to sais().
struct SuffixArray {
    int sa[MAXN], rk[MAXN], ht[MAXN], s[MAXN];
    int n;
    bool t[MAXN << 1];

    // Appends the terminator 0 at s[n], builds the suffix array over the
    // n + 1 symbols (rk and ht double as scratch space for sais), then
    // derives rk and ht. `sigma` is the alphabet size passed to sais().
    inline void build(const int sigma) {
        s[n] = 0;
        sais(s, sa, n + 1, t, rk, ht, sigma);
        ht[0] = 0;
        rk[0] = 0;
        getHeight(s, n, sa, rk, ht);
    }

    // Direct read/write access to the i-th text symbol.
    inline int &operator[](const int i) { return *(s + i); }
} suffixArray;

using IO::io;

// sa / rk / ht: set by solve() to point at the arrays inside suffixArray.
// pos1[i] / pos2[i]: offset in the concatenated text where string i of the
// first / second group starts.
// lcp[i]: range-minimum of ht between the ranks of d[i - 1] and d[i]; solve()
// later folds values together as list entries are removed.
int *sa, *rk, *ht, pos1[MAXN + 1], pos2[MAXN + 1], lcp[MAXN + 1];
// t[i]: result of type(d[i]) for the i-th sorted entry.
bool t[MAXN + 1];
// Doubly linked list over the sorted entries (indices into d), used by
// solve() to unlink entries.
int next[MAXN + 1], prev[MAXN + 1];

// Scratch buffer each input string is read into.
char buf[MAXN * 2 + 1];

typedef std::pair<int, int> Pair;

// d[1..cnt]: one (rank of the string's first suffix, index within its group)
// pair per input string; sorted in solve().
Pair d[MAXN * 2];

// Tells which group a (rank, index) entry belongs to: true when its rank
// equals the rank of the second-group string with the same index, i.e. the
// entry was created from pos2.
inline bool type(const Pair &x) {
    const int secondGroupRank = rk[pos2[x.second]];
    return secondGroupRank == x.first;
}

inline void solve() {
#ifdef DBG
    std::cerr << "static memory = "
              << sizeof(SegmentTree::d) + sizeof(suffixArray) + sizeof(pos1) +
                     sizeof(pos2) + sizeof(lcp) + sizeof(t) + sizeof(next) +
                     sizeof(prev) + sizeof(buf) + sizeof(d)
              << std::endl;
    if (sizeof(SegmentTree::d) + sizeof(suffixArray) + sizeof(pos1) +
            sizeof(pos2) + sizeof(lcp) + sizeof(t) + sizeof(next) +
            sizeof(prev) + sizeof(buf) + sizeof(d) >
        128 * 1024 * 1024) {
        std::cerr << "MLE" << std::endl;
    }
#endif
    register int &sn = suffixArray.n;
    register int n;
    io >> n;
    for (register int i = 0, len; i < n; i++) {
        len = IO::read(buf), pos1[i] = sn;
        for (register char *c = buf; *c; c++) suffixArray[sn++] = *c;
        suffixArray[sn++] = 256 + i;
    }
    for (register int i = 0, len; i < n; i++) {
        len = IO::read(buf), pos2[i] = sn;
        for (register char *c = buf; *c; c++) suffixArray[sn++] = *c;
        suffixArray[sn++] = 256 + n + i;
    }
    suffixArray.build(suffixArray[sn - 1] + 1);
    sa = suffixArray.sa, rk = suffixArray.rk, ht = suffixArray.ht;
    register int cnt = 0;
    for (register int i = 0; i < n; i++) {
        d[++cnt] = Pair(rk[pos1[i]], i);
        d[++cnt] = Pair(rk[pos2[i]], i);
    }
    std::sort(d + 1, d + cnt + 1);
    SegmentTree::build(sn, ht);
    for (register int i = 2; i <= cnt; i++)
        lcp[i] = SegmentTree::query(d[i - 1].first + 1, d[i].first);
    for (register int i = 1; i <= cnt; i++) t[i] = type(d[i]);
    register int head = 0, tail = cnt + 1;
    for (register int i = 0; i <= cnt; i++) next[i] = i + 1, prev[i + 1] = i;
    static std::set<Pair> set;
    for (register int i = 2; i <= cnt; i++)
        if (t[i] != t[i - 1]) set.insert(Pair(-lcp[i], i));
    register long long sum = 0;
    while (!set.empty()) {
        register std::set<Pair>::iterator it = set.begin();
        Pair tmp = *it;
        set.erase(it);
        sum -= tmp.first;
        register int x = tmp.second;
        register int prevX = prev[x], nextX = next[x], prevPrevX = prev[prevX];
        if (nextX != tail && t[x] != t[nextX])
            set.erase(Pair(-lcp[nextX], nextX));
        if (prevPrevX != head && t[prevX] != t[prevPrevX])
            set.erase(Pair(-lcp[prevX], prevX));
        if (nextX != tail) {
            lcp[nextX] = SegmentTree::optMin(lcp[nextX], lcp[x]);
            if (prevPrevX != head) {
                lcp[nextX] = SegmentTree::optMin(lcp[nextX], lcp[prevX]);
                if (t[nextX] != t[prevPrevX])
                    set.insert(Pair(-lcp[nextX], nextX));
            }
        }
        prev[nextX] = prevPrevX;
        next[prevPrevX] = nextX;
    }
    io << sum;
}
}

// Entry point. To run against files instead of the console, re-enable:
//   freopen("match.in", "r", stdin);
//   freopen("match.out", "w", stdout);
// Falling off the end of main is equivalent to returning 0.
int main() {
    solve();
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值