【后缀数组】后缀数组复习

不可重叠最长重复子串

poj1743 Musical Theme

/*********************************\
 * @prob: poj1743 Musical Theme  *
 * @auth: Wang Junji             *
 * @stat: Accepted.              *
 * @date: June. 15th, 2012       *
 * @memo: 后缀数组                *
\*********************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 20010; // capacity (in ints) of every work array below

typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers used by get_sa (ws holds the counting-sort buckets).
// sa: suffix array written by get_sa; r: the symbol sequence being indexed.
// rank, height: filled by get_height.
arr wa, wb, ws, wv, sa, r, rank, height;
int n; // set in main: first the number of input values, then (after --n) the number of differences

// Replaces a by b unless a is already strictly smaller; returns a reference to a.
inline int& gmin(int& a, const int& b)
{
    if (!(a < b)) a = b;
    return a;
}
// Replaces a by b unless a is already strictly larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void get_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
}

/**
 * Decides whether some run of k consecutive symbols of r occurs at two start
 * positions that lie more than k apart.
 *
 * r holds differences between adjacent input values (built in main), so a run
 * of k differences covers k + 1 original values. Two such occurrences share no
 * original value only when their starts differ by at least k + 1, hence the
 * strict comparison below (a gap of exactly k would reuse one value).
 *
 * Reads the file-level sa, height and n. Suffixes are scanned in sa order and
 * split into groups wherever height drops below k; inside a group every pair
 * of suffixes shares at least k symbols.
 *
 * @param k  run length, counted in symbols of r
 * @return   true when a group spans start positions more than k apart
 */
inline bool check(int k)
{
    int min_sa = sa[1], max_sa = sa[1];
    for (int i = 2; i < n + 1; ++i)
    {
        if (height[i] < k)
        {
            // Group ends here: test it, then start a new one at sa[i].
            if (max_sa - min_sa > k) return 1;
            max_sa = min_sa = sa[i];
        }
        else gmax(max_sa, sa[i]), gmin(min_sa, sa[i]);
    }
    return max_sa - min_sa > k;
}

/**
 * Reads test cases until a count of 0 (or end of input), and prints one
 * integer per case.
 *
 * Each case is turned into a sequence of adjacent differences (shifted by 100
 * so that they are non-negative), indexed with a suffix array, and the answer
 * is found by binary search over check().
 */
int main()
{
    freopen("Musical_Theme.in", "r", stdin);
    freopen("Musical_Theme.out", "w", stdout);
    // scanf returns EOF (non-zero) at end of input, so compare against 1
    // instead of testing for truthiness; otherwise the loop never ends.
    while (scanf("%d", &n) == 1 && n)
    {
        for (int i = 0; i < n; ++i) scanf("%d", r + i);
        // Replace values by differences; the last slot becomes the 0 sentinel.
        for (int i = 0; i < n - 1; ++i) r[i] -= r[i + 1] - 100;
        r[--n] = 0;
        get_sa(r, sa, n + 1, 200);
        get_height(r, sa, n);
        // Binary search over the run length; res records Mid + 1 for the
        // largest Mid accepted by check, and stays 0 when none is accepted.
        int L = 4, R = n + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            if (check(Mid)) res = L = Mid + 1;
            else R = Mid;
        }
        printf("%d\n", res);
    }
    return 0;
}

/*

不可重叠最长重复子串问题。
二分答案k,把排序后的后缀分成若干组,使每组内相邻后缀的height值都不小于k;若存在一组,其中最大sa值与最小sa值之差足够大(保证两次出现互不重叠),那么此k成立,否则不成立。
注意最后height数组的取值是1~n而不是0~n-1。

*/


可重叠的K次最长重复子串

poj3261 Milk Patterns

/**********************************\
 * @prob: poj3261 Milk_Patterns   *
 * @auth: Wang Junji              *
 * @stat: Accepted.               *
 * @date: June. 15th, 2012        *
 * @memo: 后缀数组                 *
\**********************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010; // capacity (in ints) of every work array below
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers of get_sa; r: symbols being indexed;
// rank, height: filled by get_height; sa: suffix array;
// tab: sorted copy of the input values, used in main to renumber them.
arr wa, wb, wv, ws, r, rank, sa, height, tab;
int n, K; // both read in main; check() compares group sizes against K

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void get_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
}

/**
 * Reports whether at least K suffixes share a common prefix of length k.
 *
 * Scans sa order and cuts it into groups wherever height drops below k; a
 * group starting at index first and ending before index i holds i - first
 * suffixes. Reads the file-level height, n and K.
 */
inline bool check(int k)
{
    int first = 1;  // sa index at which the current group starts
    for (int i = 2; i <= n; ++i)
    {
        if (height[i] >= k) continue;   // still inside the same group
        if (i - first >= K) return true;
        first = i;
    }
    // The last group runs up to sa index n.
    return n + 1 - first >= K;
}

/**
 * Reads n and K followed by n integers, renumbers the integers to 1..cnt in
 * sorted order, builds the suffix array and prints the largest length accepted
 * by check() (0 when none is accepted).
 */
int main()
{
    freopen("Milk_Patterns.in", "r", stdin);
    freopen("Milk_Patterns.out", "w", stdout);
    scanf("%d%d", &n, &K);
    for (int i = 0; i < n; ++i)
    {
        scanf("%d", r + i);
        tab[i] = r[i];
    }

    // Renumber values by their position among the sorted distinct values, so
    // the alphabet handed to get_sa is 1..cnt with 0 left for the sentinel.
    std::sort(tab, tab + n);
    int* const tab_end = std::unique(tab, tab + n);
    const int cnt = tab_end - tab;
    for (int i = 0; i < n; ++i)
        r[i] = std::lower_bound(tab, tab_end, r[i]) - tab + 1;
    r[n] = 0;

    get_sa(r, sa, n + 1, cnt + 1);
    get_height(r, sa, n);

    // Binary search over the half-open range [lo, hi).
    int lo = 1, hi = n + 1, res = 0;
    while (lo < hi)
    {
        const int mid = (lo + hi) >> 1;
        if (check(mid))
        {
            res = mid;
            lo = mid + 1;
        }
        else hi = mid;
    }
    printf("%d\n", res);
    return 0;
}

/*

可重叠的K次最长重复子串。
二分答案k,把排序后的后缀分成height值不小于k的若干组,若存在一组的元素个数不少于K,那么此k成立,否则不成立。
(注意k和K代表的含义不同。)

*/


不相同的子串的个数

spoj694 Distinct Substrings

spoj705 New Distinct Substrings

/*****************************\
 * @prob: spoj694 & spoj705  *
 * @auth: Wang Junji         *
 * @stat: Accepted.          *
 * @date: June. 16th, 2012   *
 * @memo: 后缀数组             *
\*****************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 50010; // capacity of every buffer below
typedef int arr[maxN];
char str[maxN]; // current input string, read in main
// wa, wb, ws, wv: scratch buffers of calc_sa; r: str copied into ints;
// rank, height: filled by calc_height; sa: suffix array.
arr wa, wb, ws, wv, r, rank, sa, height;
int n, T; // n: length of str; T: number of test cases still to process

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */

inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void calc_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
} /* calc_height */

/**
 * For each of T input strings prints the sum over all suffixes of
 * (suffix length - height), i.e. the number of prefixes each suffix
 * contributes beyond those shared with its predecessor in sa order.
 */
int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%s", str);
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i];
        r[n] = 0;   // sentinel below every character code
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);

        int ans = 0;
        for (int i = 1; i <= n; ++i)
        {
            const int suffix_len = n - sa[i];
            ans += suffix_len - height[i];
        }
        printf("%d\n", ans);
    }
    return 0;
} /* main */

/*

由于原串的子串一定是某个后缀的前缀,那么原问题等价于求出所有后缀中不相同的前缀个数。
原串的每个后缀i贡献出n - i个前缀,那么若按照字典序,则每个后缀sa[i]贡献出n - sa[i] - height[i]个与前面不同的前缀出来,所以只需要将这些值累加即可。

*/


最长回文子串

ural1297 Palindrome

/******************************\
 * @prob: ural1297 Palindrome *
 * @auth: Wang Junji          *
 * @stat: Accepted.           *
 * @date: June. 16th, 2012    *
 * @memo: 后缀数组             *
\******************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 2010; // capacity of every buffer below
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa; r: symbols being indexed;
// rank, height: filled by calc_height; sa: suffix array.
arr wa, wb, ws, wv, r, rank, sa, height;
// f: sparse table of range minima over height (f[q][i] covers 2^q entries from i);
// n: length of the combined text; pos: length of the original input string.
int f[20][maxN], n, pos;
char str[maxN]; // input string, extended in main by a space and its reverse

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */

inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Matching stops at the first 0 symbol, so a shared prefix never extends
 * across a 0 in r.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void calc_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common] && r[i + common] != 0) ++common;
        height[rank[i]] = common;
    }
} /* calc_height */

/**
 * Builds the sparse table f over height[1..n]: level 0 is height itself and
 * f[q + 1][i] is the minimum of the two level-q entries starting at i and at
 * i + 2^q. Reads the file-level n and height.
 */
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0; (1 << q) < n; ++q)
    {
        const int span = 1 << q;
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
    }
} /* rmq_init */

/**
 * Returns the minimum of height over the sa positions strictly after the
 * lower-ranked and up to the higher-ranked of the suffixes starting at a and b.
 *
 * The two suffixes must have different ranks: with equal ranks the level index
 * computed below would become -1.
 */
inline int LCP(int a, int b)
{
    int lo = rank[a];
    int hi = rank[b];
    if (lo > hi) std::swap(lo, hi);
    ++lo;   // height[lo] belongs to the pair (lo - 1, lo), so skip the first one

    // Largest q with 2^q <= hi - lo + 1.
    int q = 0;
    while ((1 << q) < hi - lo + 2) ++q;
    --q;

    // Two overlapping windows of 2^q entries cover [lo, hi].
    return std::min(f[q][lo], f[q][hi - (1 << q) + 1]);
} /* LCP */

/**
 * Reads one string and prints its longest palindromic substring (the first
 * one found when scanning centres left to right, since only strictly longer
 * candidates replace the current best).
 *
 * The text is laid out as  original ' ' reversed(original)  so that a
 * palindrome around a centre becomes a common prefix of one suffix from each
 * half, answered through LCP().
 */
int main()
{
    freopen("Palindrome.in", "r", stdin);
    freopen("Palindrome.out", "w", stdout);
    scanf("%s", str);
    pos = strlen(str);

    // Writing the separator overwrites the terminator and strncpy adds none,
    // so the combined length is computed and the terminator restored
    // explicitly rather than relying on the buffer being zero-filled.
    str[pos] = ' ';
    strncpy(str + pos + 1, str, pos);
    n = 2 * pos + 1;
    str[n] = '\0';
    std::reverse(str + pos + 1, str + n);

    for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
    r[n] = 0;
    calc_sa(r, sa, n + 1, 128);
    calc_height(r, sa, n);
    rmq_init();

    int ans = 0, res = 0;   // best length and its start position
    for (int i = 0; i < pos; ++i)
    {
        // Odd length, centred on i: suffix i against the mirrored suffix
        // that also starts on character i.
        int ths = LCP(i, n - i - 1); ths <<= 1, --ths;
        if (ths > ans) ans = ths, res = i - (ths >> 1);
        // Even length, centred between i - 1 and i.
        ths = LCP(i, n - i); ths <<= 1;
        if (ths > ans) ans = ths, res = i - (ths >> 1);
    } /* for */
    for (int i = res; i < res + ans; ++i) putchar(str[i]);
    printf("\n");
    return 0;
} /* main */

/*

最长回文串。
将原串和反转过后的串连接起来,中间用一个未出现过的字符连接,于是原问题就变成了求这个新字符串的某两个后缀的最长公共前缀。
枚举中心位置,分奇偶讨论回文串的长度,取出最长的解即可。

*/


连续重复子串

poj2406 Power Strings

/*********************************\
 * @prob: poj2406 Power_Strings  *
 * @auth: Wang Junji             *
 * @stat: Accepted.              *
 * @date: June. 15th, 2012       *
 * @memo: 暴力匹配                *
\*********************************/
#include <cstdio>
#include <cstring>

const int maxN = 1000010; // capacity of the input buffer
// str: current input string; n: its length; ans: result printed for the current string.
char str[maxN]; int n, ans;

// True when every character of str equals the one len positions later,
// for all positions where both indices are below n.
inline bool check(int len)
{
    int i = 0;
    while (i + len < n)
    {
        if (str[i] != str[i + len]) return false;
        ++i;
    }
    return true;
}

/**
 * For every input string up to a lone "." prints n / p, where p is the
 * smallest divisor of the length n that check() accepts.
 */
int main()
{
    freopen("Power_Strings.in", "r", stdin);
    freopen("Power_Strings.out", "w", stdout);
    while (scanf("%s", str) != EOF && strcmp(str, ".") != 0)
    {
        n = strlen(str);
        // Try candidate lengths in increasing order; the first hit wins.
        int period = 1;
        while (period <= n)
        {
            if (n % period == 0 && check(period))
            {
                ans = n / period;
                break;
            }
            ++period;
        }
        printf("%d\n", ans);
    }
    return 0;
}


重复次数最多的连续重复子串

poj3693 Maximum repetition substring

/************************************************\
 * @prob: poj3693 Maximum repetition substring  *
 * @auth: Wang Junji       * @stat: Accepted.   *
 * @date: June. 15th, 2012 * @memo: 后缀数组     *
\************************************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010; // capacity of every buffer below
typedef int arr[maxN];
// wa, wb, wv, ws: scratch buffers of get_sa; r: symbols being indexed;
// rank, height: filled by get_height; sa: suffix array;
// tab: candidate period lengths collected in main.
arr wa, wb, wv, ws, r, rank, height, sa, tab;
// f: sparse table of range minima over height; n: length of str;
// top: number of entries in tab; str: current input string.
int f[20][maxN], n, top; char str[maxN];

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void get_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
}

/**
 * Builds the sparse table f over height[1..n]: level 0 is height itself and
 * f[q + 1][i] is the minimum of the two level-q entries starting at i and at
 * i + 2^q. Reads the file-level n and height.
 */
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0; (1 << q) < n; ++q)
    {
        const int span = 1 << q;
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
    }
}

/**
 * Returns the minimum of height over the sa positions strictly after the
 * lower-ranked and up to the higher-ranked of the suffixes starting at a and b.
 *
 * The two suffixes must have different ranks: with equal ranks the level index
 * computed below would become -1.
 */
inline int LCP(int a, int b)
{
    int lo = rank[a];
    int hi = rank[b];
    if (lo > hi) std::swap(lo, hi);
    ++lo;   // height[lo] belongs to the pair (lo - 1, lo), so skip the first one

    // Largest q with 2^q <= hi - lo + 1.
    int q = 0;
    while ((1 << q) < hi - lo + 2) ++q;
    --q;

    // Two overlapping windows of 2^q entries cover [lo, hi].
    return std::min(f[q][lo], f[q][hi - (1 << q) + 1]);
}

/**
 * For every input string up to a lone "#", prints "Case k: " followed by the
 * substring that consists of the most consecutive repetitions of some block,
 * taking the lexicographically smallest such substring.
 *
 * Phase 1 finds the best repetition count and the block lengths that reach
 * it; phase 2 walks the suffixes in sorted order and takes the first start
 * position that really achieves that count with one of those lengths.
 */
int main()
{
    freopen("substr.in", "r", stdin);
    freopen("substr.out", "w", stdout);
    int Case = 0;
    while (scanf("%s", str) != EOF && strcmp(str, "#"))
    {
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - 'a' + 1;
        r[n] = 0;
        get_sa(r, sa, n + 1, 27);
        get_height(r, sa, n);
        rmq_init();

        int _cnt = 1, _pos = 0, _len = n;
        top = 0;    // candidate list is per test case
        for (int len = 1; len < n; ++len)
        for (int i = 0; i + len < n; i += len)
        {
            // K characters starting at i repeat with period len.
            const int K = LCP(i, i + len);
            int cnt = K / len + 1;
            // One more repetition fits if the periodic stretch also reaches
            // back far enough to complete the partial block: that takes
            // gap = len - K % len further characters before i.
            const int gap = len - K % len;
            const int pos = i - gap;
            if (K % len && pos > -1 && LCP(pos, pos + len) >= gap) ++cnt;
            if (cnt > _cnt) _cnt = cnt, top = 0;
            // len only grows, so comparing with the last entry is enough to
            // store each length once and keep tab within n entries.
            if (cnt == _cnt && (top == 0 || tab[top - 1] != len)) tab[top++] = len;
        }

        if (_cnt == 1)
        {
            // No block repeats: the answer is the smallest single character,
            // which starts the first real suffix in sorted order.
            _pos = sa[1], _len = 1;
        }
        else
        {
            bool flag = 0;
            for (int i = 1; i < n + 1 && !flag; ++i)
            {
                const int ths = sa[i];
                for (int j = 0; j < top; ++j)
                {
                    // Both suffixes must start inside the text; rank[] holds
                    // nothing meaningful at or beyond index n.
                    if (ths + tab[j] >= n) continue;
                    if (LCP(ths, ths + tab[j]) / tab[j] + 1 == _cnt)
                    {
                        _pos = ths, _len = tab[j]; flag = 1;
                        break;
                    }
                }
            }
        }
        printf("Case %d: ", ++Case);
        for (int i = _pos; i < _pos + _cnt * _len; ++i) putchar(str[i]);
        printf("\n");
    }
    return 0;
}

/*

重复次数最多的连续重复子串。
枚举长度len(即重复子串的循环节),然后求出长度为len的子串最多能出现几次。
设长度为len的子串在原串中出现了cnt次,那么这个长度为len * cnt的子串中一定包含了str[0], str[len], str[len * 2], ...中的cnt个,所以只需要看str[i]和str[i + len]往前和往后各能匹配多远。记能够匹配的总长度为K,那么cnt = K / len + 1,若K不能被len整除,则还需要看str[i - len + K % len]和str[i + K % len]能匹配多远,若能够匹配的长度不小于k,那么令此时的cnt加1。

要保证字典序,需要将所有重复了cnt次的可能的循环节长度全部记录下来。然后按后缀数组的顺序从头开始枚举起始位置,并且对于每一个起始位置都枚举一遍所有可能的循环节长度,第一次找到的符合要求的解即为最终的解。

*/


最长公共子串

poj2774 Long Long Message

/*************************************\
 * @prob: poj2774 Long Long Message  *
 * @auth: Wang Junji                 *
 * @stat: Accepted.                  *
 * @date: June. 15th, 2012           *
 * @memo: 后缀数组                    *
\*************************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 200010; // capacity of every buffer below
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of get_sa; r: symbols being indexed;
// rank, height: filled by get_height; sa: suffix array.
arr wa, wb, ws, wv, r, rank, sa, height;
// n: length of the joined text; pos: length of the first string (index of the
// separator); str: first string, a space, then the second string.
int n, pos; char str[maxN];

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void get_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
}

// Replaces a by b unless a is already strictly larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}

/**
 * Reads two strings and prints the length of their longest common substring.
 *
 * The strings are joined with a space between them. The answer is the largest
 * height between two neighbours in sorted suffix order that start on
 * opposite sides of the separator.
 */
int main()
{
    freopen("message.in", "r", stdin);
    freopen("message.out", "w", stdout);
    scanf("%s", str);
    pos = strlen(str);
    strcat(str, " ");
    scanf("%s", str + pos + 1);
    n = strlen(str);
    for (int i = 0; i < n; ++i) r[i] = str[i];
    // Write the sentinel explicitly instead of relying on r being zero-filled.
    r[n] = 0;
    get_sa(r, sa, n + 1, 128);
    get_height(r, sa, n);
    int ans = 0;
    for (int i = 1; i < n + 1; ++i)
    {
        // Index pos itself is the separator and counts for neither side.
        const bool first_then_second = sa[i - 1] < pos && sa[i] > pos;
        const bool second_then_first = sa[i - 1] > pos && sa[i] < pos;
        if (first_then_second || second_then_first) gmax(ans, height[i]);
    }
    printf("%d\n", ans);
    return 0;
}

/*

最长公共子串。
把两个字符串连接在一起,中间用一个特殊字符隔开(该字符比任何字符都小)。
根据height数组来找,若相邻的两个后缀分别属于两个字符串(即一个在特殊字符前一个在特殊字符后),那么取所有满足此条件的最大height值。

*/


长度不小于K的公共子串的个数

poj3415 Common Substrings

/*************************************\
 * @prob: poj3415 Common Substrings  *
 * @auth: Wang Junji                 *
 * @stat: Time Limit Exceeded.       *
 * @date: June. 16th, 2012           *
 * @memo: 后缀数组                    *
\*************************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 200010; // capacity of every buffer below
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of get_sa; r: symbols being indexed;
// rank, height: filled by get_height; sa: suffix array; sta: stack used in main.
arr wa, wb, ws, wv, r, rank, sa, height, sta;
// str: first string, a space, then the second string; f: sparse table of range
// minima over height; n: length of str; pos: index of the separator; K: minimum length.
char str[maxN]; int f[20][maxN], n, pos, K;

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void get_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void get_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common]) ++common;
        height[rank[i]] = common;
    }
}

/**
 * Builds the sparse table f over height[1..n]: level 0 is height itself and
 * f[q + 1][i] is the minimum of the two level-q entries starting at i and at
 * i + 2^q. Reads the file-level n and height.
 */
inline void rmq_init()
{
    for (int i = 1; i <= n; ++i) f[0][i] = height[i];
    for (int q = 0; (1 << q) < n; ++q)
    {
        const int span = 1 << q;
        for (int i = 1; i + span <= n + 1; ++i)
            f[q + 1][i] = std::min(f[q][i], f[q][i + span]);
    }
}

/**
 * Returns the minimum of height over the sa positions strictly after the
 * lower-ranked and up to the higher-ranked of the suffixes starting at a and b.
 *
 * The two suffixes must have different ranks: with equal ranks the level index
 * computed below would become -1.
 */
inline int LCP(int a, int b)
{
    int lo = rank[a];
    int hi = rank[b];
    if (lo > hi) std::swap(lo, hi);
    ++lo;   // height[lo] belongs to the pair (lo - 1, lo), so skip the first one

    // Largest q with 2^q <= hi - lo + 1.
    int q = 0;
    while ((1 << q) < hi - lo + 2) ++q;
    --q;

    // Two overlapping windows of 2^q entries cover [lo, hi].
    return std::min(f[q][lo], f[q][hi - (1 << q) + 1]);
}

int main()
{
    freopen("common_substr.in", "r", stdin);
    freopen("common_substr.out", "w", stdout);
    while (scanf("%d", &K) != EOF && K)
    {
        scanf("%s", str); pos = strlen(str); strcat(str, " ");
        scanf("%s", str + pos + 1); n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i];
        r[n] = 0;
        get_sa(r, sa, n + 1, 128);
        get_height(r, sa, n);
        rmq_init();
        int top = 0, ans = 0;
        for (int i = 1; i < n + 1; ++i)
        {
            if (height[i] < K) top = 0;
            if (sa[i] > pos)
            for (int j = 0; j < top; ++j)
                ans += LCP(sta[j], sa[i]) - K + 1;
            if (sa[i] < pos) sta[top++] = sa[i];
        }
        top = 0;
        for (int i = 1; i < n + 1; ++i)
        {
            if (height[i] < K) top = 0;
            if (sa[i] < pos)
            for (int j = 0; j < top; ++j)
                ans += LCP(sta[j], sa[i]) - K + 1;
            if (sa[i] > pos) sta[top++] = sa[i];
        }
        printf("%d\n", ans);
    }
    return 0;
}

/*

长度不小于K的公共子串的个数。
首先按照height值不小于K的原则分组,然后在每一组当中统计每组中后缀之间的最长公共前缀之和。扫描一遍,每遇到一个B就统计与之前的A的后缀能产生多少个长度不小于K的公共子串。

*/


每个字符串至少出现两次且互不重叠的最长子串

spoj220 Relevant Phrases of Annihilation

/**********************************************************\
 * @prob: spoj220 Relevant Phrases of Annihilation        *
 * @auth: Wang Junji       * @stat: Time Limit Exceeded.  *
 * @date: June. 16th, 2012 * @memo: 后缀数组               *
\**********************************************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>

const int maxN = 100010, maxM = 20; // buffer capacity; capacity of the per-string tables
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa; r: symbols being indexed;
// rank, height: filled by calc_height; sa: suffix array.
arr wa, wb, ws, wv, r, rank, sa, height;
// pos[i], len[i]: start offset and length of the i-th string inside str;
// n: length of the joined text; N: number of strings; T: remaining test cases.
int pos[maxM], len[maxM], n, N, T;
char str[maxN]; // all strings of one test case, each followed by a space

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
} /* cmp */

inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    } /* for */
    return;
} /* calc_sa */

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Matching stops at the first 0 symbol, so a shared prefix never extends
 * across a 0 in r.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void calc_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common] && r[i + common] != 0) ++common;
        height[rank[i]] = common;
    }
} /* calc_height */

// Index of the last entry of pos[0..N) that is <= x, i.e. the string whose
// start offset is the closest one at or before text position x.
inline int plc(const int& x)
{
    const int* const after = std::upper_bound(pos, pos + N, x);
    return static_cast<int>(after - pos) - 1;
} /* plc */

// Replaces a by b unless a is already strictly larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
} /* gmax */
// Replaces a by b unless a is already strictly smaller; returns a reference to a.
inline int& gmin(int& a, const int& b)
{
    if (!(a < b)) a = b;
    return a;
} /* gmin */

/**
 * Reports whether some group of suffixes sharing a prefix of length k contains,
 * for every one of the N strings, two start positions at least k apart.
 *
 * Suffixes are scanned in sa order and split into groups wherever height drops
 * below k. For each group the smallest and largest start position inside each
 * string are tracked; a string with no suffix in the group keeps the initial
 * values (max below min) and therefore fails the test.
 *
 * Both tables have one slot per string (maxM), so clearing them at every group
 * boundary touches only a few bytes. Reads the file-level sa, height, n and N.
 */
inline bool check(int k)
{
    static int min_sa[maxM], max_sa[maxM];
    memset(min_sa, 0x3f, sizeof min_sa);
    memset(max_sa, 0xff, sizeof max_sa);
    // Index n + 1 is a virtual boundary that closes the last group.
    for (int i = 1; i <= n + 1; ++i)
    {
        const bool at_end = i == n + 1;
        if (at_end || (i > 1 && height[i] < k))
        {
            bool flag = 1;
            for (int j = 0; j < N && flag; ++j)
                flag = max_sa[j] - min_sa[j] >= k;
            if (flag) return 1;
            if (at_end) break;
            memset(min_sa, 0x3f, sizeof min_sa);
            memset(max_sa, 0xff, sizeof max_sa);
        } /* if */
        const int tmp = plc(sa[i]);
        gmin(min_sa[tmp], sa[i]),
        gmax(max_sa[tmp], sa[i]);
    } /* for */
    return 0;
} /* check */

int main()
{
    freopen("phrases.in", "r", stdin);
    freopen("phrases.out", "w", stdout);
    scanf("%d", &T);
    while (T--)
    {
        scanf("%d", &N); int pst = 0, max_len = 0;
        for (int i = 0; i < N; ++i)
        {
            pos[i] = pst;
            scanf("%s", str + pst);
            len[i] = strlen(str + pst);
            gmax(max_len, len[i]);
            str[pst + len[i]] = ' ';
            pst += len[i] + 1;
        } /* for */
        n = strlen(str), str[n--] = 0;
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);
        int L = 0, R = max_len + 1, res = 0;
        while (L < R)
        {
            int Mid = (L + R) >> 1;
            check(Mid) ? (res = Mid, L = Mid + 1) : (R = Mid);
        } /* while */
        printf("%d\n", res);
    } /* while */
    return 0;
} /* main */

/*

每个字符串至少出现两次且不重叠的最长子串。
二分答案K,按height值分组,然后判断每一组中的字符串是否在每一个串中都出现过,并且在每一个串中出现的最大位置和最小位置之差不小于K。

*/


出现在大于一半的字符串中的最长子串

poj3294 Life Forms

/************************************\
 * @prob: poj3294 Life_Forms        *
 * @auth: Wang Junji                *
 * @stat: Accepted.                 *
 * @date: June. 16th, 2012          *
 * @memo: 后缀数组                   *
\************************************/
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <cstring>
#include <string>
#include <bitset>

using std::upper_bound;

// maxN: capacity of the joined-text buffers; maxM: capacity of the per-string
// tables; maxL: capacity of one input string.
const int maxN = 120010, maxM = 110, maxL = 1010;
typedef int arr[maxN];
// wa, wb, ws, wv: scratch buffers of calc_sa; r: symbols being indexed;
// rank, height: filled by calc_height; sa: suffix array.
arr wa, wb, ws, wv, r, rank, sa, height;
// len[i], pos[i]: length and start offset of the i-th string inside str;
// n: length of the joined text; N: number of strings.
int len[maxM], pos[maxM], n, N;
// tmp_str: the strings as read; str: the strings joined with spaces.
char tmp_str[maxM][maxL], str[maxN];
std::bitset <maxM> marked; // bit i set: string i has a suffix in the current group

// True when positions a and b carry equal keys both at offset 0 and at offset len.
// The second pair is only read when the first pair matches.
inline bool cmp(int* r, int a, int b, int len)
{
    if (r[a] != r[b]) return false;
    return r[a + len] == r[b + len];
}

inline void calc_sa(int* r, int* sa, int n, int m)
{
    int *x = wa, *y = wb;
    for (int i = 0; i < m; ++i) ws[i] = 0;
    for (int i = 0; i < n; ++i) ++ws[x[i] = r[i]];
    for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
    for (int i = n - 1; i > -1; --i) sa[--ws[x[i]]] = i;
    for (int j = 1, p = 1; p < n; j <<= 1, m = p)
    {
        p = 0;
        for (int i = n - j; i < n; ++i) y[p++] = i;
        for (int i = 0; i < n; ++i)
            if (sa[i] - j > -1) y[p++] = sa[i] - j;
        for (int i = 0; i < n; ++i) wv[i] = x[y[i]];
        for (int i = 0; i < m; ++i) ws[i] = 0;
        for (int i = 0; i < n; ++i) ++ws[wv[i]];
        for (int i = 1; i < m; ++i) ws[i] += ws[i - 1];
        for (int i = n - 1; i > -1; --i) sa[--ws[wv[i]]] = y[i];
        std::swap(x, y); x[sa[0]] = 0; p = 1;
        for (int i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], j) ? p - 1 : p++;
    }
    return;
}

/**
 * Fills the file-level rank[] and height[] arrays from a finished suffix array.
 *
 * rank[sa[i]] = i for i in 1..n, and height[rank[i]] is the number of leading
 * symbols that suffix i shares with the suffix placed just before it in sa.
 * Matching stops at the first 0 symbol, so a shared prefix never extends
 * across a 0 in r.
 * Assumes sa[0] is the sentinel position, so rank[i] >= 1 for every i < n.
 *
 * @param r   the indexed symbols, including the sentinel at r[n]
 * @param sa  suffix array over n + 1 symbols
 * @param n   number of symbols without the sentinel
 */
inline void calc_height(int* r, int* sa, int n)
{
    for (int at = 1; at <= n; ++at) rank[sa[at]] = at;

    int common = 0;  // carried over: the next suffix shares at least common - 1
    for (int i = 0; i < n; ++i)
    {
        const int before = sa[rank[i] - 1];
        if (common > 0) --common;
        while (r[i + common] == r[before + common] && r[i + common] != 0) ++common;
        height[rank[i]] = common;
    }
}

// Replaces a by b unless a is already strictly larger; returns a reference to a.
inline int& gmax(int& a, const int& b)
{
    if (!(a > b)) a = b;
    return a;
}

/**
 * Reports whether some group of suffixes sharing a prefix of length k touches
 * more than half of the N strings.
 *
 * Suffixes are scanned in sa order and split into groups wherever height drops
 * below k; the bitset marked records which strings own a suffix of the current
 * group. Reads the file-level sa, height, pos, n and N.
 */
inline bool check(int k)
{
    marked.reset();
    const int first_owner = upper_bound(pos, pos + N, sa[1]) - pos - 1;
    marked.set(first_owner);
    for (int i = 2; i <= n; ++i)
    {
        if (height[i] < k)
        {
            // Group boundary: evaluate the finished group, then start afresh.
            if (marked.count() > N >> 1) return true;
            marked.reset();
        }
        const int owner = upper_bound(pos, pos + N, sa[i]) - pos - 1;
        marked.set(owner);
    }
    return marked.count() > N >> 1;
}

/**
 * For every test case (a count N followed by N strings; input ends with
 * N = 0) prints, in sorted order, each longest substring found in more than
 * half of the strings, or "?" when there is none, followed by a blank line.
 */
int main()
{
    freopen("Life_Forms.in", "r", stdin);
    freopen("Life_Forms.out", "w", stdout);
    while (scanf("%d", &N) != EOF && N)
    {
        if (N == 1)
        {
            // Handled separately: the single string itself is printed.
            scanf("%s", str);
            puts(str);
            printf("\n");
            continue;
        }

        int max_len = 0;
        for (int i = 0; i < N; ++i)
        {
            scanf("%s", tmp_str[i]);
            len[i] = strlen(tmp_str[i]);
            gmax(max_len, len[i]);
        }

        // Join the strings with single spaces, recording where each begins.
        pos[0] = 0;
        strcpy(str, tmp_str[0]);
        str[len[0]] = ' ';
        for (int i = 1; i < N; ++i)
        {
            const int start = pos[i - 1] + (len[i - 1] + 1);
            pos[i] = start;
            str[start - 1] = ' ';
            strcpy(str + start, tmp_str[i]);
        }
        n = strlen(str);
        for (int i = 0; i < n; ++i) r[i] = str[i] - ' ';
        r[n] = 0;
        calc_sa(r, sa, n + 1, 128);
        calc_height(r, sa, n);

        // Binary search over the half-open range [lo, hi).
        int lo = 0, hi = max_len + 1, res = 0;
        while (lo < hi)
        {
            const int mid = (lo + hi) >> 1;
            if (check(mid))
            {
                res = mid;
                lo = mid + 1;
            }
            else hi = mid;
        }

        if (res == 0)
        {
            printf("?\n\n");
            continue;
        }

        // Repeat the grouping of check(res), printing one line per group that
        // qualifies. Index n + 1 is a virtual boundary closing the last group.
        marked.reset();
        marked.set(upper_bound(pos, pos + N, sa[1]) - pos - 1);
        for (int i = 2; i <= n + 1; ++i)
        {
            const bool at_end = i == n + 1;
            if (at_end || height[i] < res)
            {
                if (marked.count() > N >> 1)
                {
                    const int from = sa[i - 1];
                    for (int j = 0; j < res; ++j) putchar(str[from + j]);
                    printf("\n");
                }
                if (at_end) break;
                marked.reset();
            }
            marked.set(upper_bound(pos, pos + N, sa[i]) - pos - 1);
        }
        printf("\n");
    }
    return 0;
}

/*

出现在大于一半的字符串中的最长子串。

先把所有字符串连接起来,中间用一个没有出现过的字符连接。二分答案的长度k,将后缀分成height值不小于k的若干组,然后看是否至少存在一组中的后缀在大于一半的字符串中出现过,若是,则此k成立,否则不成立。输出时按照字典序(sa的顺序)扫描一遍,将所有符合条件的部分都输出即可。

*/

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值