【codechef】Common Strings（后缀数组）

最新推荐文章于 2020-06-02 19:01:48 发布

ccyy-

最新推荐文章于 2020-06-02 19:01:48 发布

阅读量485

点赞数

分类专栏：线段树&网络流&后缀数组etc 未完成的题目

本文链接：https://blog.csdn.net/cacyth/article/details/48196583

版权

线段树&网络流&后缀数组etc 同时被 2 个专栏收录

23 篇文章 0 订阅

订阅专栏

未完成的题目

12 篇文章 0 订阅

订阅专栏

You are given two strings A and B. Find the number of distinct strings which appear in both A and B. A string s is said to appear in S iff s is a substring (appears contiguously) of S.

Input

The first line of the input contains an integer T denoting the number of test cases. The description of T test cases follows.
Each test case consists of two lines.
The first line contains two space separated integers n1 and n2 denoting the lengths of A and B.
The second line contains two space separated strings A and B.

Output

For each test case output a single number denoting the number of distinct strings appearing in both A and B.

Constraints

1 ≤ T ≤ 10⁴
1 ≤ n1, n2 ≤ 10⁵
Sum of n1 + n2 over all test cases ≤ 10⁵
A is a string consisting of n1 lowercase characters ('a'-'z').
B is a string consisting of n2 lowercase characters ('a'-'z').

Example

Input:
2
3 5
aad zaacd
4 4
abcd lmno
Output:
3
0

Explanation

Example case 1. The three strings are "a", "d", "aa".

Example case 2. There are no strings that appear in both A and B.

https://www.codechef.com/IOPC2015/problems/IOPC15G/

kuangbin大神的代码。。还没看懂先瞻仰一下。。

#include <stdio.h>
#include <string.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <queue>
#include <set>
#include <map>
#include <string>
#include <math.h>
#include <stdlib.h>
#include <time.h>
using namespace std;

/*
* Suffix array
* Prefix-doubling algorithm, O(n*logn)
* The sequence to sort has length n and occupies indices 0..n-1; a 0 must be
* appended after it as a terminator (at index n).
* Call as da(str, sa, rank, height, n, m): pass n WITHOUT counting the
* terminator -- da() adds one to n internally to include it.
* Example:
* n   = 8;
* num[]   = { 1, 1, 2, 1, 1, 1, 1, 2, $ };  the last entry of num is 0, all others are > 0
* rank[]  = { 4, 6, 8, 1, 2, 3, 5, 7, 0 };  rank[0..n-1] are meaningful; rank[n] is always 0 (not meaningful)
* sa[]    = { 8, 3, 4, 5, 0, 6, 1, 7, 2 };  sa[1..n] are meaningful; sa[0] is always n (not meaningful)
* height[]= { 0, 0, 3, 2, 3, 1, 2, 0, 1 };  height[2..n] are meaningful
*
*/
const int MAXN=200010;
int t1[MAXN],t2[MAXN],c[MAXN];// scratch buffers used while building the suffix array; no initialisation needed
// Inside da() (where n already counts the terminator) the sequence to sort
// lives in s[0..n-1], and every value must be smaller than m because the
// values are used as indices into the counting array c.
// Every s[i] except s[n-1] must be greater than 0, and s[n-1] must be 0.
// When the function returns, the result is stored in the sa array.
// Returns true when positions a and b carry the same (first key, second key)
// pair in r, i.e. r[a] == r[b] and r[a+l] == r[b+l]. The second pair is only
// inspected when the first pair matches.
bool cmp(int *r,int a,int b,int l)
{
	if (r[a] != r[b]) {
		return false;
	}
	const int secondOfA = r[a + l];
	const int secondOfB = r[b + l];
	return secondOfA == secondOfB;
}
// Builds the suffix array, rank array and height array of str using prefix
// doubling with counting sorts.
//
//   str    : input sequence; str[0..n-1] are expected to be > 0 and str[n]
//            is expected to be 0 (terminator). All values must be < m.
//   sa     : output; sa[i] is the start index of the i-th smallest suffix
//            (indices 0..n, terminator suffix included).
//   rank   : output; rank[sa[i]] = i, i.e. the inverse permutation of sa.
//   height : output; height[rank[i]] is the number of leading elements that
//            suffix i shares with the suffix just before it in sa.
//   n      : length of the sequence NOT counting the terminator.
//   m      : exclusive upper bound on the values in str (alphabet size).
//
// Uses the file-level scratch arrays t1, t2 and c, so n+1 and m must fit in them.
void da(int str[],int sa[],int rank[],int height[],int n,int m)
{
    n++;// from here on n also counts the terminator at str[n]
    int i, j, p, *x = t1, *y = t2;
    // First round: counting sort on single elements (if the maximum value in
    // str is very large this could be replaced by a comparison sort).
    for(i = 0;i < m;i++)c[i] = 0;
    for(i = 0;i < n;i++)c[x[i] = str[i]]++;
    for(i = 1;i < m;i++)c[i] += c[i-1];
    for(i = n-1;i >= 0;i--)sa[--c[x[i]]] = i;
    // Each pass sorts suffixes by their first 2*j elements, using the order by
    // the first j elements (sa) and the rank keys for j elements (x).
    for(j = 1;j <= n; j <<= 1)
    {
        p = 0;
        // Derive the order by second key directly from the current sa.
        for(i = n-j; i < n; i++)y[p++] = i;// the last j suffixes have an empty second key, which sorts first
        for(i = 0; i < n; i++)if(sa[i] >= j)y[p++] = sa[i] - j;
        // y now lists the suffixes ordered by second key.
        // Counting sort by first key; filling from the back keeps the
        // second-key order among equal first keys.
        for(i = 0; i < m; i++)c[i] = 0;
        for(i = 0; i < n; i++)c[x[y[i]]]++;
        for(i = 1; i < m;i++)c[i] += c[i-1];
        for(i = n-1; i >= 0;i--)sa[--c[x[y[i]]]] = y[i];
        // Compute the new key array x from sa and the old keys (now in y).
        swap(x,y);
        p = 1; x[sa[0]] = 0;
        for(i = 1;i < n;i++)
            x[sa[i]] = cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        if(p >= n)break;// all keys are distinct: the order is final
        m = p;// number of distinct keys = bound for the next counting sort
    }
    int k = 0;
    n--;// back to the length without the terminator
	for(i = 0;i <= n;i++)rank[sa[i]] = i;
    // Height computation: walk the suffixes in text order; the match length k
    // carried over from suffix i-1 is reduced by at most one before extending.
    for(i = 0;i < n;i++)
    {
        if(k)k--;
        j = sa[rank[i]-1];// suffix immediately preceding suffix i in sorted order
        while(str[i+k] == str[j+k])k++;
        height[rank[i]] = k;
    }
}
// Outputs of da(): rank[i] is the position of suffix i in sa, height[i] is the
// common-prefix length of the suffixes at sa[i-1] and sa[i].
// NOTE(review): this file has `using namespace std;`, and in C++11 and later
// the standard library also declares std::rank (<type_traits>), so an
// unqualified use of `rank` at file/function scope can be rejected as
// ambiguous; refer to this array as ::rank.
int rank[MAXN],height[MAXN];

// r: the integer-encoded sequence handed to da(); sa: the resulting suffix array.
int r[MAXN],sa[MAXN];
// Buffers for the two input strings read by main.
char str1[MAXN],str2[MAXN];
// Tells whether suffix start positions i and j fall on opposite sides of
// index n (the separator position in the concatenated sequence). A position
// equal to n belongs to neither side. The parameter m is not used.
bool check(int i,int j,int n,int m){
	(void)m;
	if (i == n || j == n) {
		return false;
	}
	const bool iBeforeSeparator = i < n;
	const bool jBeforeSeparator = j < n;
	return iBeforeSeparator != jBeforeSeparator;
}

int main()
{
    int T;
	int n,m;
	scanf("%d",&T);
	while(T--){
		scanf("%d%d",&n,&m);
		scanf("%s%s",str1,str2);
		for(int i = 0;i < n;i++)
			r[i] = str1[i]-'a'+1;
		r[n] = 27;
		for(int i = 0;i < m;i++)
			r[n+1+i] = str2[i]-'a'+1;
		r[n+m+1] = 0;
		da(r,sa,rank,height,n+m+1,28);
		long long ans = 0;
		int tmp = 0;
		for(int i = 2;i <= n+m+1;i++){
			tmp = min(tmp,height[i]);
			if(check(sa[i],sa[i-1],n,m)){
				ans += height[i]-tmp;
				tmp = height[i];
			}
		}
		cout<<ans<<endl;
	}
    return 0;
}