字符串匹配进阶

最新推荐文章于 2024-07-16 18:35:54 发布

XINNNNNNNYU

最新推荐文章于 2024-07-16 18:35:54 发布

阅读量108

点赞数

分类专栏：题解笔记 ICPC 文章标签：算法 c++ 开发语言

本文链接：https://blog.csdn.net/xinnnnnyu/article/details/128237088

版权

题解同时被 3 个专栏收录

7 篇文章 0 订阅

订阅专栏

笔记

6 篇文章 0 订阅

订阅专栏

ICPC

3 篇文章 0 订阅

订阅专栏

FFT

A位模式串，长度为n；B为文本串，长度为m。

1.没有通配符的

首先引入一个匹配函数 $C (x, y) = A (x) - B (y)$ ,当C(x,y) = 0，我们认为A的第x字符和B的第y个字符相等。
再将一个字符匹配扩展到一个字符串。
那么，完全匹配函数 $=\displaystyle \sum^{m-1}_{i=0} C(i,x-m+i+1)$ ,若P(x) = 0，则B以x结尾的连续m位子串和A完全匹配。
上面的完全匹配函数存在一些错误
- 把求和函数拆开会发现就是匹配的两个串的每个字符分别相加，然后；两个和相减。那么只要字符的类型，个数对应相同就被判定相同，忽略了每个字符的对应关系。
- 产生错误的原因是每个字符的匹配函数有正有负，求和可能相互抵消为0。
- 解决方法就是把匹配函数 $C (x, y)$ 平方，因为若干个平方数相加=0只能是每一项都为0，和我们完全匹配时每个位置的 $C (x, y) = 0$ 时等价的。
- 等价转化—— 我们把A反转变成S，那么A(i) = S(m-i-1)。因为这样子匹配函数的x，y相加总是等于一个常数。（这很重要）
正确的完全匹配函数为 $=\displaystyle \sum^{m-1}_{i=0} (S[m-i-1] - B[x-m+i+1])^2$
将函数暴力展开为 $=\displaystyle \sum^{m-1}_{i=0} ({S[m-i-1]}^2 + {B[x-m+i+1]}^2 - 2 S[m-i-1] B[x-m+i+1])$
再展开为三部分
1. $\displaystyle \sum^{m-1}_{i=0} {S[m-i-1]}^2$ 显然，这是个定值记作T，我们可以 $O (m)$ 暴力预处理
2. $\displaystyle \sum^{m-1}_{i=0}{B[x-m+i+1]}^2$ 这是一个长度为m的区间和，我们通过预处理前缀和数组f ( x )，然后 $O (1)$ 得到
3. $\displaystyle -2\sum^{m-1}_{i=0}S[m-i-1] B[x-m+i+1]$ 通过上面我们把A用他的反转S替换后，我们发现(m-i-1)+(x-m+i+1) 恒等于x。那么公式为 $\displaystyle -2\sum_{i+i=x}S[i] B[j]$ 。
  - 另 $\displaystyle g(x) = \sum_{i+i=x}S[i] B[j]$ ，显然这是个多项式乘法，可以通过FFT求出所有值
$P (x) = T + f (x) - f (x - m) - 2 g (x)$ ，每个函数都可以通过上述求出，再 $O (n)$ 求出所有P(x)

有通配符的

调整匹配函数——和上述没有通配符的相同，但是我们要去掉通配符位置的贡献。
将原字符串是通配符的值设为0，再修改完全匹配匹配函数为 $=\displaystyle \sum^{m-1}_{i=0} (S[m-i-1] - B[x-m+i+1])^2S[m-i-1]B[x-m+i+1]$

其中一个为通配符就会乘上0，最后贡献为0
暴力展开-> $=\displaystyle \sum^{m-1}_{i=0} ({S[m-i-1]}^3B[x-m+i+1] + {B[x-m+i+1]}^3S[m-i-1] - 2 (S[m-i-1] B[x-m+i+1])^2)$

即 $=\displaystyle \sum_{i+j=x} {S[i]}^3B[j] + \sum_{i+j=x}{B[j]}^3S[i] - 2 \sum_{i+j=x}(S[i] B[j])^2$
6次FFT即可求的,再一次逆FFT回去即可

FFT板子

//复数类
struct Complex
{
    double x, y;
    Complex operator+ (const Complex& t) const
    {
        return {x + t.x, y + t.y};
    }
    Complex operator- (const Complex& t) const
    {
        return {x - t.x, y - t.y};
    }
    Complex operator* (const Complex& t) const
    {
        return {x * t.x - y * t.y, x * t.y + y * t.x};
    }
}a[N], b[N];//原来的多项式

int rev[N], bit, tot;

void fft(Complex a[], int inv)//1:系数表示法->点表示法；-1:点表示法->系数表示法
{
    for (int i = 0; i < tot; i ++ )
        if (i < rev[i])//否则就会交换两次换回原来
            swap(a[i], a[rev[i]]);
    for (int mid = 1; mid < tot; mid <<= 1)
    {
        auto w1 = Complex({cos(PI / mid), inv * sin(PI / mid)});
        for (int i = 0; i < tot; i += mid * 2)
        {
            auto wk = Complex({1, 0});
            for (int j = 0; j < mid; j ++, wk = wk * w1)
            {
                auto x = a[i + j], y = wk * a[i + j + mid];
                a[i + j] = x + y, a[i + j + mid] = x - y;
            }
        }
    }
    if(inv==-1){
        rep(i,0,tot-1) a[i].x/=tot;
    }
}

FFT匹配板子（带通配符）

int rev[maxn], tot, bit;
double a[maxn], b[maxn];
void FFT_MATCH(string s1, string s2, int m, int n){//s1模式串（短），s2文本串（长）
    reverse(all(s1));
    //应避免某个字符的值为0，选取字符集中不存在的字符减去即可
    rep(i,0,m) a[i] = s1[i]!='*' ? (s1[i]-'a') : 0;
    rep(i,0,n) b[i] = s2[i]!='*' ? (s2[i]-'a') : 0;
    while((1<<bit) < n+m+1) bit++;
	tot = 1<< bit; 
	// debug(tot);
    for (int i = 0; i < tot; i ++ )
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
    rep(i,0,tot-1) A[i].x = a[i]*a[i];
    rep(i,0,tot-1) B[i].x = b[i]*b[i];
    FFT(A,1);
    FFT(B,1);
    Complex t;
    t.x = 2;
    rep(i,0,tot-1) P[i] = P[i]-A[i]*B[i]*t;

    //循环到tot-1是因为上面卷积后会有系数大于n，m的项（上面第一次可以知道n，m）
    rep(i,0,tot-1) A[i].x = a[i]*a[i]*a[i],A[i].y=0;
    rep(i,0,tot-1) B[i].x = b[i],B[i].y=0;
    FFT(A,1);
    FFT(B,1);
    rep(i,0,tot-1) P[i] = P[i]+A[i]*B[i];

    
    rep(i,0,tot-1) A[i].x = a[i],A[i].y=0;
    rep(i,0,tot-1) B[i].x = b[i]*b[i]*b[i],B[i].y=0;
    FFT(A,1);
    FFT(B,1);
    rep(i,0,tot-1) P[i] = P[i]+A[i]*B[i];


    FFT(P,-1);
    rep(i,m,n){ 
    	if(fabs(P[i].x) <= 1e-7){
    		cout << i-m <<endl;//匹配到的开始位置，i是结束位置
    	}
    }
 
}
signed main()
{
	int T;
	cin >> T;
	while(T--){
		int n,m;//字符串长度
		cin >> n >> m;
		n--,m--;//最高位指数
		string s1,s2;
		cin >> s1 >> s2;
		FFT_MATCH(s2,s1,m,n);
	}
    return 0;
}

FFT匹配板子（不带通配符）

理论上带通配符版本可以处理没有通配符的

但是由于进行了6次FFT，常数更大，普通版只需要2次。

但是不带通配符的问题就是字符串匹配的模版题，KMP有更好的线性复杂度

double  sum[maxn];
void FFT_MATCH(string s1, string s2, int m, int n){//s1模式串（短），s2文本串（长）
    reverse(all(s1));
    //应避免某个字符的值为0，选取字符集中不存在的字符减去即可
    rep(i,0,m) A[i].x = s1[i]-'0';
    rep(i,0,n) B[i].x = s2[i]-'0';


    //第一部分
    double T = 0;
    rep(i,0,m) T+=A[i].x*A[i].x;//计算定值T
    // cout << T <<endl;
    
    //第二部分
    sum[0] = B[0].x*B[0].x;
    rep(i,1,n) 
    	sum[i] = sum[i-1] + B[i].x*B[i].x;


    //第三部分
    while((1<<bit) < n+m+1) bit++;
	tot = 1<< bit;
    for (int i = 0; i < tot; i ++ )
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
   
    FFT(A,1);
    FFT(B,1);
    rep(i,0,tot-1) C[i] = A[i]*B[i];
    FFT(C,-1);

    

    rep(i,m,n){ 
        double temp = 0;
        if(i==m) temp = T + sum[i] - 2*C[i].x;
        else temp = T + sum[i] - sum[i-m-1] -2*C[i].x;

    	if(fabs(temp) <= 1e-7){
    		cout << i-m <<endl;//匹配到的开始位置，i是结束位置
    	}
    }
 
}
signed main()
{
	int T;
	cin >> T;
	while(T--){
		int n,m;//字符串长度
		cin >> n >> m;
		n--,m--;//最高位指数
		string s1,s2;
		cin >> s1 >> s2;
		FFT_MATCH(s2,s1,m,n);
	}
    return 0;
}

FFT模糊匹配（k失配匹配）

一般这种问题能用FFT解决通常满足字符集较小

枚举字符集中每种字符c[k]
构建两个复数数组

A[i].x = (s1[i]==c[k])——B[i].x = (s2[i]==c[k])
对这两个数组进行无通配符的匹配
当且仅当对应位置相等并等于c[k]会有贡献1
对于字符集每个字符进行如上操作，将贡献累加到对应位置d[i]中
d[i]代表了以i结尾的位置匹配个数为d[i]个
d[i]>=匹配串长度-可失配个数则---->当前位置匹配成功

P3763 [TJOI2017]DNA FFT

题意:在S中匹配P的个数（允许3处不一样）

模板题

#include<iostream>
#include<cstring>
#include<cmath>
#include<vector>
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<deque>
#include<algorithm>

using namespace std;


#define mem(a,b) memset(a,b,sizeof a)
#define PII pair<int,int>
#define ll long long
#define ull unsigned long long
#define fi first
#define se second
#define endl '\n'
// #define PI acos(-1.0)
#define lcm(a,b) a/gcd(a,b)*b
#define INF 0x3f3f3f3f
#define INF_L 0x3f3f3f3f3f3f3f3f
#define debug(a) cout<<#a<<"="<<a<<endl;
#define Adebug(a,i) cout<<#a<<"["<<i<<"]="<<a[i]<<endl;
#define int long long
#define readI(l,r,A) for(int pig=l;pig<=r;pig++)	iocin >> A[pig]
#define rep(i, a, b) for(int i = (a); i <= (b); i ++)
#define per(i, a, b) for(int i = (a); i >= (b); i --)
#define vi vector<int>
#define vpii vector<PII>
#define pb push_back
#define rvs(s) reverse(s.begin(),s.end())
#define all(s) s.begin(),s.end()
#define sz(s) (int)(s.size())
#define lb(s) ((s) & (-s))
#define mk(s, t) make_pair(s, t)
const double PI = acos(-1); 

inline void wt(ll x){printf("%lld",x);}
inline void wtl(ll x){printf("%lld\n",x);}
inline void wtb(ll x){printf("%lld ",x);}
template <typename T> bool chkmx(T &x, T y){return x < y ? x = y, true : false;}
template <typename T> bool chkmn(T &x, T y){return x > y ? x = y, true : false;}
int qmi(int a, int k, int p){int res = 1;while (k){if (k & 1) res = (ll)res * a % p;a = (ll)a * a % p;k >>= 1;}return res;}
int qpow(int a,int b){int res = 1;while(b){if(b&1) res *= a;b>>=1;a*=a;}return res;}
int mo(int x,int p){return x = ((x%p)+p)%p;}
int gcd(int a,int b){return b?gcd(b,a%b):a;
}
const int maxn = 2e5+7;
const int mod = 1e9+7;
int dx[] = {0,0,1,-1}, dy[] = {1,-1,0,0};

int rev[maxn], tot, bit;
double a[maxn], b[maxn];
//复数类
struct Complex
{
    double x, y;
    Complex operator+ (const Complex& t) const
    {
        return {x + t.x, y + t.y};
    }
    Complex operator- (const Complex& t) const
    {
        return {x - t.x, y - t.y};
    }
    Complex operator* (const Complex& t) const
    {
        return {x * t.x - y * t.y, x * t.y + y * t.x};
    }

}A[maxn], B[maxn], C[maxn];//原来的多项式



void FFT(Complex aa[], int inv)
{
    for (int i = 0; i < tot; i ++ )
        if (i < rev[i])
            swap(aa[i], aa[rev[i]]);
    for (int mid = 1; mid < tot; mid <<= 1)
    {
        auto w1 = Complex({cos(PI / mid), inv * sin(PI / mid)});
        for (int i = 0; i < tot; i += mid * 2)
        {
            auto wk = Complex({1, 0});
            for (int j = 0; j < mid; j ++, wk = wk * w1)
            {
                auto x = aa[i + j], y = wk * aa[i + j + mid];
                aa[i + j] = x + y, aa[i + j + mid] = x - y;
            }
        }
    }
    if(inv==-1){
        rep(i,0,tot-1) aa[i].x/=tot;
    }
}

int d[maxn];

char cc[4]={'A','T','C','G'};
signed main()
{
	int T;
	cin >> T;
	while(T--){
		mem(d,0);
		int n,m;//字符串长度
		string s1,s2;
		cin >> s1 >> s2;
		m = s2.size()-1;
		n = s1.size()-1;
		reverse(all(s2));
		while((1<<bit)<n+m+1) bit++;
		tot = 1<<bit;
		for (int i = 0; i < tot; i ++ )
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
		
		rep(i,0,3){
			mem(A,0);
			mem(B,0);
			rep(j,0,n) A[j].x = (s1[j]==cc[i])? 1: 0;
			rep(j,0,m) B[j].x = (s2[j]==cc[i])? 1: 0;
			FFT(A,1);
			FFT(B,1);
			rep(j,0,tot-1) A[j] = A[j]*B[j];
			FFT(A,-1);
			rep(j,m,n) d[j] += (int)(A[j].x+0.5);
		}
		ll ans = 0;
		rep(i,m,n){
			if(d[i]>=m-2) ans++;
		}
		wtl(ans);
	}
    return 0;
}

2021杭电多校第三场1003

**思路：**和基因序列类似，不过加入了通配符。对于通配符的贡献我们要拿出来单独计算。由于文本串和模式串都包含通配符，需要用到一些小小的容斥原理。

数字部分贡献和上题相同。
通配符贡献 = A中通配符贡献 + B中通配符贡献 - AB中同时是通配符贡献

/*******************************
| Author:  pigstar
| Problem: Forgiving Matching
| Contest: HDOJ
| URL:     https://acm.dingbacode.com/showproblem.php?pid=6975
| When:    2021-10-16 00:46:51
| 
| Memory:  524 MB
| Time:    12000 ms
*******************************/
#include<iostream>
#include<cstring>
#include<cmath>
#include<vector>
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<deque>
#include<algorithm>

using namespace std;


#define mem(a,b) memset(a,b,sizeof a)
#define PII pair<int,int>
#define ll long long
#define ull unsigned long long
#define fi first
#define se second
#define endl '\n'
#define PI acos(-1.0)
#define lcm(a,b) a/gcd(a,b)*b
#define INF 0x3f3f3f3f
#define INF_L 0x3f3f3f3f3f3f3f3f
#define debug(a) cout<<#a<<"="<<a<<endl;
#define Adebug(a,i) cout<<#a<<"["<<i<<"]="<<a[i]<<endl;
#define int long long
#define readI(l,r,A) for(int pig=l;pig<=r;pig++)	iocin >> A[pig]
#define rep(i, a, b) for(int i = (a); i <= (b); i ++)
#define per(i, a, b) for(int i = (a); i >= (b); i --)
#define vi vector<int>
#define vpii vector<PII>
#define pb push_back
#define rvs(s) reverse(s.begin(),s.end())
#define all(s) s.begin(),s.end()
#define sz(s) (int)(s.size())
#define lb(s) ((s) & (-s))
#define mk(s, t) make_pair(s, t)


inline void wt(ll x){printf("%lld",x);}
inline void wtl(ll x){printf("%lld\n",x);}
inline void wtb(ll x){printf("%lld ",x);}
template <typename T> bool chkmx(T &x, T y){return x < y ? x = y, true : false;}
template <typename T> bool chkmn(T &x, T y){return x > y ? x = y, true : false;}
int qmi(int a, int k, int p){int res = 1;while (k){if (k & 1) res = (ll)res * a % p;a = (ll)a * a % p;k >>= 1;}return res;}
int qpow(int a,int b){int res = 1;while(b){if(b&1) res *= a;b>>=1;a*=a;}return res;}
int mo(int x,int p){return x = ((x%p)+p)%p;}
int gcd(int a,int b){return b?gcd(b,a%b):a;
}
const int maxn = 2e6+7;
const int mod = 1e9+7;
int dx[] = {0,0,1,-1}, dy[] = {1,-1,0,0};

int rev[maxn], tot, bit;
double a[maxn], b[maxn];
//复数类
struct Complex
{
    double x, y;
    Complex(double _r=0,double _i=0){x=_r,y=_i;}
    Complex operator+ (const Complex& t) const
    {
        return {x + t.x, y + t.y};
    }
    Complex operator- (const Complex& t) const
    {
        return {x - t.x, y - t.y};
    }
    Complex operator* (const Complex& t) const
    {
        return {x * t.x - y * t.y, x * t.y + y * t.x};
    }

}A[maxn], B[maxn];



void FFT(Complex aa[], int inv)
{
    for (int i = 0; i < tot; i ++ )
        if (i < rev[i])
            swap(aa[i], aa[rev[i]]);
    for (int mid = 1; mid < tot; mid <<= 1)
    {
        auto w1 = Complex({cos(PI / mid), inv * sin(PI / mid)});
        for (int i = 0; i < tot; i += mid * 2)
        {
            auto wk = Complex({1, 0});
            for (int j = 0; j < mid; j ++, wk = wk * w1)
            {
                auto x = aa[i + j], y = wk * aa[i + j + mid];
                aa[i + j] = x + y, aa[i + j + mid] = x - y;
            }
        }
    }
    if(inv==-1){
        rep(i,0,tot-1) aa[i].x/=tot;
    }
}

int d[maxn];//贡献--匹配了的个数
int ans[maxn];//失配i匹配成功的个数

signed main()
{
	int T;
	cin >> T;
	while(T--){
		
		int n,m;//字符串长度
		cin >> n >> m;
		string s1,s2;
		cin>>s1>>s2;
		mem(d,0);mem(ans,0);
		reverse(all(s2));
		while((1<<bit)<n+m-1) bit++;
		tot = 1<<bit;
		for (int i = 0; i < tot; i ++ )
        rev[i] = (rev[i >> 1] >> 1) | ((i & 1) << (bit - 1));
		
		//只计算数字的匹配个数
		for(char i = '0'; i <= '9'; i++){
			rep(j,0,tot){
				A[j].x = A[j].y = B[j].x = B[j].y = 0;
			}
			rep(j,0,n-1) A[j].x = (s1[j]==i);
			rep(j,0,m-1) B[j].x = (s2[j]==i);
			FFT(A,1);
			FFT(B,1);
			rep(j,0,tot-1) A[j] = A[j]*B[j];
			FFT(A,-1);
			rep(j,m-1,n-1){ 
				d[j] += (int)(A[j].x+0.5);
			}
		}
		

		//S1中星号去匹配S2
		rep(j,0,tot){
			A[j].x = A[j].y = B[j].x = B[j].y = 0;
		}
		rep(j,0,m-1) B[j].x = 1;
		rep(j,0,n-1) A[j].x = (s1[j]=='*');
		FFT(A,1);
		FFT(B,1);
		rep(j,0,tot-1) A[j] = A[j]*B[j];
		FFT(A,-1);
		rep(j,m-1,n-1) d[j] += (int)(A[j].x+0.5);


		//S2星号去匹配S1
		rep(j,0,tot){
			A[j].x = A[j].y = B[j].x = B[j].y = 0;
		}
		rep(j,0,n-1) A[j].x = 1;
		rep(j,0,m-1) B[j].x = (s2[j]=='*');
		FFT(A,1);
		FFT(B,1);
		rep(j,0,tot-1) A[j] = A[j]*B[j];
		FFT(A,-1);
		rep(j,m-1,n-1) d[j] += (int)(A[j].x+0.5);
		

		//减去S1，S2同时是星号的匹配情况
		rep(j,0,tot){
			A[j].x = A[j].y = B[j].x = B[j].y = 0;
		}
		rep(j,0,n-1) A[j].x = (s1[j]=='*');
		rep(j,0,m-1) B[j].x = (s2[j]=='*');
		FFT(A,1);
		FFT(B,1);
		rep(j,0,tot-1) A[j] = A[j]*B[j];
		FFT(A,-1);
		rep(j,m-1,n-1) d[j] -= (int)(A[j].x+0.5);
		

		//有成功匹配个数转化为失配个数
		rep(i,m-1,n-1){
			ans[m-d[i]]++;
		}
		//输出前缀和
		ll now = 0;
		rep(i,0,m){
			now+=ans[i];
			wtl(now);
		}
		
	}
    return 0;
}

Hash

哈希k失配匹配问题

P3763 [TJOI2017]DNA Hash + 二分

**题意：**在S中匹配P的个数（允许3处不一样）

**思路：**枚举P (len=n) 在S (len = m) 的起点（ $O (n)$ ）每一次都二分从当前起点开始下一处不匹配的地方在哪（ $\times log_{2}m$ ）（hs1[l…mid] != hs2[l…mid] 说明不匹配的点在前面—>r = mid-1否则l = mid+1）

总复杂度： $O(nklog_{2}m)$

#include<iostream>
#include<cstring>
#include<cmath>
#include<vector>
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<deque>
#include<algorithm>

using namespace std;


#define mem(a,b) memset(a,b,sizeof a)
#define PII pair<int,int>
#define ll long long
#define ull unsigned long long
#define fi first
#define se second
#define endl '\n'
#define PI acos(-1.0)
#define lcm(a,b) a/gcd(a,b)*b
#define INF 0x3f3f3f3f
#define INF_L 0x3f3f3f3f3f3f3f3f
#define debug(a) cout<<#a<<"="<<a<<endl;
#define Adebug(a,i) cout<<#a<<"["<<i<<"]="<<a[i]<<endl;
#define int long long
#define readI(l,r,A) for(int pig=l;pig<=r;pig++)	iocin >> A[pig]
#define rep(i, a, b) for(int i = (a); i <= (b); i ++)
#define per(i, a, b) for(int i = (a); i >= (b); i --)
#define vi vector<int>
#define vpii vector<PII>
#define pb push_back
#define rvs(s) reverse(s.begin(),s.end())
#define all(s) s.begin(),s.end()
#define sz(s) (int)(s.size())
#define lb(s) ((s) & (-s))
#define mk(s, t) make_pair(s, t)


inline void wt(ll x){printf("%lld",x);}
inline void wtl(ll x){printf("%lld\n",x);}
inline void wtb(ll x){printf("%lld ",x);}
template <typename T> bool chkmx(T &x, T y){return x < y ? x = y, true : false;}
template <typename T> bool chkmn(T &x, T y){return x > y ? x = y, true : false;}
int qmi(int a, int k, int p){int res = 1;while (k){if (k & 1) res = (ll)res * a % p;a = (ll)a * a % p;k >>= 1;}return res;}
int qpow(int a,int b){int res = 1;while(b){if(b&1) res *= a;b>>=1;a*=a;}return res;}
int mo(int x,int p){return x = ((x%p)+p)%p;}
int gcd(int a,int b){return b?gcd(b,a%b):a;
}
const int maxn = 1e5+7;
const int mod = 1e9+7;
int dx[] = {0,0,1,-1}, dy[] = {1,-1,0,0};

int T,N,M;

ull hs[maxn][2],P[maxn];

ull cal(int l, int r, int id){//求字串哈希值
    return hs[r][id] - hs[l-1][id] * P[r-l+1];
}


int lcp(int l1, int l2){//l1 S开始位置；l2 P开始位置
    int l = 1, r = M - l2+1;//二分的是往后相同的长度，最多到M，所以r是M-l2+1
    while(l<=r){
        int mid = (l+r)>>1;
        if(cal(l1,l1+mid-1,0) == cal(l2,l2+mid-1,1)) l = mid+1;//mid可行
        else r = mid-1;
    }
    return l-1;
}
signed main()
{

	cin >> T;
	while(T--){
        string S[2];
        cin >> S[0] >> S[1];
        S[0] = ' ' + S[0];
        S[1] = ' ' + S[1];
        N = S[0].size()-1, M = S[1].size()-1;
        P[0] = 1;
        rep(i,1,N) P[i] = P[i-1]*2333;
        //计算字符串的哈希值前缀
        rep(i,1,N) 
            hs[i][0] = hs[i-1][0]*2333 + S[0][i];
        rep(i,1,M) 
            hs[i][1] = hs[i-1][1]*2333 + S[1][i];
        

        ll ans = 0;
        rep(i,1,N){
            if(i+M-1>N) break;//从i开始往后长度不满足M
            int p1 = 1, p2 = 1;//开始位置都是1
            rep(j,1,4){//允许3次不同，那么找到第四个不同的点
                int x = lcp(i+p1-1,p2);//相同的长度
                //更新位置
                p1+=x;
                p2+=x;
                if(j!=4) p1++,p2++;//跳过错误的位置，最后一个不能跳
                if(p2>M){//满足了
                    ans++;
                    break;
                }
            }
        }
        wtl(ans);

	}
    return 0;
}

XINNNNNNNYU

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
字符串匹配进阶

A位模式串，长度为n；B为文本串，长度为m。1.没有通配符的调整匹配函数——和上述没有通配符的相同，但是我们要去掉通配符位置的贡献。将原字符串是通配符的值设为0，再修改完全匹配匹配函数为P(x)=∑i=0m−1(S[m−i−1]−B[x−m+i+1])2S[m−i−1]B[x−m+i+1]P(x) =\displaystyle \sum^{m-1}_{i=0} (S[m-i-1] - B[x-m+i+1])^2S[m-i-1]B[x-m+i+1]P(x)=i=0∑m−1(S[m−i−1]−B[x−m+i+
复制链接

扫一扫