KMP字符串匹配+Trie树+并查集+堆+哈希表+字符串哈希

lihua777

已于 2022-02-11 23:25:27 修改

阅读量823

点赞数 1

分类专栏： AcWing算法基础文章标签：散列表数据结构 c++ 算法

于 2022-02-11 20:40:58 首次发布

本文链接：https://blog.csdn.net/lihua777/article/details/122880194

版权

AcWing算法基础专栏收录该内容

9 篇文章 2 订阅

订阅专栏

KMP实例：

学算法之前我们需要先了解暴力方法：

本题的含义即是寻找needle字符串第一次出现在haystack中的下标

暴力匹配：

两个for循环，外层循环遍历haystack，内层循环逐个比较needle的字符；若有一个字母不相同，则跳出内层循环，继续下一次外层循环。因此我们需要一个标志flag来给予我们反馈：如果遍历到有字母不相同，就令flag=false并跳出内循环；如果遍历完needle长度的字符都没有发现不同的字母，flag保持为true，说明该位置即是正确答案。内循环结束后判断flag，如果flag为true，就返回外循环此时的下标i

// Brute-force substring search.
class Solution {
public:
    // Returns the index of the first occurrence of `needle` inside `haystack`,
    // or -1 when there is none. An empty `needle` matches at index 0.
    int strStr(string haystack, string needle) {
        const int hayLen = haystack.size();
        const int patLen = needle.size();
        const int lastStart = hayLen - patLen;  // negative when needle is longer
        for (int start = 0; start <= lastStart; ++start) {
            // Count how many leading characters agree at this alignment.
            int matched = 0;
            while (matched < patLen && haystack[start + matched] == needle[matched]) {
                ++matched;
            }
            if (matched == patLen) {
                return start;
            }
        }
        return -1;
    }
};

分析：时间复杂度很明显的是：O((n-m)*m) -> O(n*m)

KMP算法：

理论内容方面：

参考：代码随想录——实现strStr() https://www.programmercarl.com/0028.%E5%AE%9E%E7%8E%B0strStr.html

概要：

getNext()函数

（1）构造前缀表（next数组）

（2）遇到不相同的字母，利用前缀表进行回退操作
注意：是一边构造next数组，一边进行的回退操作，具体看代码

构造的理论依据是：next[i]存储子串s[0..i]的最长相等前后缀的长度，失配时根据这个长度决定回退到的下标

执行完getNext函数后我们就得到了 Next数组

接着我们就利用Next数组找到对应的字母，进而压缩了匹配时间，而不是像暴力方法一样：对haystack的下一个字母重新进行匹配

// KMP substring search.
class Solution {
public:
    // Builds the prefix table of `s` into `next`: next[i] is the length of the
    // longest proper prefix of s[0..i] that is also a suffix of s[0..i].
    // `next` must point to at least s.size() ints. Does nothing for an empty `s`.
    void getNext(int* next, const string& s) {
        if (s.empty()) {
            return;  // no slot to fill; avoids writing next[0] out of bounds
        }
        const int len = static_cast<int>(s.size());
        int j = 0;  // length of the currently matched prefix
        next[0] = 0;
        for (int i = 1; i < len; i++) {
            // On mismatch fall back along the already-built part of the table.
            while (j > 0 && s[i] != s[j]) {
                j = next[j - 1];
            }
            if (s[i] == s[j]) {
                j++;
            }
            next[i] = j;
        }
    }

    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 if it does not occur. An empty `needle` matches at index 0.
    int strStr(string haystack, string needle) {
        if (needle.empty()) {
            return 0;
        }
        const int n = static_cast<int>(haystack.size());
        const int m = static_cast<int>(needle.size());
        // Heap-backed table instead of a variable-length array, which is not
        // standard C++ and can overflow the stack for long patterns.
        vector<int> next(m);
        getNext(next.data(), needle);
        int j = 0;  // number of needle characters matched so far
        for (int i = 0; i < n; i++) {
            while (j > 0 && haystack[i] != needle[j]) {
                j = next[j - 1];  // reuse the matched prefix instead of restarting
            }
            if (haystack[i] == needle[j]) {
                j++;
            }
            if (j == m) {
                return i - m + 1;  // start index of the match that ends at i
            }
        }
        return -1;
    }
};

Trie树

含义:用来高效的存储和查找字符串集合的数据结构

红点表示存在一个字符串，它的字符串为：红点之前包括红点的所有字母的组合（逆序）

问题：

维护一个字符串集合，支持两种操作：

I x 向集合中插入一个字符串 x；
Q x 询问一个字符串在集合中出现了多少次

因此我们需要三个数组：

char str数组：存储每个字母

int son[N][26]数组：存储每个点的子节点

int cnt[N]：cnt存储当前点为结尾的单词个数

代码实现：

#include<iostream>
using namespace std;
// N bounds the number of trie nodes (rows of son / entries of cnt) and the
// length of the input buffer str; it is not the depth of the tree.
const int N = 100010;
// son[p][c]: index of the child of node p for letter c ('a' + c); 0 means no child.
// cnt[p]:    how many inserted words end exactly at node p.
// idx:       index of the most recently allocated node.
int son[N][26], cnt[N], idx;
// Buffer that receives the string of the current operation.
char str[N];
// Node 0 is the root and doubles as the "no child" marker in son.
// Adds the NUL-terminated lowercase word `str` to the trie, allocating any
// missing nodes on the way, and bumps the end-of-word counter of its last node.
void insert(char str[])
{
    int node = 0;  // start at the root
    for (const char* ch = str; *ch != '\0'; ++ch)
    {
        int& child = son[node][*ch - 'a'];
        if (child == 0) child = ++idx;  // create the child on first use
        node = child;
    }
    ++cnt[node];
}
// Returns how many times the NUL-terminated lowercase word `str` has been
// inserted; 0 as soon as the path for it leaves the trie.
int query(char str[])
{
    int node = 0;  // start at the root
    int pos = 0;
    while (str[pos] != '\0')
    {
        const int next = son[node][str[pos] - 'a'];
        if (next == 0) return 0;  // no such branch, so the word was never inserted
        node = next;
        ++pos;
    }
    return cnt[node];
}
// Reads an operation count followed by that many "op word" pairs.
// An op starting with 'I' inserts the word; any other op prints its count.
int main() {
    int remaining;
    scanf("%d", &remaining);
    while (remaining-- != 0)
    {
        char op[2];
        scanf("%s%s", op, str);
        if (op[0] != 'I')
        {
            printf("%d\n", query(str));
            continue;
        }
        insert(str);
    }
    return 0;
}

核心：

其实理解son数组是核心：想想它为什么是int型？它的作用是帮我们找到子节点的位置，那么它存储的就必然是位置，只不过这个位置是用数字表现出来的，而非数据结构中的地址

p表示当前所在结点的编号（不是层数），一开始为0（根结点），每走一步就变成对应子结点的编号；[26]这一维的下标是通过str[i]-'a'得到的数字，是字母的转化；idx记录已经分配到的最大结点编号，每新建一个结点就用++idx为son[p][u]赋值，进而起到存储“地址”的作用

例题：最大异或对

两位大佬的内容结合：我终于看懂了= =

AcWing 143. 最大异或对的证明与简单分析：https://www.acwing.com/solution/content/72001/

AcWing 143. 最大异或对（好题）：https://www.acwing.com/solution/content/9587/

#include <iostream>

using namespace std;

// N: capacity for the input values.
// M: capacity of the node pool; inserting one value walks 31 bit levels
//    (bits 30..0) and therefore creates at most 31 nodes.
const int N = 1e5 + 10, M = 31 * N;

// ne[u][b]: index of the child of node u for bit b; 0 means no child.
// a:        the input values.
// idx:      next free node index; starts at 1 because node 0 is the root.
int ne[M][2], a[N], idx = 1;

// Inserts the low 31 bits of x into the binary trie, most significant bit first.
void insert(int x)
{
    int node = 0;
    int bit = 30;
    while (bit >= 0)
    {
        const int b = (x >> bit) & 1;
        if (ne[node][b] == 0)
        {
            ne[node][b] = idx;  // allocate a fresh node for this branch
            ++idx;
        }
        node = ne[node][b];
        --bit;
    }
}

// Walks the trie from the top bit down, preferring at each level the branch
// whose bit differs from x's bit, and returns the 31-bit value spelled out by
// the branches taken.
int query(int x)
{
    int node = 0;
    int best = 0;
    for (int bit = 30; bit >= 0; --bit)
    {
        const int b = (x >> bit) & 1;
        const int want = b ^ 1;  // the opposite bit is what makes the XOR large
        const int taken = ne[node][want] ? want : b;
        node = ne[node][taken];
        best = (best << 1) | taken;
    }
    return best;
}

// Reads a count and that many integers. Each value is inserted into the trie
// and then paired with the best partner found so far (possibly itself); the
// largest XOR seen is printed at the end.
int main()
{
    int count;
    int best = 0;
    cin >> count;
    for (int i = 0; i < count; ++i)
    {
        int& value = a[i];
        cin >> value;
        insert(value);
        const int candidate = query(value) ^ value;
        if (best < candidate) best = candidate;
    }

    cout << best << endl;

    return 0;
}

并查集

快速地处理以下问题：

1、将两个集合合并

2、询问两个元素是否在一个集合当中

基本原理：每个集合用一棵树来表示，树根的编号就是整个集合的编号，每个结点存储它的父节点，p[x]表示x的父节点

核心1：判断是否为树根：if（p[x]==x）

核心2：求x的集合编号：while(p[x]!=x) x=p[x];

核心3：合并两个集合：px是x的集合编号，py是y的集合编号，令p[px]=py

例题：合并集合

836. 合并集合 - AcWing题库

前提：在未开始进行合并操作时，在p数组中，每个数的值都等于它的下标，因为：p[i]=i

核心：find函数（找到它的祖宗节点，因为只有祖宗节点才满足p[x]==x）。一开始在没有合并之前，每个数都是祖宗节点；合并操作即是把我的祖宗节点焊接到你的祖宗节点之下。以M 1 2为例，合并1、2所在的集合：第一步找到它们的祖宗节点，但由于是第一步，它们的祖宗就是它们本身，所以find函数返回的就是它们本身；然后执行p[find(1)]=find(2)，即p[1]=2，此时p[1]=p[2]=2，相当于1被连接到了2的祖宗节点之下。接下来判断两个元素是否在同一个集合，只需判断它们的祖先是不是同一个即可，例如find(1)和find(2)都是2，所以它们的祖先一样，它们所在的集合已经合并了

那么将1区间和3区间合并的效果呢？

同样按照这个方法走：find(1)->(p[1]!=1) p[1]=find(p[1]) p[2]=2 return p[2]

所以返回的是p[1]的祖宗p[2]

find(3)就是它本身了将p[2]=p[3] 即p[2]=p[3]=3(此时3成为了新的祖宗节点)

画图：很容易就能看出只有p[x]==x才是祖宗节点

#include<iostream>
using namespace std;
// Capacity of the parent table. AcWing 836 allows element labels up to 1e5,
// so the table must cover indices 1..100000; 10010 was too small.
const int N = 100010;
// n: number of elements, m: number of operations.
int n, m;
// p[x] is the parent of x; x is the representative of its set iff p[x] == x.
int p[N];
// Returns the representative of x's set and re-points every node on the path
// from x straight at that representative (path compression).
int find(int x)
{
	// First pass: climb to the root.
	int root = x;
	while (p[root] != root) root = p[root];
	// Second pass: hang each visited node directly under the root.
	while (p[x] != root)
	{
		const int up = p[x];
		p[x] = root;
		x = up;
	}
	return root;
}
// Reads n and m, makes every element 1..n its own set, then handles m
// operations "op a b": an op starting with 'M' merges the sets of a and b,
// any other op prints Yes/No depending on whether a and b share a set.
int main()
{
	cin >> n >> m;
	for (int i = 1; i <= n; i++) p[i] = i;
	while (m-- != 0)
	{
		char op[2];
		int a, b;
		cin >> op >> a >> b;
		if (op[0] != 'M')
		{
			puts(find(a) == find(b) ? "Yes" : "No");
			continue;
		}
		// Attach a's representative under b's representative.
		const int rootB = find(b);
		p[find(a)] = rootB;
	}

	return 0;
}

例题：连通块中点的数量

给定一个包含 n 个点（编号为 1~n）的无向图，初始时图中没有边。

现在要进行 m 个操作，操作共有三种：

C a b，在点 a 和点 b 之间连一条边，a 和 b 可能相等；
Q1 a b，询问点 a 和点 b 是否在同一个连通块中，a 和 b 可能相等；
Q2 a，询问点 a 所在连通块中点的数量；

#include<iostream>
using namespace std;

// Capacity of the tables below. AcWing 837 allows vertex labels up to 1e5,
// so indices 1..100000 must be valid; 10010 was too small.
const int N = 100010;
// fa[x]:   parent of x in the union-find forest (x is a root iff fa[x] == x).
// Size[r]: number of vertices in the component whose root is r
//          (only meaningful for roots).
int fa[N], Size[N];

// Puts every index 0..N-1 into its own component of size 1.
void init()
{
	int i = 0;
	while (i != N)
	{
		Size[i] = 1;
		fa[i] = i;
		++i;
	}
}
// Returns the root of x's component, compressing the path on the way back.
int find(int x)
{
	if (fa[x] != x)
	{
		fa[x] = find(fa[x]);
	}
	return fa[x];
}
void un(int a, int b)
{
	int aa = find(a);//找到祖宗节点
	int bb = find(b);
	if (aa != bb)//如果这两个不是同一个数
	{
		fa[aa] = bb;//相当与a连接到b上
		Size[bb] += Size[aa];//b的数量=本身+a所带的数量
	}
}

// Reads n and m, then m operations:
//   C a b  - connect a and b
//   Q1 a b - print Yes/No: are a and b in the same component?
//   Q2 a   - print the number of vertices in a's component
int main()
{
	init();
	int n, m;
	cin >> n >> m;
	while (m-- != 0)
	{
		char op[5];
		int a, b;
		cin >> op;
		if (op[0] == 'C')
		{
			cin >> a >> b;
			un(a, b);
			continue;
		}
		// Remaining ops are told apart by their second character.
		switch (op[1])
		{
		case '1':
			cin >> a >> b;
			cout << (find(a) == find(b) ? "Yes" : "No") << endl;
			break;
		case '2':
			cin >> a;
			cout << Size[find(a)] << endl;
			break;
		default:
			break;
		}
	}

	return 0;
}

例题：食物链

240. 食物链 - AcWing题库

//当动物x和动物y的距离%3等于1时,说明x捕食y
//当动物x和动物y的距离%3等于2时,说明y捕食x 也可以说y是x的天敌
//当动物x和动物y的距离%3等于0时,说明x和y是同类
#include<iostream>
using namespace std;
// Capacity for animal labels.
const int N=5e4+10;
// animal[x]: parent of x in the union-find forest.
// len[x]:    offset from x to animal[x]; once find(x) has compressed the path
//            this is x's distance to the root of its set.
int animal[N],len[N];
// Number of statements judged to be false.
int quantity;
// Returns the root of x's set. Path compression: x is re-attached directly to
// the root and len[x] becomes the accumulated distance from x to that root.
int find(int x)
{
    if (animal[x] == x) return x;
    const int parent = animal[x];
    // Resolve the parent first so len[parent] already measures parent -> root.
    const int root = find(parent);
    len[x] += len[parent];
    animal[x] = root;
    return root;
}
int main()
{
    int n,m;
    cin>>n>>m;
    for(int i=1;i<=n;i++) animal[i]=i;
    while(m--)
    {
        int op,x,y;
        cin>>op>>x>>y;
        if( x>n || y>n ) quantity++;
        else
        {
            int px = find(x), py = find(y);
            if(op==1)//真话 x和y是同类
            {
                if(find(x)==find(y) && (len[x]-len[y])%3)
                    quantity++;
                else if(px!=py)
                {
                    //合并x和y所在集合
                    animal[px]=py;
                    /*因为合并x和y所在集合多出了一段长度
                    这块长度是find(x)到find(y)的距离
                    所以求多出来的这块部分的长度
                    当x和y是同类时,有这样的特性
                    (len[x]+len[find[x]]-len[y])%3==0
                    这里的len[x]是还未合并时,x到find[x]的距离
                    ∴len[find[x]]=len[y]-len[x]
                    */
                    len[px]=len[y]-len[x];
                }
            }
            else//真话 x捕食y
            {
                /*
                  当x和y在一个集合中时,由题目可知,x捕食y
                  此时有 
                  x到根节点的距离-y到根节点的距离=1+3k k为任意
                  实数
                  ∴当(len[x]-len[y]-1-3k)%3 ==0 时可确认
                  x捕食y
                  反之当(len[x]-len[y]-1-3k)%3 !=0 
                  x不可能捕食y
                */
                if(px==py && (len[x]-len[y]- 1) %3)
                    quantity++;
                else if(px!=py)
                {
                    //当x和y不在一个集合时,将x和y所在集合合并
                    animal[px]=py;
                    /*
                    设find(x)到find(y)的距离为len([find(x)])
                    此时有len[x]+len([find(x)])-len[y]=3k+1
                    ∴len[find(x)]=-len[x]+len[y]+1+3k
                    */
                    len[px]=len[y]+1-len[x];
                }
            }
        }
    }
    cout<<quantity;
    return 0;
}

堆

堆排序：

#include<iostream>
#include<algorithm>
using namespace std;

// Capacity of the heap array. AcWing 838 allows up to 1e5 numbers stored at
// indices 1..n, so 10010 was too small.
const int N = 100010;
// n: how many numbers are read, m: how many of the smallest are printed.
int n, m;
// h[1..Size] is a 1-indexed binary min-heap; Size is its current element count.
int h[N], Size;

// Sifts the element at index u down until neither child is smaller than it.
void down(int u)
{
	for (;;)
	{
		const int left = 2 * u;
		const int right = left + 1;
		int smallest = u;
		if (left <= Size && h[left] < h[smallest]) smallest = left;
		if (right <= Size && h[right] < h[smallest]) smallest = right;
		if (smallest == u) return;  // heap property holds here
		swap(h[u], h[smallest]);
		u = smallest;
	}
}

// Reads n and m and then n numbers, heapifies them bottom-up, and prints the
// m smallest, one per line, each followed by a space.
int main()
{
	cin >> n >> m;
	int i = 1;
	while (i <= n)
	{
		cin >> h[i];
		++i;
	}
	Size = n;
	// Only nodes that have children need sifting; go from the last one upward.
	for (int node = n / 2; node != 0; --node) down(node);
	while (m-- != 0)
	{
		cout << h[1] << " " << endl;
		// Pop the minimum: move the last element to the top and sift it down.
		h[1] = h[Size];
		--Size;
		down(1);
	}

	return 0;
}

模拟堆：

#include<iostream>
#include<algorithm>
#include<string.h>
using namespace std;
// Capacity of the heap and of both index maps.
const int N=100010;
// Number of operations still to be processed.
int n;
// h[1..Size]: 1-indexed binary min-heap; Size is its current element count.
int h[N],ph[N],hp[N],Size;
/* ph[k] is the heap index currently holding the k-th inserted value;
   hp[i] is the insertion number of the value stored at heap index i. */
// Swaps heap slots a and b and keeps the ph/hp index maps in step with them.
void heap_swap(int a,int b){
    swap(h[a], h[b]);
    // Fix the insertion-number -> position map before hp itself changes.
    int& posOfA = ph[hp[a]];
    int& posOfB = ph[hp[b]];
    swap(posOfA, posOfB);
    swap(hp[a], hp[b]);
}
// Sifts the element at heap index u down until neither child is smaller.
void down(int u){
    for (;;) {
        const int left = u * 2;
        const int right = left + 1;
        int smallest = u;
        if (left <= Size && h[left] < h[smallest]) smallest = left;
        if (right <= Size && h[right] < h[smallest]) smallest = right;
        if (smallest == u) return;
        heap_swap(u, smallest);
        u = smallest;
    }
}
// Sifts the element at heap index u up while its parent is strictly larger.
void up(int u){
    for (int parent = u / 2; parent != 0 && h[parent] > h[u]; parent = u / 2) {
        heap_swap(parent, u);
        u = parent;
    }
}
// Reads an operation count and then that many commands:
//   I x   - insert x (it becomes the next numbered insertion)
//   PM    - print the minimum
//   DM    - delete the minimum
//   D k   - delete the k-th inserted value
//   other - read "k x" and set the k-th inserted value to x
int main(){
    scanf("%d",&n);
    int inserted=0;  // how many values have been inserted so far
    while(n-- != 0){
        char op[10];
        int k,x;
        scanf("%s",op);
        if(strcmp(op,"I")==0){
            scanf("%d",&x);
            ++Size;
            ++inserted;
            ph[inserted]=Size;
            hp[Size]=inserted;
            h[Size]=x;
            up(Size);
            continue;
        }
        if(strcmp(op,"PM")==0){
            printf("%d\n",h[1]);
            continue;
        }
        if(strcmp(op,"DM")==0){
            heap_swap(1,Size);
            --Size;
            down(1);
            continue;
        }
        if(strcmp(op,"D")==0){
            scanf("%d",&k);
            const int pos=ph[k];
            // Move the last element into the hole, then restore heap order in
            // whichever direction is needed.
            heap_swap(pos,Size);
            --Size;
            down(pos);
            up(pos);
            continue;
        }
        scanf("%d%d",&k,&x);
        const int pos=ph[k];
        h[pos]=x;
        down(pos);
        up(pos);
    }
    return 0;
}

哈希表

方法：直接将 x 对一个约为 1e5 的数取模，但是这样会存在哈希冲突（取模的数尽可能是质数）

拉链法：

核心：用单链表存储哈希冲突的位置

#include<iostream>
#include<cstring>
using namespace std;

// Number of buckets and capacity of the node pool below.
const int N = 100003;
// h[t]:  index of the first node in bucket t's chain, -1 when the bucket is empty.
// e[i]:  value stored in node i.
// ne[i]: index of the node after i in the same chain, -1 at the end.
// idx:   next unused node index.
int h[N], e[N], ne[N], idx;

void insert(int x)
{
	int t = (x % N + N) % N;//防止出现负数
	e[idx] = x;//单链表的插入方法
	ne[idx] = h[t];
	h[t] = idx++;
}

// Reports whether x is present by scanning the chain of x's bucket.
bool find(int x)
{
	int cur = h[(x % N + N) % N];
	while (cur != -1)
	{
		if (e[cur] == x) return true;
		cur = ne[cur];
	}
	return false;
}
// Reads an operation count and then that many "op x" pairs. An op starting
// with 'I' inserts x; any other op prints Yes/No for whether x is present.
int main()
{
	int n;
	cin >> n;
	memset(h, -1, sizeof(h));  // every bucket starts with an empty chain
	while (n-- != 0)
	{
		char op[2];
		int x;
		cin >> op >> x;
		if (op[0] != 'I')
		{
			puts(find(x) ? "Yes" : "No");
			continue;
		}
		insert(x);
	}

	return 0;
}

开放寻址法：（蹲坑寻位法）

核心：用特殊的标记null表示这个位置为空（还没有人）；查找时从哈希位置开始依次向后探测，直到遇到x或者遇到空位

#include<iostream>
#include<cstring>
using namespace std;
// N is the size of the open-addressing table: a prime, and the whole array is
// now hashed into and probed. Previously the array had 3*N slots but find()
// hashed and wrapped modulo N, so two thirds of it were never used and the
// probed region could become almost full.
// null marks an empty slot; it matches the 0x3f byte pattern that main()
// writes with memset.
const int N=200003,null=0x3f3f3f3f;
int h[N];
// Returns the slot that holds x if x is present, otherwise the first empty
// slot found by linear probing from x's hash position (wrapping at the end).
// The table must never be completely full or the probe would not terminate.
int find(int x){
	int t=(x%N+N)%N;  // non-negative hash, also for negative x
	while(h[t]!=null&&h[t]!=x){
		if(++t==N) t=0;
	}
	return t;
}
// Reads an operation count and then that many "op x" pairs. An op starting
// with 'I' stores x in the slot find() returns; any other op prints Yes/No
// depending on whether that slot is occupied.
int main(){
	int n;
	scanf("%d",&n);
	memset(h,0x3f,sizeof h);  // fills every int with 0x3f3f3f3f, i.e. null
	while(n-- != 0){
		char op[2];
		int x;
		scanf("%s%d",op,&x);
		const int slot=find(x);
		if(op[0]!='I'){
			puts(h[slot]!=null ? "Yes" : "No");
			continue;
		}
		h[slot]=x;
	}
}

字符串哈希：

判断[left1,right1]和[left2,right2]是否相等

应用了前缀和

#include<iostream>
using namespace std;
// Unsigned 64-bit arithmetic wraps around, which acts as an implicit mod 2^64.
typedef unsigned long long ull;
// N: capacity of the buffers; P: base of the polynomial hash (131 or 13331 are the usual choices).
const int N=100010, P=131;
// n: length of the string, m: number of queries.
int n,m;
// The string, stored 1-indexed (it is read into str+1).
char str[N];
// h[i]: hash of the prefix str[1..i]; p[i]: P raised to the i-th power.
ull h[N],p[N];
// Returns the hash of the substring str[l..r] (1-indexed, inclusive): the
// prefix hash h[r] minus h[l-1] shifted left by the substring's length.
ull get(int l,int r){
	const int length=r-l+1;
	const ull shiftedPrefix=h[l-1]*p[length];
	return h[r]-shiftedPrefix;
}
// Reads n, m and the string, builds the power and prefix-hash tables, then
// answers m queries "l1 r1 l2 r2" with Yes/No depending on whether the two
// substrings have equal hashes.
int main(){
	scanf("%d%d%s",&n,&m,str+1);
	p[0]=1;
	int i=1;
	while(i<=n){
		h[i]=h[i-1]*P+str[i];  // extend the prefix hash by one character
		p[i]=p[i-1]*P;         // next power of the base
		++i;
	}
	while(m-- != 0){
		int l1,r1,l2,r2;
		scanf("%d%d%d%d",&l1,&r1,&l2,&r2);
		puts(get(l1,r1)==get(l2,r2) ? "Yes" : "No");
	}
	return 0;
}