Section 15.Hash表

最新推荐文章于 2024-11-01 17:00:11 发布

Zenita

最新推荐文章于 2024-11-01 17:00:11 发布

阅读量569

点赞数 1

分类专栏：算法基础文章标签：哈希算法算法 c++ 数据结构

本文链接：https://blog.csdn.net/qq_31383387/article/details/122888677

版权

算法基础专栏收录该内容

17 篇文章 0 订阅

订阅专栏

一、什么是哈希表？

通过一个散列函数，将任意元素映射在数组下标中。它提供了快速的插入操作和查找操作，无论哈希表中有多少条数据，插入和查找的时间复杂度都是为O(1)。

二、哈希表的存储结构（冲突的处理方式）

开放寻址法
拉链法：在冲突位置引出链表

三、数字哈希

AcWing840https://www.acwing.com/problem/content/842/

拉链法

#include <iostream>
#include <cstring>
const int N = 100003;  //大于十万（题目数据范围）的最小质数
int h[N];  //头数组（存储哈希表）
int e[N], ne[N];  //链表的值以及next数组
int idx;  //当前用到了哪个下标
using namespace std;
void insert(int x)  //插入x
{
	int k = (x % N + N) % N;  //k是哈希值，该行通过哈希函数将x存储在0~N的范围内，因为x可能为负值，所以x%N后+N再%N
	e[idx] = x; ne[idx] = h[k]; h[k] = idx++;  //单链表的插入，h[k]存储头节点的next值
}
bool find(int x)  //寻找x
{
	int k = (x % N + N) % N;
	for (int i = h[k]; i != -1; i = ne[i])
	{
		if (e[i] == x)
			return true;
	}
	return false;
}
int main()
{
	int n;
	cin >> n;
	memset(h, -1, sizeof(h));  //空指针用-1表示
	while (n--)
	{
		char op[2];
		int x;
		cin >> op >> x;
		if (op[0] == 'I')
			insert(x);
		else
		{
			if (find(x))
				cout << "Yes\n";
			else
				cout << "No\n";
		}
	}
	return 0;
}

开放寻址法

#include <iostream>
#include <cstring>
const int N = 200003;  //大于20万（题目数据的2-3倍）的最小质数
const int null = 0x3f3f3f3f;  //空的位置用一个超出题目数据范围（-1e9~1e9）的数表示
int h[N];
using namespace std;
int find(int x)  //寻找x,如果h[N]中存在x，则返回它的位置，如果不存在x，则返回它应该被存储的位置
{
	int k = (x % N + N) % N;
	while (h[k] != null && h[k] != x)  //如果这个位置非空且这个位置的数不等于x
	{
		k++;
		if (k == N)  //如果循环到最后
			k = 0;  //从头开始查找
	}
	return k;
}
int main()
{
	int n;
	cin >> n;
	memset(h, 0x3f, sizeof(h));  //memset对每个字节赋值，一个int占四个字节，所以每个字节都是0x3f时，h中每个地址存储的值都是0x3f3f3f3f
	while (n--)
	{
		char op[2];
		int x;
		cin >> op >> x;
		if (op[0] == 'I')
		{
			int k = find(x);
			h[k] = x;
		}
		else
		{
			int k = find(x);
			if (h[k] != null)
				cout << "Yes\n";
			else
				cout << "No\n";
		}
	}
	return 0;
}

四、字符串哈希（字符串前缀哈希法）

若str = ABCABCDECZWBLOG

则，令

h[0] = 0;

h[1] = "A"的hash值

h[2] = "AB"的hash值

h[3] = "ABC"的hash值

h[4] = "ABCA"的hash值

h[5] = "ABCAB"的hash值

……

如何定义某一个字符串的hash值？

把字符串看成一个p进制的数。

例如：我们有字符串“ABCD”，我们假设A对应1，B对应2，C对应3，D对应4，那么我们可以把“ABCD”看成一个有四位数字的p进制数，即 $\left ( 1234 \right )_p$ 。

那么，如果我们把这个数换算成十进制，就是 $1\times p^{3}+2\times p^{2}+3\times p^{1}+4\times p^{0}$ ，因为字符串往往很长，所以如果换算成十进制这个数会非常大，所以我们可以将结果 $mod~Q$ ，这样就可以将这个数（即字符串）映射在下标为 $0\sim Q-1$ 的数组中

不能将某个字符映射成0

[神奇的经验值]当p=131或者13331且Q= $2^{64}$ 时，在99.99%的情况下不会产生冲突

[怎么证?我也不知道]如果已知h[L-1]和h[R]，那么str[L]~str[R]的哈希值为 $h[R]-h[L-1]\times p^{R-L+1}$

要存储h[]，我们可以使用 $unsigned~long~long$ ，因为 $unsigned~long~long$ 的存储范围为 $0\sim 2^{64}-1$ ，如果溢出，就相当于 $mod~2^{64}$ ，省去了取模的过程

预处理前缀的哈希值：h[i] = h[i-1] $\times$ p + str[i]

AcWing841https://www.acwing.com/problem/content/843/

#include <iostream>
using namespace std;
const int N = 100010, P = 131;
typedef unsigned long long ULL;
int n, m;
char str[N];
ULL h[N], p[N];  //p数组用来存储公式中超大的p的r-l+1次方,无须担心数据溢出，反正要取模
ULL get(int l, int r)  //求出str[l]~str[r]的哈希值
{
	return h[r] - h[l - 1] * p[r - l + 1];
}
int main()
{
	scanf("%d%d%s", &n, &m, str + 1);  //str下标从1开始
	p[0] = 1;
	for (int i = 1; i <= n; i++)
	{
		p[i] = p[i - 1] * P;
		h[i] = h[i - 1] * P + str[i];  //str[i]只要非零即可。这里直接用ASCII码的转换作为哈希函数
	}
	while (m--)
	{
		int l1, r1, l2, r2;
		cin >> l1 >> r1 >> l2 >> r2;
		if (get(l1, r1) == get(l2, r2))
			cout << "Yes\n";
		else
			cout << "No\n";
	}
	return 0;
}