哈希表（开散列和闭散列）简易实现

最新推荐文章于 2023-05-17 10:12:14 发布

小丑小丑小丑

最新推荐文章于 2023-05-17 10:12:14 发布

阅读量259

点赞数

分类专栏：算法文章标签：散列表哈希算法数据结构

本文链接：https://blog.csdn.net/m0_51641706/article/details/124523458

版权

算法专栏收录该内容

26 篇文章 1 订阅

订阅专栏

前言：这里是用oop实现的哈希表，实现的很简易，没有实现迭代器等。用于复习这两个散列方式的思想

文章目录

开放寻址法（闭散列）
- 负载因子
- 仿函数
拉链法（开散列）
- 结构
- 负载因子

开放寻址法（闭散列）

先贴代码：

#include <iostream>
#include <vector>
using namespace std;

// Occupancy flag stored in every slot of the open-addressing table.
enum state
{
	EMPTY = 0,  // slot is unused
	EXIST = 1,  // slot currently holds a live key/value pair
	DELETE = 2  // slot held a pair that has since been erased
};

template<class k, class v>
struct hashdata
{
	k key;
	v val;
	state s;
	hashdata(const k& _key = 0, const v& _val = 0)
	{
		key = _key, val = _val;
		s = EMPTY;
	}
};

// Default hash functor: converts a key that is itself convertible to an
// integer (integral or floating-point types) into a size_t.
template<class k>
struct hashfunc
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const k& key) const
	{
		return static_cast<size_t>(key);
	}
};

// Hash functor specialization for std::string.
// Folds the characters into one size_t: for every character the running
// value is multiplied by 131 and the character is added. Unsigned overflow
// simply wraps around.
template<>
struct hashfunc<std::string>
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const std::string& s) const
	{
		size_t val = 0;
		for (char ch : s)
		{
			val *= 131;
			val += ch;
		}
		return val;
	}
};

template<class k, class v, class hash = hashfunc<k>>
class hashmap
{
	typedef hashdata<k, v> hashdata;
public:
	hashmap()
	{
		table.resize(10);
	}

	bool insert(const k& key, const v& val) 
	{
		hashdata* res = find(key);
		if (res) return false;

		if ((double)sz / table.size() > 0.7)
		{
			hashmap<k, v> tmp;
			tmp.table.resize(table.size() * 2);
			for (auto &e : table)
			{
				if(e.s == EXIST)
					tmp.insert(e.key, e.val);
			}
			table.swap(tmp.table);
		}
		hash h;
		int index = h(key) % table.size();
		while (table[index].s == EXIST)
		{
			index = (index + 1) % table.size();
		}
		table[index].key = key, table[index].val = val, table[index].s = EXIST;
		sz++;
		return true;
	}

	hashdata* find(const k& key)
	{
		int index = key % table.size();
		while (table[index].s == EXIST)
		{
			if (table[index].key == key)
				return &table[index];
			index = (index + 1) % table.size();
		}
		return nullptr;
	}

	bool erase(const k& key)
	{
		hashdata* data = find(key);
		if (data == nullptr) return false;
		sz--;

		data->s = DELETE;
	}
private:
	vector<hashdata> table;
	size_t sz;
};

关于开放寻址法的实现有很多，包括y总的那种极简实现方法。这里写成这样，主要是想学习到仿函数和负载因子两个知识点。

负载因子

负载因子的定义是:已插入的元素个数 / 哈希表的槽位总数（对应代码中的 sz / table.size()）。
负载因子会影响冲突的可能性。
负载因子越大，冲突的可能性越大。负载因子越小，冲突可能性越小。

说人话就是：插入的数据越多，越可能冲突。插入的数据越少，越难冲突。

用开放寻址法的话，负载因子一般控制在0.7以内，一旦负载因子超过0.7（对应代码中的 > 0.7），就进行扩容，让分母变大，使负载因子变小，从而减少冲突。

具体代码逻辑如下：
这部分代码就是扩容，并将之前的数据拷贝到新的表当中。

// Grow once more than 70% of the slots hold live entries.
if (static_cast<double>(sz) / table.size() > 0.7)
{
	// Use the same hash type as the enclosing map; hashmap<k, v> would fall
	// back to the default functor and ignore a user-supplied one.
	hashmap<k, v, hash> tmp;
	tmp.table.resize(table.size() * 2);
	for (const auto& e : table)
	{
		// Only live entries are carried over; erased slots are dropped.
		if (e.s == EXIST)
			tmp.insert(e.key, e.val);
	}
	table.swap(tmp.table);
}

仿函数

我们知道，在priority_queue，map和set里面都可以传入一个仿函数，用于自定义元素比较的规则。

在hash里面也是需要传入仿函数的。

可能会有这个问题：哈希又不比较元素大小，为什么要仿函数？？
但是哈希要取模运算得到下标呀,因此哈希的仿函数实现的是将一个元素转化成整型的功能，只有整型才可以进行模运算。

以下就是两个仿函数：
第一个仿函数用于本身就能转换成整型的 key（各种整型、浮点数等），直接把 key 转成 size_t 返回。
第二个仿函数是用于将字符串变成整型的。

// Default hash functor: converts a key that is itself convertible to an
// integer (integral or floating-point types) into a size_t.
template<class k>
struct hashfunc
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const k& key) const
	{
		return static_cast<size_t>(key);
	}
};

// Hash functor specialization for std::string.
// Folds the characters into one size_t: for every character the running
// value is multiplied by 131 and the character is added. Unsigned overflow
// simply wraps around.
template<>
struct hashfunc<std::string>
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const std::string& s) const
	{
		size_t val = 0;
		for (char ch : s)
		{
			val *= 131;
			val += ch;
		}
		return val;
	}
};

对于字符串变成整型，先辈们的做法是：从0开始，每读入一个字符，先把当前结果乘以131，再加上这个字符的值。乘131的意义是减少冲突概率。

// Fold every character of s into val: scale the running value by 131,
// then add the character.
size_t val = 0;
for (auto ch : s)
	val = val * 131 + ch;

拉链法（开散列）

先贴代码：

#include <iostream>
#include <vector>
using namespace std;

// Singly linked list node used for one bucket chain: stores a key/value
// pair and the link to the following node.
template<class k, class v>
struct hashnode
{
	hashnode<k, v>* next;  // following node in the same bucket, or nullptr
	std::pair<k, v> p;     // p.first is the key, p.second the value

	// Creates an unlinked node (next == nullptr) holding a copy of _p.
	hashnode(std::pair<k, v> _p = {})
		: next(nullptr), p(_p.first, _p.second)
	{
	}
};

// Default hash functor: converts a key that is itself convertible to an
// integer (integral or floating-point types) into a size_t.
template<class k>
struct hashfunc
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const k& key) const
	{
		return static_cast<size_t>(key);
	}
};

// Hash functor specialization for std::string.
// Folds the characters into one size_t: for every character the running
// value is multiplied by 131 and the character is added. Unsigned overflow
// simply wraps around.
template<>
struct hashfunc<std::string>
{
	// const-qualified so the functor can also be called through a const
	// object or a const reference.
	size_t operator()(const std::string& s) const
	{
		size_t val = 0;
		for (char ch : s)
		{
			val *= 131;
			val += ch;
		}
		return val;
	}
};

template<class k, class v, class hash = hashfunc<k>>
class hashmap
{
	typedef hashnode<k, v> node;
public:
	hashmap()
	{
		table.resize(10);
	}

	bool insert(const k& key, const v& val)
	{
		node* res = find(key);
		if (res) return false;

		hash h;
		if (sz == table.size())
		{
			vector<node*> newtable;
			newtable.resize(table.size() * 2, nullptr);
			for (const auto& e : table)
			{
				node* cur = e, *next = e->next;
				while (cur)
				{
					int index = h(cur->p.first) % newtable.size();
					cur->next = newtable[index];
					newtable[index] = cur;
					cur = next;
					if(next) next = next->next;
				}
			}
			table.swap(newtable);
		}

		int index = h(key) % table.size();
		node* cur = new node({ key, val });
		cur->next = table[index];
		table[index] = cur;
		sz++;
	}

	bool erase(const k& key)
	{
		node* res = find(key);
		if (res == nullptr) return false;

		hash h;
		int index = h(key) % table.size();
		node* cur = table[index], *prev = nullptr;
		while (cur)
		{
			if (cur->p.first == key)
			{
				if (cur == table[index]) table[index] = cur->next;
				else prev->next = cur->next;
				delete cur;
				cur = nullptr;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->next;
			}
		}
		return false;
	}

	node* find(const k& key)
	{
		hash h;
		int index = h(key) % table.size();
		node* cur = table[index];
		while (cur)
		{
			if (cur->p.first == key)
			{
				return cur;
			}
			else
			{
				cur = cur->next;
			}
		}
		return nullptr;
	}
private:
	vector<node*> table;
	size_t sz;
};

拉链法的仿函数实现和开放寻址完全一致，拉链法的重点是结构与负载因子

结构

数组存的链表的头节点，至于你想不想带dummyNode取决于自己。这里实现的时候并没有带dummyNode，因此结构如下：
在这里插入图片描述
如果要删除节点1：

table[index] = 节点2;//头节点换成节点2
delete 节点1;

插入节点的时候采用头插，这样比较方便。

负载因子

拉链法的负载因子应该控制在1以下.
为什么拉链法的负载因子和开放寻址的负载因子不同？
因为拉链法中冲突的元素只会挂在各自桶的链表上，不会去占用其他桶的位置，所以在同样的负载因子下，冲突的概率和代价都更小。

下面这段代码是实现扩容时拷贝原来节点的问题。
并不需要把原来的节点全部复制一份，只需要把原来的节点直接头插到扩容后的新表即可。
ps:这里是没有dummyNode的头插，所以看起来稍微有点奇怪。

没有dummyNode的头插方法是直接插入，然后移动头节点

// Grow when the load factor reaches 1 (one entry per bucket on average).
if (sz == table.size())
{
	std::vector<node*> newtable(table.size() * 2, nullptr);
	for (node* cur : table)
	{
		// An empty bucket holds nullptr and is skipped here; reading
		// cur->next before this check would dereference a null pointer.
		while (cur)
		{
			// Save the successor before relinking cur.
			node* next = cur->next;
			const size_t index = h(cur->p.first) % newtable.size();
			// Head-insert the existing node into the new table (no copy).
			cur->next = newtable[index];
			newtable[index] = cur;
			cur = next;
		}
	}
	table.swap(newtable);
}

小丑小丑小丑

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
哈希表（开散列和闭散列）简易实现

前言：这里是用oop实现的哈希表，实现的很简易，没有实现迭代器等。用于复习这两个散列方式的思想文章目录开放寻址发（闭散列）负载因子仿函数拉链法（开散列）结构负载因子开放寻址发（闭散列）先贴代码：#include <iostream>#include <vector>using namespace std;enum state{ EMPTY, EXIST, DELETE};template<class k, class v>struct .
复制链接

扫一扫

专栏目录