【 C++ 】用哈希表封装unordered_map和unordered_set

最新推荐文章于 2024-10-09 10:07:13 发布

.滄海难为水

最新推荐文章于 2024-10-09 10:07:13 发布

阅读量1.5k

点赞数 46

分类专栏： C++ 文章标签：散列表 c++ 哈希算法

本文链接：https://blog.csdn.net/JX_BC/article/details/136081453

版权

C++ 专栏收录该内容

28 篇文章 1 订阅

订阅专栏

哈希表源代码

根据先前对unordered_map和unordered_set的学习，我们得知其底层是借助哈希表来实现的，因此这篇博客将用开散列哈希表来模拟实现unordered_map和unordered_set，哈希表源代码如下：

#pragma once
#include<iostream>
#include<string>
#include<vector>
using namespace std;

//利用仿函数将数据类型转换为整型
template<class K>
struct DefaultHash
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
//模板的特化
template<>
struct DefaultHash<string>
{
	size_t operator()(const string& key)
	{
		//BKDR哈希算法
		size_t hash = 0;
		for (auto ch : key)
		{
			hash = hash * 131 + ch;//把所有字符的ascii码值累计加起来
		}
		return hash;
	}
};

//闭散列哈希表的模拟实现
namespace CloseHash
{
	enum State
	{
		EMPTY,
		EXIST,
		DELETE
	};
	//哈希节点状态的类
	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;
		State _state = EMPTY;//记录每个位置的状态，默认给空
	};


	//哈希表的类
	template<class K, class V, class HashFunc = DefaultHash<K>>//添加仿函数便于把其他类型的数据转换为整型数据
	class HashTable
	{
		typedef HashData<K, V> Data;
	public:
		//插入
		bool Insert(const pair<K, V>& kv)
		{
			//1、去除冗余
			if (Find(kv.first))
			{
				//说明此值已经有了，直接返回false
				return false;
			}
			//2、扩容
				//负载因子超过0.7，就扩容
			if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
			{
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				//扩容以后，需要重新建立映射关系
				HashTable<K, V, HashFunc> newHT;
				newHT._tables.resize(newSize);
				//遍历旧表，把旧表每个存在的元素插入newHT
				for (auto& e : _tables)
				{
					if (e._state == EXIST)
					{
						newHT.Insert(e._kv);
					}
				}
				newHT._tables.swap(_tables);//建立映射关系后交换
			}
			//3、插入
			HashFunc hf;
			size_t starti = hf(kv.first);//取出键值对的key，并且避免了负数的情况，借用仿函数确保是整型数据
			starti %= _tables.size();
			size_t hashi = starti;
			size_t i = 1;
			//线性探测/二次探测
			while (_tables[hashi]._state == EXIST)
			{
				hashi = starti + i;//二次探测改为 +i^2
				++i;
				hashi %= _tables.size();//防止hashi超出数组
			}
			_tables[hashi]._kv = kv;
			_tables[hashi]._state = EXIST;
			_n++;
			return true;
		}
		//查找
		Data* Find(const K& key)
		{
			//判断表的size是否为0
			if (_tables.size() == 0)
			{
				return nullptr;
			}
			HashFunc hf;
			size_t starti = hf(key);//通过仿函数把其它类型数据转为整型数据
			starti %= _tables.size();
			size_t hashi = starti;
			size_t i = 1;
			//线性探测/二次探测
			while (_tables[hashi]._state != EMPTY)//不为空就继续
			{
				if (_tables[hashi]._state != DELETE && _tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];//找到了就返回此对象的地址
				}
				hashi = starti + i;//二次探测改为 +i^2
				++i;
				hashi %= _tables.size();//防止hashi超出数组
			}
			return nullptr;
		}
		//删除
		bool Erase(const K& key)
		{
			//复用Find函数去帮助我们查找删除的值是否存在
			Data* ret = Find(key);
			if (ret)
			{
				//若存在，直接把此位置的状态置为DELETE即可
				ret->_state = DELETE;
				return true;
			}
			else
			{
				return false;
			}
		}
	private:
		vector<Data> _tables;
		size_t _n = 0;//记录存放的有效数据的个数
	};
}

//开散列哈希桶的实现
namespace Bucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;
		//构造函数
		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};
	template<class K, class V, class HashFunc = DefaultHash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		//析构函数
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;//释放后置空
			}
		}
		//插入
		bool Insert(const pair<K, V>& kv)
		{
			//1、去除冗余
			if (Find(kv.first))
			{
				return false;
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			//2、负载因子 == 1就扩容
			if (_tables.size() == _n)
			{
				/*法一
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				HashTable<K, V> newHT;//
				newHT._tables.resize(newSize, nullptr);
				//遍历旧表，把旧表的数据重新映射到新表
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)//如果cur不为空，就插入
					{
						newHT.Insert(cur->_kv);
						cur = cur->_next;
					}
				}
				newHT._tables.swap(_tables);*/

				//法二：
				size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				vector<Node*> newTable;
				newTable.resize(newSize, nullptr);
				for (size_t i = 0; i < _tables.size(); i++)//遍历旧表
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hashi = hf(cur->_kv.first) % newSize;//确认映射到新表的位置
						//头插
						cur->_next = newTable[hashi];
						newTable[hashi] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				newTable.swap(_tables);
			}
			//3、头插
			//找到对应的映射位置
			size_t hashi = hf(kv.first);
			hashi %= _tables.size();
			//头插到对应的桶即可
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;
			return true;
		}
		//查找
		Node* Find(const K& key)
		{
			//防止后续除0错误
			if (_tables.size() == 0)
			{
				return nullptr;
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			//找到对应的映射下标位置
			size_t hashi = hf(key);
			//size_t hashi = HashFunc()(key);//匿名对象
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			//在此位置的链表中进行遍历查找
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					//找到了
					return cur;
				}
				cur = cur->_next;
			}
			//遍历结束，没有找到，返回nullptr
			return nullptr;
		}
		//删除
		/*法一*/
		bool Erase(const K& key)
		{
			//防止后续除0错误
			if (_tables.size() == 0)
			{
				return false;
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			//找到key所对应的哈希桶的位置
			size_t hashi = hf(key);
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			Node* prev = nullptr;
			//删除
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)//头删
					{
						_tables[hashi] = cur->_next;
					}
					else//非头删
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}
		/*法二替换法……*/

	private:
		//指针数组
		vector<Node*> _tables;
		size_t _n = 0;//记录有效数据的个数
	};
}

下面我将对哈希表进行改造，使其能够很好的封装unordered_map与unordered_set。

哈希函数模板参数的控制

我们都清楚unorderded_map是KV模型，而unordered_set是K模型，而先前实现的哈希表是写死的pair键值对的KV模型，很显然不适用unordered_set模型，因此要实现一个泛型，这就需要我们对哈希表的模板参数进行控制。

更改哈希节点的模板参数：

这里我们把参数设为T，如果后续传入节点的类型为K模型，则T为K模型，若为pair键值对KV模型，则为KV模型，代码如下：

template<class T>
struct HashNode
{
	T _data;
	HashNode<T>* _next;
	//构造函数
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

更改哈希表的模板参数：

这里我们把第二个模板参数设为T，便于识别后续传入的数据类型。

//unordered_map -> HashTable<K, pair<K, V>> _ht;
//unordered_set -> HashTable<K, K> _ht;
template<class K, class T, class HashFunc = DefaultHash<K>>
class HashTable

unordered_set的参数控制：

如果上层使用的是unordered_set容器，那么哈希表的参数对应的就是K，K类型。

template<class K>
class unordered_set
{
private:
	Bucket::HashTable<K, K> _ht;
};

unordered_map的参数控制：

如果上层使用的是unordered_map容器，那么哈希表的参数对应的就是K，pair<K, V>类型。

template<class K, class V>
class unordered_map
{
private:
	Bucket::HashTable<K, pair<K, V>> _ht;
};

对上层容器构建仿函数便于后续映射

在哈希映射的过程中，我们需要获得元素的键值，然后通过对应的哈希函数计算获得映射的地址，上一步为了适配_set和_map，我们对模板参数进行了整改，现在哈希节点存储的数据类型是T，T可能是一个键值，也可能是一个键值对，底层的哈希并不知道传入的数据是啥类型，因此我们需要对上层容器套一层仿函数来告诉底层哈希。

unordered_set的仿函数：

针对于unordered_set这种K类型的容器，我们直接返回key即可。

template<class K>
class unordered_set
{
	//仿函数
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
private:
	Bucket::HashTable<K, K, SetKeyOfT> _ht;
};

unordered_map的仿函数：

_map的数据类型是pair键值对，我们只需要取出其first第一个数据key然后返回即可。

template<class K, class V>
class unordered_map
{
	//仿函数
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
private:
	Bucket::HashTable<K, pair<K, V>, MapKeyOfT> _ht;
};

更改底层哈希的模板参数：

上层容器的仿函数已经套好，下面需要整改下底层哈希的模板参数来接收上层容器的数据类型。

//unordered_map -> HashTable<K, pair<K, V>, MapKeyOfT> _ht;
//unordered_set -> HashTable<K, K, SetKeyOfT> _ht;
template<class K, class T, class KeyOfT, class HashFunc = DefaultHash<K>>
class HashTable

部分类型无法取模问题

现在，我们已经通过构建仿函数成功获得无论unordered_map还是unordered_set的key数据，但是这又存在一个问题，那就是建立映射关系时存在无法取模的问题，就比如说我现在有一个自定义类型的日期类，那么我传入的模板参数就是Date日期类：

struct Date
{
	Date(int year = 1, int month = 1, int day = 1)
		:_year(year)
		, _month(month)
		, _day(day)
	{}
	bool operator==(const Date& d) const
	{
		return _year == d._year
			&& _month == d._month
			&& _day == d._day;
	}
	int _year;
	int _month;
	int _day;
};
cpp::unordered_set<Date> sd;
sd.insert(Date(2022, 3, 4));

这里的key是自定义类型日期类Date，不能直接转化为整型，我们可以像之前那样整个特化，但是日期类不是常用的类，而且这也相当于是改库了，比较好的解决办法是显示的传一个仿函数，来帮助我们解决取模问题。但是就要把放到哈希表这一层的仿函数挪到上层容器的模板参数那，整改后上层容器的模板参数如下：

unordered_set的模板参数：

template<class K, class HashFunc = DefaultHash<K>>
class unordered_set
{
	//仿函数
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	//插入
	bool insert(const K& key)
	{
		return _ht.Insert(key);
	}
private:
	Bucket::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
};

unordered_map的模板参数：

template<class K, class V, class HashFunc = DefaultHash<K>>
class unordered_map
{
	//仿函数
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	//插入
	bool insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}
private:
	Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;
};

哈希表底层的模板参数：

template<class K, class T, class KeyOfT, class HashFunc>
class HashTable

更改了仿函数的设定方式后，我们就可以针对上述日期类单独写个仿函数并采用BKDRhash算法来帮助我们获取对应的整型，以此完成日期类的映射关系：

struct DateHash
{
	size_t operator()(const Date& d)
	{
		size_t hash = 0;
		hash += d._year;
		hash *= 131;
		hash += d._month;
		hash *= 1313;
		hash += d._day;
		return hash;
	}
};
cpp::unordered_set<Date, DateHash> sd;
sd.insert(Date(2022, 3, 4));
sd.insert(Date(2022, 4, 3));

建立映射关系后，调试窗口如下：

在这里插入图片描述

哈希表底层迭代器的实现

大体框架

这里的框架遵循两个关键点：

哈希表的正向迭代器实际上就是对哈希结点指针进行了封装，但是由于在实现++运算符重载时，需要在哈希表中去寻找下一个非空哈希桶，因此每一个正向迭代器中除了存放节点指针外，还都应该存储一个哈希表的地址。最后写个构造函数，初始化_node和_pht。
这里我们的迭代器的位置放在了哈希表（HashTable）的上面，而我迭代器内部又使用了HashTable，因为编译器是向上寻找，按照此位置摆放，迭代器的类里是找不到HashTable的，所以我们需要把HashTable的类在迭代器前面进行声明。

//哈希表的声明
template<class K, class T, class KeyOfT, class HashFunc>
class HashTable;
//正向迭代器
template<class K, class T, class KeyOfT, class HashFunc>
class __HTIterator
{
	typedef HashNode<T> Node; //哈希节点的类型
	typedef __HTIterator<K, T, KeyOfT, HashFunc> Self; //正向迭代器的类型
public:
	//构造函数
	__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
		:_node(node)
		, _pht(pht)
	{}
	//……
	Node* _node;//节点指针
	HashTable<K, T, KeyOfT, HashFunc>* _pht;//哈希表的地址
};

++运算符重载

假设此时的哈希表结构如图所示：

在这里插入图片描述

注意这里是一个哈希桶结构，每一个桶都是一串单链表，在单个桶中，我们拿到头节点指针，可以挨个遍历++it走完全程，但是这是多串单链表，我们需要保证在一个桶走完后要到下一个桶去，因此在++运算符重载的设定上要按照如下规则：

若当前结点不是当前哈希桶中的最后一个结点，则++后走到当前哈希桶的下一个结点。
若当前结点是当前哈希桶的最后一个结点，则++后走到下一个非空哈希桶的第一个结点。

//++运算符重载
Self& operator++()
{
	if (_node->_next)
	{
		_node = _node->_next;
	}
	else//当前桶已经走完，需要到下一个不为空的桶
	{
		KeyOfT kot;//取出key数据
		HashFunc hf;//转换成整型
		size_t hashi = hf(kot(_node->_data));
		for (; hashi < _pht->_tables.size(); ++hashi)
		{
			if (_pht->_tables[hashi])//更新节点指针到非空的桶
			{
				_node = _pht->_tables[hashi];
				break;
			}
		}
		//没有找到不为空的桶，用nullptr去做end标识
		if (hashi == _pht->_tables.size())
		{
			_node = nullptr;
		}
	}
	return *this;
}

由于正向迭代器中++运算符重载函数在寻找下一个结点时，会访问哈希表中的成员变量_table，而_table成员变量是哈希表的私有成员，因此我们需要将正向迭代器类声明为哈希表类的友元。

template<class K, class T, class KeyOfT, class HashFunc>
class HashTable
{
	//把迭代器设为HashTable的友元
	template<class K, class T, class KeyOfT, class HashFunc>
	friend class __HTIterator;
 
	typedef HashNode<T> Node;//哈希结点类型
public:
	//……
}

!= 和 == 运算符重载

比较两迭代器是否相等，只需要判断两迭代器所封装的节点是否是同一个即可。

//!=运算符重载
bool operator!=(const Self& s) const
{
	return _node != s._node;
}
//==运算符重载
bool operator==(const Self& s) const
{
	return _node == s._node;
}

* 和 -> 运算符重载

*运算符返回的是哈希节点数据的引用。
->运算符返回的是哈希节点数据的地址。

//*运算符重载
T& operator*()
{
	return _node->_data;//返回哈希节点中数据的引用
}
//->运算符重载
T* operator->()
{
	return &(_node->_data);//返回哈希节点中数据的地址
}

哈希表的迭代器相关函数（begin和end）

这里我们需要进行正向迭代器类型的typedef，需要注意的是，为了让外部能够使用typedef后的正向迭代器类型iterator，我们需要在public区域进行typedef。

template<class K, class T, class KeyOfT, class HashFunc>
class HashTable
{
    //把迭代器设为HashTable的友元
	template<class K, class T, class KeyOfT, class HashFunc>
	friend class __HTIterator;
 
	typedef HashNode<T> Node;//哈希结点类型
public:
	typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;//正向迭代器的类型
}

typedef之后，就可以实现begin()和end()的函数了。

begin()：

遍历哈希表，返回第一个不为空的桶的第一个节点的迭代器位置，借助正向迭代器的构造函数完成，还得传this指针（哈希表的指针）。
若遍历结束没找到，说明哈希表里没有一个是空，直接返回end()尾部的迭代器位置。

//begin
iterator begin()
{
	for (size_t i = 0; i < _tables.size(); i++)
	{
		Node* cur = _tables[i];
		//找到第一个不为空的桶的节点位置
		if (cur)
		{
			return iterator(cur, this);
		}
	}
	return end();
}

end()：

哈希表的end()直接返回迭代器的构造函数（节点指针为空，哈希表指针为this）即可。

//end()
iterator end()
{
	return iterator(nullptr, this);
}

哈希表的优化（素数表）

在除留余数法时，最好模一个素数，这样模完后不会那么容易出现哈希冲突的问题，因此我们可以专门写一个素数表来解决。

//素数表
size_t GetNextPrime(size_t prime)
{
	const int PRIMECOUNT = 28;
	static const size_t primeList[PRIMECOUNT] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};
 
	// 获取比prime大那一个素数
	size_t i = 0;
	for (; i < PRIMECOUNT; ++i)
	{
		if (primeList[i] > prime)
			return primeList[i];
	}
 
	return primeList[i];
}

unordered_map的插入和[ ]运算符重载

insert插入：

unordered_map的数据类型是KV模型，其插入的是一个pair键值对，这里要区别于unordered_set，实现方式也有所区别。

//插入
pair<iterator, bool> insert(const pair<K, V>& kv)
{
	return _ht.Insert(kv);
}

[ ]运算符重载：

首先调用insert函数插入键值对返回迭代器ret。
通过返回的迭代器ret调用元素值value。

//[]运算符重载
V& operator[](const K& key)
{
	pair<iterator, bool> ret = insert(make_pair(key, V()));
	return ret.first->second;
}

封装后源代码

HashTable.h

#pragma once
#include<iostream>
#include<string>
#include<vector>
using namespace std;

//利用仿函数将数据类型转换为整型
template<class K>
struct DefaultHash
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
//模板的特化
template<>
struct DefaultHash<string>
{
	size_t operator()(const string& key)
	{
		//BKDR哈希算法
		size_t hash = 0;
		for (auto ch : key)
		{
			hash = hash * 131 + ch;//把所有字符的ascii码值累计加起来
		}
		return hash;
	}
};

//开散列哈希桶的实现
namespace Bucket
{
	//哈希节点的类
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;
		//构造函数
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	//哈希表的声明
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;
	//正向迭代器
	template<class K, class T, class KeyOfT, class HashFunc>
	class __HTIterator
	{
		typedef HashNode<T> Node; //哈希节点的类型
		typedef __HTIterator<K, T, KeyOfT, HashFunc> Self; //正向迭代器的类型
	public:
		Node* _node;//节点指针
		HashTable<K, T, KeyOfT, HashFunc>* _pht;//哈希表的地址
		//构造函数
		__HTIterator(){}//默认的
		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			, _pht(pht)
		{}

		//++运算符重载
		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else//当前桶已经走完，需要到下一个不为空的桶
			{
				KeyOfT kot;//取出key数据
				HashFunc hf;//转换成整型
				size_t hashi = hf(kot(_node->_data)) % _pht->_tables.size();
				++hashi;
				for (; hashi < _pht->_tables.size(); ++hashi)
				{
					if (_pht->_tables[hashi])//更新节点指针到非空的桶
					{
						_node = _pht->_tables[hashi];
						break;
					}
				}
				//没有找到不为空的桶，用nullptr去做end标识
				if (hashi == _pht->_tables.size())
				{
					_node = nullptr;
				}
			}
			return *this;
		}
		//*运算符重载
		T& operator*()
		{
			return _node->_data;//返回哈希节点中数据的引用
		}
		//->运算符重载
		T* operator->()
		{
			return &_node->_data;//返回哈希节点中数据的地址
		}

		//!=运算符重载
		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}
		//==运算符重载
		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

	};

	//unordered_map -> HashTable<K, pair<K, V>, MapKeyOfT> _ht;
	//unordered_set -> HashTable<K, K, SetKeyOfT> _ht;
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable
	{
		//把迭代器设为HashTable的友元
		template<class K, class T, class KeyOfT, class HashFunc>
		friend class __HTIterator;

		typedef HashNode<T> Node;//哈希结点类型
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;//正向迭代器的类型
		
		//begin
		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				//找到第一个不为空的桶的节点位置
				if (cur)
				{
					return iterator(cur, this);
				}
			}
			return end();
		}
		//end()
		iterator end()
		{
			return iterator(nullptr, this);
		}

		//析构函数
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;//释放后置空
			}
		}
		//素数表
		size_t GetNextPrime(size_t prime)
		{
			const int PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 389ul, 769ul,
				1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};

			// 获取比prime大那一个素数
			size_t i = 0;
			for (; i < PRIMECOUNT; ++i)
			{
				if (primeList[i] > prime)
					return primeList[i];
			}

			return primeList[i];
		}

		//插入
		pair<iterator, bool> Insert(const T& data)
		{
			KeyOfT kot;
			//1、去除冗余
			iterator pos = Find(kot(data));
			if (pos != end())
			{
				return make_pair(pos, false);
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			
			//2、负载因子 == 1就扩容
			if (_tables.size() == _n)
			{
				//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				size_t newSize = GetNextPrime(_tables.size());
				if (newSize != _tables.size())//确保合法性，不可能超过素数表的最大值
				{
					vector<Node*> newTable;
					newTable.resize(newSize, nullptr);
					for (size_t i = 0; i < _tables.size(); ++i)//遍历旧表
					{
						Node* cur = _tables[i];
						while (cur)
						{
							Node* next = cur->_next;
							size_t hashi = hf(kot(cur->_data)) % newSize;//确认映射到新表的位置
							//头插
							cur->_next = newTable[hashi];
							newTable[hashi] = cur;
							cur = next;
						}
						_tables[i] = nullptr;
					}
					newTable.swap(_tables);
				}
			}
			//3、头插
			//找到对应的映射位置
			size_t hashi = hf(kot(data));//调用仿函数取出key，并套用仿函数转成整型
			hashi %= _tables.size();
			//头插到对应的桶即可
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;
			return make_pair(iterator(newnode, this), false);
		}

		//查找
		iterator Find(const K& key)
		{
			//防止后续除0错误
			if (_tables.size() == 0)
			{
				return iterator(nullptr, this);
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			KeyOfT kot;
			//找到对应的映射下标位置
			size_t hashi = hf(key);
			//size_t hashi = HashFunc()(key);//匿名对象
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			//在此位置的链表中进行遍历查找
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					//找到了
					return iterator(cur, this);
				}
				cur = cur->_next;
			}
			//遍历结束，没有找到，返回nullptr
			return iterator(nullptr, this);
		}
		//删除
		bool Erase(const K& key)
		{
			//防止后续除0错误
			if (_tables.size() == 0)
			{
				return false;
			}
			//构建仿函数，把数据类型转为整型，便于后续建立映射关系
			HashFunc hf;
			KeyOfT kot;
			//找到key所对应的哈希桶的位置
			size_t hashi = hf(key);
			hashi %= _tables.size();
			Node* cur = _tables[hashi];
			Node* prev = nullptr;
			//删除
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)//头删
					{
						_tables[hashi] = cur->_next;
					}
					else//非头删
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}

	private:
		//指针数组
		vector<Node*> _tables;
		size_t _n = 0;//记录有效数据的个数
	};
}

Unordered_Map.h

#pragma once
#include"HashTable.h"
namespace cpp
{
	template<class K, class V, class HashFunc = DefaultHash<K>>
	class unordered_map
	{
		//仿函数
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		//现在没有实例化，没办法到HashTable里面找iterator，所以typename就是告诉编译器这里是一个类型，实例化以后再去取
		typedef typename Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;
		//begin()
		iterator begin()
		{
			return _ht.begin();
		}
		//end()
		iterator end()
		{
			return _ht.end();
		}
		//插入
		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}
		//[]运算符重载
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert(make_pair(key, V()));
			return ret.first->second;
		}
		//find查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		//erase删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;
	};
}

Unordered_set.h

#pragma once
#include"HashTable.h"
namespace cpp
{
	template<class K, class HashFunc = DefaultHash<K>>
	class unordered_set
	{
		//仿函数
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		//现在没有实例化，没办法到HashTable里面找iterator，所以typename就是告诉编译器这里是一个类型，实例化以后再去取
		typedef typename Bucket::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;
		//begin()
		iterator begin()
		{
			return _ht.begin();
		}
		//end()
		iterator end()
		{
			return _ht.end();
		}
		//insert插入
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		//find查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		//erase删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
	};
}

test.cpp

#define _CRT_SECURE_NO_WARNINGS 1
#include<map>
#include<string>
#include<set>
#include "HashTable.h"
#include "Unordered_Map.h"
#include "Unordered_Set.h"

void test_set()
{
	struct Date
	{
		Date(int year = 1, int month = 1, int day = 1)
			:_year(year)
			, _month(month)
			, _day(day)
		{}
		bool operator==(const Date& d) const
		{
			return _year == d._year 
				&& _month == d._month
				&& _day == d._day;
		}
		int _year;
		int _month;
		int _day;
	};
	struct DateHash
	{
		size_t operator()(const Date& d)
		{
			size_t hash = 0;
			hash += d._year;
			hash *= 131;
			hash += d._month;
			hash *= 1313;
			hash += d._day;
			return hash;
		}
	};
	cpp::unordered_set<Date, DateHash> sd;
	sd.insert(Date(2022, 3, 4));
	sd.insert(Date(2022, 4, 3));




	cpp::unordered_set<int> s;
	//set<int> s;
	s.insert(2);
	s.insert(3);
	s.insert(1);
	s.insert(2);
	s.insert(5);
	s.insert(12);

	cpp::unordered_set<int>::iterator it = s.begin();
	//auto it = s.begin();
	while (it != s.end())
	{
		cout << *it << " ";
		++it;
	}
	cout << endl;
	
	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
	
}
//void test_op()
//{
//	int n = 10000000;
//	vector<int> v;
//	v.reserve(n);
//	srand(time(0));
//	for (int i = 0; i < n; ++i)
//	{
//		//v.push_back(i);
//		//v.push_back(rand()+i);  // 重复少
//		v.push_back(rand());  // 重复多
//	}
//
//	size_t begin1 = clock();
//	set<int> s;
//	for (auto e : v)
//	{
//		s.insert(e);
//	}
//	size_t end1 = clock();
//
//	size_t begin2 = clock();
//	cpp::unordered_set<int> us;
//	for (auto e : v)
//	{
//		us.insert(e);
//	}
//	size_t end2 = clock();
//
//	cout << s.size() << endl;
//
//	cout << "set insert:" << end1 - begin1 << endl;
//	cout << "unordered_set insert:" << end2 - begin2 << endl;
//
//
//	size_t begin3 = clock();
//	for (auto e : v)
//	{
//		s.find(e);
//	}
//	size_t end3 = clock();
//
//	/*size_t begin4 = clock();
//	for (auto e : v)
//	{
//		us.find(e);
//	}
//	size_t end4 = clock();
//	cout << "set find:" << end3 - begin3 << endl;
//	cout << "unordered_set find:" << end4 - begin4 << endl;*/
//
//
//	size_t begin5 = clock();
//	for (auto e : v)
//	{
//		s.erase(e);
//	}
//	size_t end5 = clock();
//
//	/*size_t begin6 = clock();
//	for (auto e : v)
//	{
//		us.erase(e);
//	}
//	size_t end6 = clock();
//	cout << "set erase:" << end5 - begin5 << endl;
//	cout << "unordered_set erase:" << end6 - begin6 << endl;*/
//}
void test_map()
{
	cpp::unordered_map<string, string> dict;
	dict.insert(make_pair("sort", "排序"));
	dict.insert(make_pair("left", "左边"));
	dict.insert(make_pair("left", "剩余"));
	dict["string"];
	dict["left"] = "剩余";
	dict["string"] = "字符串";
	cpp::unordered_map<string, string>::iterator it = dict.begin();
	while (it != dict.end())
	{
		cout << it->first << " " << it->second << endl;
		++it;
	}
	for (auto& kv : dict)
	{
		cout << kv.first << " " << kv.second << endl;
	}
}

int main()
{
	//test_set();
	//test_op();
	test_map();

	return 0;
}

unordered_set的模拟实现源代码

封装好了底层哈希表，剩下的就是相当于是在套模板了，具体实现的功能有插入、删除、查找、迭代器……功能。

#include"HashTable.h"
namespace cpp
{
	template<class K, class HashFunc = DefaultHash<K>>
	class unordered_set
	{
		//仿函数
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		//现在没有实例化，没办法到HashTable里面找iterator，所以typename就是告诉编译器这里是一个类型，实例化以后再去取
		typedef typename Bucket::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;
		//begin()
		iterator begin()
		{
			return _ht.begin();
		}
		//end()
		iterator end()
		{
			return _ht.end();
		}
		//insert插入
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		//find查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		//erase删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
	};
}

unordered_map的模拟实现源代码

实现unordered_map的各个接口时，也是调用底层哈希表对应的接口就行了，此外还需要实现[]运算符的重载。

#include"HashTable.h"
namespace cpp
{
	template<class K, class V, class HashFunc = DefaultHash<K>>
	class unordered_map
	{
		//仿函数
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};
	public:
		//现在没有实例化，没办法到HashTable里面找iterator，所以typename就是告诉编译器这里是一个类型，实例化以后再去取
		typedef typename Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;
		//begin()
		iterator begin()
		{
			return _ht.begin();
		}
		//end()
		iterator end()
		{
			return _ht.end();
		}
		//插入
		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}
		//[]运算符重载
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert(make_pair(key, V()));
			return ret.first->second;
		}
		//find查找
		iterator find(const K& key)
		{
			return _ht.Find(key);
		}
		//erase删除
		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}
	private:
		Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;
	};
}