unordered_map和unordered_set的模拟实现

最新推荐文章于 2024-08-28 14:22:55 发布

小倪同学 -_-

最新推荐文章于 2024-08-28 14:22:55 发布

阅读量168

点赞数 1

分类专栏： C++ 文章标签： b树数据结构 c++

本文链接：https://blog.csdn.net/qq_56663697/article/details/123341280

版权

C++ 专栏收录该内容

35 篇文章 8 订阅

订阅专栏

文章目录

哈希表模拟代码
哈希表模板参数
模板参数中的仿函数
字符串哈希算法
哈希表成员函数的实现
哈希表迭代器
unordered_set的模拟实现
unordered_map的模拟实现
总体代码
- 哈希表
- 迭代器

哈希表模拟代码

unordered_map和unordered_set底层是用哈希桶实现的，下面是模拟实现哈希表和哈希桶的代码。

闭散列

也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到冲突位置中的“下一个”空位置中去。

在这里插入图片描述

#pragma once
#include <string>
#include <utility>
#include <vector>

// Closed hashing (open addressing): every element is stored directly in the
// table and collisions are resolved by linear probing.
namespace close_hash
{
	// Occupancy state of one table slot.
	enum Status
	{
		EMPTY,	// never occupied; ends a probe sequence
		EXITS,	// holds a live element
		DELETE	// tombstone left by Erase; probing must continue past it
	};

	// One table slot: the stored pair plus its occupancy state.
	template<class K, class V>
	struct HashData
	{
		std::pair<K, V> _kv;
		Status _status = EMPTY;
	};

	// Default hash functor: converts the key itself to size_t.
	template<class K>
	struct HashFanc
	{
		size_t operator()(const K& key) const
		{
			return key;
		}
	};

	// Specialization for strings: polynomial (BKDR-style) hash, multiplier 131.
	template<>
	struct HashFanc<std::string>
	{
		size_t operator()(const std::string& key) const
		{
			size_t hash = 0;
			for (char ch : key)
			{
				hash *= 131;
				hash += ch;
			}
			return hash;
		}
	};

	// Stand-alone string hash functor using the same BKDR-style computation.
	struct HashFuncString
	{
		size_t operator()(const std::string& key) const
		{
			size_t hash = 0;
			for (char ch : key)
			{
				hash *= 131;
				hash += ch;
			}
			return hash;
		}
	};

	// Open-addressing hash table mapping K to V.
	//   Hash - functor turning a K into a size_t.
	template<class K, class V, class Hash = HashFanc<K>>
	class HashTable
	{
	public:
		// Marks the slot holding key as a tombstone.
		// Returns true if the key was present, false otherwise.
		bool Erase(const K& key)
		{
			HashData<K, V>* slot = Find(key);
			if (slot == nullptr)
			{
				return false;
			}

			slot->_status = DELETE;
			--_n;
			return true;
		}

		// Returns a pointer to the slot holding key, or nullptr if absent.
		HashData<K, V>* Find(const K& key)
		{
			const size_t capacity = _tables.size();
			if (capacity == 0)
			{
				return nullptr;
			}

			Hash hf;
			const size_t start = hf(key) % capacity;
			// Probe at most `capacity` slots. Tombstones are not counted in
			// the load factor, so the table can contain no EMPTY slot at all;
			// an unbounded probe would then never terminate.
			for (size_t i = 0; i < capacity; ++i)
			{
				HashData<K, V>& slot = _tables[(start + i) % capacity]; // linear probing
				if (slot._status == EMPTY)
				{
					return nullptr;
				}
				if (slot._status == EXITS && slot._kv.first == key)
				{
					return &slot;
				}
			}

			return nullptr;
		}

		// Inserts kv unless its key is already present.
		// Returns true if the pair was inserted, false on a duplicate key.
		bool Insert(const std::pair<K, V>& kv)
		{
			if (Find(kv.first))
			{
				return false;
			}

			// Grow when the table is empty or the load factor reaches 0.7.
			if (_tables.size() == 0 || _n * 10 / _tables.size() >= 7)
			{
				const size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				HashTable<K, V, Hash> newHT;
				newHT._tables.resize(newSize);
				// Re-insert only live elements; tombstones are dropped.
				for (const auto& e : _tables)
				{
					if (e._status == EXITS)
					{
						newHT.Insert(e._kv);
					}
				}

				_tables.swap(newHT._tables);
			}

			Hash hf;
			const size_t capacity = _tables.size();
			size_t index = hf(kv.first) % capacity;
			// Linear probing: take the first slot that is not live.
			// The load-factor bound above guarantees such a slot exists.
			while (_tables[index]._status == EXITS)
			{
				index = (index + 1) % capacity;
			}

			_tables[index]._kv = kv;
			_tables[index]._status = EXITS;
			++_n;

			return true;
		}
	private:
		std::vector<HashData<K, V>> _tables;
		size_t _n = 0; // number of live elements
	};
}

开散列

开散列法又叫链地址法(开链法)，首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

在这里插入图片描述

// Open hashing (separate chaining): each bucket is the head of a singly
// linked list holding the elements whose hash maps to that bucket.
// NOTE(review): this namespace is spelled `bucket_bush`, while the container
// wrappers elsewhere in this file refer to `bucket_hash`. The spelling is kept
// so existing users keep compiling — confirm which one is intended.
namespace bucket_bush
{
	// Default hash functor: converts the key itself to size_t.
	template<class K>
	struct HashFanc
	{
		size_t operator()(const K& key) const
		{
			return key;
		}
	};

	// Returns the first entry of the prime table that is greater than
	// `prime`, or the largest entry when no greater one exists.
	// `inline` because this is a function definition in a header.
	inline size_t GetNextPrime(size_t prime)
	{
		const int PRIMECOUNT = 28;
		// Table of primes used as bucket counts.
		static const size_t primeList[PRIMECOUNT] =
		{
			53ul, 97ul, 193ul, 389ul, 769ul,
			1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
			49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
			1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
			50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		};

		for (int i = 0; i < PRIMECOUNT; ++i)
		{
			if (primeList[i] > prime)
				return primeList[i];
		}

		// No larger prime available: stay at the last entry rather than
		// reading one element past the end of the table.
		return primeList[PRIMECOUNT - 1];
	}

	// Specialization for strings: polynomial (BKDR-style) hash, multiplier 131.
	template<>
	struct HashFanc<std::string>
	{
		size_t operator()(const std::string& key) const
		{
			size_t hash = 0;
			for (char ch : key)
			{
				hash *= 131;
				hash += ch;
			}
			return hash;
		}
	};

	// One node of a bucket's chain.
	template<class K,class V>
	struct HashNode
	{
		std::pair<K, V> _kv;
		HashNode* _next;

		// Builds a detached node holding a copy of kv.
		HashNode(const std::pair<K, V>& kv)
			:_kv(kv)
			, _next(nullptr)
		{}
	};

	// Separate-chaining hash table mapping K to V.
	//   Hash - functor turning a K into a size_t.
	template<class K,class V,class Hash=HashFanc<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		// Declaring the copy constructor suppresses the implicit default
		// constructor, so it has to be requested explicitly.
		HashTable() = default;

		// Deep copy: every node of ht is duplicated into the same bucket.
		HashTable(const HashTable& ht)
			: _tables(ht._tables.size(), nullptr)
		{
			for (size_t i = 0; i < ht._tables.size(); ++i)
			{
				for (Node* cur = ht._tables[i]; cur; cur = cur->_next)
				{
					// Head insertion: the copied chain holds the same
					// elements in reverse order.
					Node* copy = new Node(cur->_kv);
					copy->_next = _tables[i];
					_tables[i] = copy;
				}
			}
			_n = ht._n;
		}

		// Copy-and-swap assignment: `ht` is already a copy of the right-hand
		// side; after the swap its destructor releases the old contents.
		HashTable& operator=(HashTable ht)
		{
			_tables.swap(ht._tables);
			std::swap(_n, ht._n);

			return *this; // allows chained assignment
		}

		// Frees every node of every bucket.
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		// Removes the element with the given key.
		// Returns true if an element was removed, false if the key was absent.
		bool Erase(const K& key)
		{
			if (_tables.size() == 0)
			{
				return false;
			}

			Hash hf;
			const size_t index = hf(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr) // cur is the bucket head
					{
						_tables[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					--_n;
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}
			return false;
		}

		// Returns the node holding key, or nullptr if the key is absent.
		Node* Find(const K& key)
		{
			if (_tables.size() == 0)
			{
				return nullptr;
			}

			Hash hf;
			const size_t index = hf(key) % _tables.size();
			for (Node* cur = _tables[index]; cur; cur = cur->_next)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
			}

			return nullptr;
		}

		// Inserts kv unless its key is already present.
		// Returns true if the pair was inserted, false on a duplicate key.
		bool Insert(const std::pair<K, V>& kv)
		{
			Hash hf;
			// Grow when the load factor reaches 1 (also true for an empty
			// table, where 0 == 0).
			if (_n == _tables.size())
			{
				const size_t newSize = GetNextPrime(_tables.size());

				std::vector<Node*> newtables(newSize, nullptr);
				// Relink the existing nodes into the new buckets; no node is
				// copied or reallocated.
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						const size_t index = hf(cur->_kv.first) % newSize;
						cur->_next = newtables[index];
						newtables[index] = cur;

						cur = next;
					}
					_tables[i] = nullptr;
				}
				newtables.swap(_tables);
			}

			const size_t index = hf(kv.first) % _tables.size();
			for (Node* cur = _tables[index]; cur; cur = cur->_next)
			{
				if (cur->_kv.first == kv.first)
					return false;
			}

			// Head insertion into the bucket.
			Node* newnode = new Node(kv);
			newnode->_next = _tables[index];
			_tables[index] = newnode;

			++_n;
			return true;
		}
	private:
		std::vector<Node*> _tables; // bucket heads
		size_t _n = 0;              // number of stored elements
	};
}

哈希表模板参数

unordered_set是K模型的容器，unordered_map是KV模型的容器，为了用一份哈希表同时封装出K模型和KV模型的容器，我们需要使用模板。

原来哈希桶结点

// Bucket node of the original key/value hash table: one stored pair plus the
// link to the next node of the same bucket.
template<class K, class V>
struct HashNode
{
	pair<K, V> _kv;              // stored key/value pair
	HashNode* _next = nullptr;   // next node in the chain; null when last

	// Builds a detached node holding a copy of kv.
	HashNode(const pair<K, V>& kv)
		: _kv(kv)
	{}
};

将原先键值对模板<class K,class V>修改为<class T>，这里的T既可以表示K模型，也可以表示KV模型，修改后如下：

// Generic bucket node: T is the stored element type, plus the link to the
// next node of the same bucket.
template<class T>
struct HashNode
{
	T _data;                        // stored element
	HashNode<T>* _next = nullptr;   // next node in the chain; null when last

	// Builds a detached node holding a copy of data.
	HashNode(const T& data)
		: _data(data)
	{}
};

模板参数中的仿函数

由于结点当中存储的是T，这个T可能是Key，也可能是<Key, Value>键值对。那么当我们需要进行结点的键值比较时该怎么办呢？

这里就需要用到仿函数了，利用仿函数将T中的K值取出来进行比较。

仿函数，就是使一个类的使用看上去像一个函数。其实现就是类中实现一个operator()，这个类就有了类似函数的行为，就是一个仿函数类了。

// Skeleton of the map wrapper: it stores pair<const K, V> elements in the
// shared hash table and tells the table how to reach the key of an element.
template<class K, class V, class Hash = HashFanc<K>>
class unordered_map
{
	// Key extractor passed to the hash table: given a stored pair, yields
	// a reference to its key.
	struct MapKeyOfT
	{
		const K& operator()(const pair<const K, V>& entry) const
		{
			return entry.first;
		}
	};

private:
	bucket_hash::HashTable<K, pair<const K, V>, Hash, MapKeyOfT> _ht;
};

对于哈希表来说，它并不知道上层容器是map还是set，因此当需要进行两个结点键值的比较时，底层哈希表都会通过传入的仿函数来获取键值Key。

所以，set容器也需要仿函数

// Skeleton of the set wrapper: the stored element is the key itself, so the
// key extractor handed to the hash table is the identity.
template<class K, class Hash = HashFanc<K>>
class unordered_set
{
	// Key extractor passed to the hash table: returns the element unchanged.
	struct SetKeyOfT
	{
		const K& operator()(const K& element) const
		{
			return element;
		}
	};

private:
	bucket_hash::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

字符串哈希算法

哈希函数需要用整型的key对表的大小取模来计算哈希地址，而字符串并不是整型，也就意味着字符串不能直接用于计算哈希地址，我们需要通过某种方法将字符串转换成整型后，才能代入哈希函数计算哈希地址。

但遗憾的是，我们无法找到一种能实现字符串和整型之间一对一转换的方法，因为在计算机中，整型的大小是有限的，比如32位无符号整型能存储的最大数字是4294967295，而众多字符能构成的字符串的种类却是无限的。

这里我们需要利用字符串哈希算法使每个字符串对应一个整数，虽然无法实现一一对应，但是能大大减少重复的概率。

// Default hash functor: converts the key itself to size_t.
// operator() is const so the functor can be invoked through a const object
// or a const reference.
template<class K>
struct HashFanc
{
	size_t operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for strings: polynomial (BKDR-style) hash, multiplier 131.
// Different strings may still produce the same value.
template<>
struct HashFanc<std::string>
{
	size_t operator()(const std::string& key) const
	{
		size_t hash = 0;
		for (char ch : key)
		{
			hash *= 131;
			hash += ch;
		}
		return hash;
	}
};

哈希表成员函数的实现

构造函数

哈希表中有两个成员变量，当我们实例化一个对象时：

_tables会自动调用vector的默认构造函数进行初始化。
_n会根据我们所给的缺省值被设置为0。

vector<Node*> _tables; // bucket array: each slot is the head of one chain
size_t _n = 0; // number of elements stored in the table

我们不需要编写构造函数，使用默认生成的构造函数就足够了，但是由于我们后面需要编写拷贝构造函数，编写了拷贝构造函数后，默认的构造函数就不会生成了，此时我们需要使用default关键字显式指定生成默认构造函数。

HashTable() = default; // explicitly request the compiler-generated default constructor

拷贝构造函数

为了避免浅拷贝的问题（拷贝出来的哈希表和原哈希表使用同一份数据），我们需要自己写一份拷贝构造函数。

拷贝构造思路如下：

将哈希表的大小调整为ht._tables的大小。
遍历将原哈希表的数据拷贝到新建的哈希表中。
更改哈希表当中的有效数据个数。

// Copy constructor: deep-copies every node of ht so the two tables share no
// memory.
HashTable(const HashTable& ht)
{
	_tables.resize(ht._tables.size()); // same bucket count as the source
	for (size_t i = 0; i < ht._tables.size(); i++)
	{
		// Duplicate the chain of bucket i. Head insertion means the copied
		// chain holds the same elements in reverse order.
		for (Node* cur = ht._tables[i]; cur; cur = cur->_next)
		{
			Node* copy = new Node(cur->_data);
			copy->_next = _tables[i];
			_tables[i] = copy;
		}
	}
	_n = ht._n; // same element count
}

赋值运算符重载

可以利用传值的特性，间接调用拷贝构造函数，之后将拷贝构造出来的哈希表和当前哈希表的两个成员变量进行交换。当赋值运算符重载函数调用结束后，拷贝构造出来的哈希表会因为出了作用域而被自动析构，此时原哈希表之前的数据也就顺势被释放了。

// Copy-and-swap assignment. `ht` is passed by value, so it is already a copy
// of the right-hand side; after the swap it holds this table's previous
// contents, which its destructor releases when the function returns.
HashTable& operator=(HashTable ht)
{
	_tables.swap(ht._tables);
	std::swap(_n, ht._n);

	return *this; // allows chained assignment
}

析构函数

因为哈希表的结点都是new出来的，系统默认生成的析构函数并不能释放结点，这样可能会造成内存泄漏。为了避免这个问题我们需要自己设计析构函数。析构函数只需要把所有new出来的结点释放即可。

// Destructor: releases every node of every bucket and leaves each bucket
// head null.
~HashTable()
{
	for (Node*& head : _tables)
	{
		// Pop nodes off the front of the chain until it is empty.
		while (head)
		{
			Node* rest = head->_next;
			delete head;
			head = rest;
		}
	}
}

插入数据

思路如下

先判断负载因子（表中有效数据个数与桶个数的比值），如果达到1就扩容；表为空（桶个数为0）时同样满足该条件，因此也会扩容。
查找桶中是否有该数据，如果有直接返回（避免数据冗余）。
将数据插入桶中，有效数据个数加一。

// Returns the first entry of the prime table that is greater than `prime`,
// or the largest entry when no greater one exists.
// `inline` because this is a function definition in a header.
inline size_t GetNextPrime(size_t prime)
{
	const int PRIMECOUNT = 28;
	// Table of primes used as bucket counts.
	static const size_t primeList[PRIMECOUNT] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};

	for (int i = 0; i < PRIMECOUNT; ++i)
	{
		if (primeList[i] > prime)
			return primeList[i];
	}

	// No larger prime available: stay at the last entry rather than reading
	// one element past the end of the table.
	return primeList[PRIMECOUNT - 1];
}
	
pair<iterator,bool> Insert(const T& data)
{
	Hash hf;
	KeyOfT kot;

	// 负载因子为1时扩容
	if (_n == _tables.size())
	{
		//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
		size_t newSize = GetNextPrime(_tables.size());// 部分源码中使用素数列

		vector<Node*> newtables;// 新建哈希桶
		newtables.resize(newSize, nullptr);
		// 将原来表的数据拷贝到新表中
		for (size_t i = 0; i < _tables.size(); i++)
		{
			// 记录当前哈希桶
			Node* cur = _tables[i];
			// 将桶中数据拷贝到新桶中
			while (cur)
			{
				Node* next = cur->_next;

				const K& key = kot(cur->_data);
				size_t index = hf(key)%newSize;

				cur->_next = newtables[index];
				newtables[index] = cur;

				cur = next;
			}
			_tables[i] = nullptr;
		}
		newtables.swap(_tables);
	}

	// 确定表中哈希桶的位置
	const K& key = kot(data);
	size_t index = hf(key)%_tables.size();
	Node* cur = _tables[index];
	// 遍历，查看桶中是否有该数据，如果有直接返回迭代器
	// （避免数据冗余）
	while (cur)
	{
		if (kot(cur->_data) == kot(data))
			return make_pair(iterator(cur,this),false);
		else
			cur = cur->_next;
	}
	// 将数据头插入哈希桶中
	Node* newnode = new Node(data);
	newnode->_next = _tables[index];
	_tables[index] = newnode;
	// 有效数据个数加一
	++_n;
	return make_pair(iterator(newnode, this), true);
}

查找函数

实现思路

先判断是否存在数据，如果不存在直接返回。
定位数据所在桶的位置，遍历该桶查找数据。

// Looks up key. Returns the node holding it, or nullptr when the table has
// no buckets or the key is not stored.
Node* Find(const K& key)
{
	const size_t bucketCount = _tables.size();
	if (bucketCount == 0)
	{
		return nullptr;
	}

	Hash hashOf;
	KeyOfT keyOf;

	// Only the bucket the key hashes to can contain it.
	const size_t slot = hashOf(key) % bucketCount;
	for (Node* node = _tables[slot]; node != nullptr; node = node->_next)
	{
		if (keyOf(node->_data) == key)
		{
			return node;
		}
	}

	return nullptr;
}

删除数据

实现思路

先判断是否存在数据，如果不存在直接返回。
定位数据所在桶的位置，遍历该桶查找数据。
删除数据并保持链表的完整性

bool Erase(const K& key)
{
	// 数据为0直接返回
	if (_tables.size() == 0)
	{
		return false;
	}

	Hash hf;
	KeyOfT kot;

	// 定位数据所在桶的位置
	int index = hf(key)%_tables.size();			
	Node* prev = nullptr;
	Node* cur = _tables[index];
	// 删除数据并保持数据的完整性
	while (cur)
	{
		if (kot(cur->_data) == key)// 找到数据
		{
			if (prev == nullptr)// cur是头结点
			{
				_tables[index] = cur->_next;
			}
			else
			{
				prev->_next = cur->_next;
			}

			delete cur;
			--_n;
			return true;
		}
		else
		{
			prev = cur;
			cur = cur->_next;
		}
	}
	return false;
}

哈希表迭代器

哈希表的迭代器实际上就是对哈希结点指针进行了封装，为了遍历，迭代器中应该存储哈希表的地址。

// Forward iterator over the hash table: a node pointer plus a pointer to the
// table that owns the node.
template<class K, class T, class Hash, class KeyOfT>
struct HTIterator
{
	typedef HashNode<T> Node;                    // node type
	typedef HashTable<K, T, Hash, KeyOfT> HT;    // table type
	typedef HTIterator<K, T, Hash, KeyOfT> Self; // this iterator type

	Node* _node; // current node
	HT* _ht;     // table the node belongs to
};

构造函数

	HTIterator(Node* node, HT* ht)
		:_node(node)//结点指针
		, _ht(ht)//哈希表的地址
	{}

解引用，直接返回结点数据的引用

// Dereference: yields a reference to the element stored in the current node.
T& operator*()
{
	T& element = _node->_data;
	return element;
}

-> 操作，直接返回数据的地址

// Member access: yields the address of the element stored in the current node.
T* operator->()
{
	T* address = &_node->_data;
	return address;
}

判断两个迭代器是否相等，直接判断封装的结点是否相等

// Two iterators differ when they refer to different nodes.
bool operator!=(const Self& s)const
{
	return !(_node == s._node);
}

// Two iterators are equal when they refer to the same node.
bool operator==(const Self& s) const
{
	return s._node == _node;
}

++ 操作

若当前结点不是当前哈希桶中的最后一个结点，则++后走到当前哈希桶的下一个结点。
若当前结点是当前哈希桶的最后一个结点，则++后走到下一个非空哈希桶的第一个结点。

// Pre-increment: moves to the next element and returns this iterator by
// reference, so no copy is made per step.
// The next element is the following node of the current bucket or, if there
// is none, the head of the next non-empty bucket. When no such bucket exists
// the node pointer becomes null.
Self& operator++()
{
	// An iterator whose node is null has nothing to advance from.
	if (_node == nullptr)
	{
		return *this;
	}

	if (_node->_next) // not the last node of its bucket
	{
		_node = _node->_next;
		return *this;
	}

	// Recompute the bucket of the current node, then scan the buckets after it.
	KeyOfT kot;
	Hash hf;
	const size_t bucketCount = _ht->_tables.size();
	size_t index = hf(kot(_node->_data)) % bucketCount + 1;

	_node = nullptr; // stays null if no later bucket is non-empty
	for (; index < bucketCount; ++index)
	{
		if (_ht->_tables[index])
		{
			_node = _ht->_tables[index];
			break;
		}
	}
	return *this;
}

迭代器实现后，我们需要在哈希表的实现中注意如下操作：

为了让外部能够使用typedef后的正向迭代器类型iterator，我们需要在public区域进行typedef。
由于正向迭代器中++运算符重载函数在寻找下一个结点时，会访问哈希表中的成员变量_tables，而_tables成员变量是哈希表的私有成员，因此我们需要将正向迭代器类声明为哈希表类的友元。

哈希表中迭代器相关函数

begin函数：返回哈希表中第一个非空哈希桶中的第一个结点的正向迭代器。
end函数：返回空指针的正向迭代器。

template<class K,class T,class Hash,class KeyOfT>
class HashTable
{
	typedef HashNode<T> Node;
	// 友元类，方便迭代器使用哈希表的私有成员
	friend struct HTIterator<K, T, Hash, KeyOfT>;		
public:
	typedef HTIterator<K, T, Hash, KeyOfT> iterator;

	iterator begin()
	{
		//找到第一个非空哈希桶
		for (size_t i = 0; i < _tables.size(); i++)
		{
			if (_tables[i])
			{
				//返回该结点的迭代器
				return iterator(_tables[i], this);
			}
		}

		return end();//哈希桶中无数据返回end()
	}

	iterator end()
	{
		return iterator(nullptr, this);//返回nullptr的迭代器
	}
private:
	vector<Node*> _table; 
	size_t _n = 0; 
}

unordered_set的模拟实现

unordered_set的模拟实现调用哈希表的接口即可

template<class K, class Hash = HashFanc<K>>
class unordered_set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key) const
		{
			return key;
		}
	};
public:
	//现在没有实例化，没办法到HashTable里面找iterator，所以typename就是告诉编译器这里是一个类型，实例化以后再去取
	typedef typename bucket_hash::HashTable<K, K, Hash, SetKeyOfT>::iterator iterator;
	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}
	//插入
	pair<iterator,bool> insert(const K& key)
	{
		return _ht.Insert(key);
	}
	//删除
	void erase(const K& key)
	{
		_ht.Erase(key);
	}
	//查找
	iterator find(const K& key)
	{
		return _ht.Find(key);
	}
private:
	bucket_hash::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

unordered_map的模拟实现

和unordered_set的模拟实现一样，调用哈希表的接口即可，注意这里多了 [ ] 运算符的重载。

template<class K, class V, class Hash = HashFanc<K>>
class unordered_map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair <const K, V>& kv) const
		{
			return kv.first;
		}
	};
public:
	typedef typename bucket_hash::HashTable<K, pair<const K, V>, Hash, MapKeyOfT>::iterator iterator;
	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = insert(make_pair(key, V()));
		return ret.first->second;
	}
	//插入
	pair<iterator,bool> insert(const pair<const K, V>& kv)
	{
		return _ht.Insert(kv);
	}
	//删除
	void erase(const K& key)
	{
		_ht.Erase(key);
	}
	//查找
	iterator find(const K& key)
	{
		return _ht.Find(key);
	}
private:
	bucket_hash::HashTable<K, pair<const K, V>, Hash, MapKeyOfT> _ht;
};

总体代码

哈希表

// Hash node: one stored element of type T plus the link to the next node of
// the same bucket.
template<class T>
struct HashNode
{
	T _data;                        // stored element
	HashNode<T>* _next = nullptr;   // next node in the chain; null when last

	// Builds a detached node holding a copy of data.
	HashNode(const T& data)
		: _data(data)
	{}
};

// Default hash functor: converts the key itself to size_t.
// operator() is const so the functor can be invoked through a const object
// or a const reference.
template<class K>
struct HashFanc
{
	size_t operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for strings: polynomial (BKDR-style) hash, multiplier 131.
// Different strings may still produce the same value.
template<>
struct HashFanc<std::string>
{
	size_t operator()(const std::string& key) const
	{
		size_t hash = 0;
		for (char ch : key)
		{
			hash *= 131;
			hash += ch;
		}
		return hash;
	}
};

// Returns the first entry of the prime table that is greater than `prime`,
// or the largest entry when no greater one exists.
// `inline` because this is a function definition in a header.
inline size_t GetNextPrime(size_t prime)
{
	const int PRIMECOUNT = 28;
	// Table of primes used as bucket counts.
	static const size_t primeList[PRIMECOUNT] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};

	for (int i = 0; i < PRIMECOUNT; ++i)
	{
		if (primeList[i] > prime)
			return primeList[i];
	}

	// No larger prime available: stay at the last entry rather than reading
	// one element past the end of the table.
	return primeList[PRIMECOUNT - 1];
}

// 哈希表
template<class K,class T,class Hash,class KeyOfT>
class HashTable
{
	typedef HashNode<T> Node;
	// 友元类，方便迭代器使用哈希表的私有成员
	friend struct HTIterator<K, T, Hash, KeyOfT>;		
public:
	typedef HTIterator<K, T, Hash, KeyOfT> iterator;

	iterator begin()
	{
		//找到第一个非空哈希桶
		for (size_t i = 0; i < _tables.size(); i++)
		{
			if (_tables[i])
			{
				//返回该结点的迭代器
				return iterator(_tables[i], this);
			}
		}

		return end();//哈希桶中无数据返回end()
	}

	iterator end()
	{
		return iterator(nullptr, this);//返回nullptr的迭代器
	}

	//构造函数
	HashTable() = default; //显示指定生成默认构造
	
	// 拷贝构造
	HashTable(const HashTable& ht)
	{
		_tables.resize(ht._table.size());// 调整表的大小
		for (int i = 0; i < ht._tables.size(); i++)// 将结点拷贝到新表中
		{
			if (ht._tables[i])
			{
				Node* cur = ht._tables[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
					copy->_next = _tables[i];
					_tables[i] = copy;
					cur = cur->_next;
				}
			}
		}
		_n = ht._n;// 调整数据
	}

	// 赋值运算符重载
	HashTable& operator=(HashTable ht)
	{
		//交换哈希表中两个成员变量的数据
		_table.swap(ht._table);
		swap(_n, ht._n);

		return *this; //支持连续赋值
	}


	~HashTable()
	{
		//将哈希表当中的结点一个个释放
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];// 记录当前桶
			while (cur)// 将该桶中所有数据删去
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_tables[i] = nullptr;//将该哈希桶置空
		}
	}

	bool Erase(const K& key)
	{
		// 数据为0直接返回
		if (_tables.size() == 0)
		{
			return false;
		}

		Hash hf;
		KeyOfT kot;

		// 定位数据所在桶的位置
		int index = hf(key)%_tables.size();			
		Node* prev = nullptr;
		Node* cur = _tables[index];
		// 删除数据并保持数据的完整性
		while (cur)
		{
			if (kot(cur->_data) == key)// 找到数据
			{
				if (prev == nullptr)// cur是头结点
				{
					_tables[index] = cur->_next;
				}
				else
				{
					prev->_next = cur->_next;
				}

				delete cur;
				--_n;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}
		return false;
	}

	Node* Find(const K& key)
	{
		// 数据为0直接返回
		if (_tables.size() == 0)
		{
			return nullptr;
		}

		Hash hf;
		KeyOfT kot;
		// 定位数据所在桶的位置
		size_t index = hf(key)%_tables.size();
		Node* cur = _tables[index];
		// 遍历桶查找数据
		while (cur)
		{
			if (kot(cur->_data) == key)
			{
				return cur;
			}
			else
			{
				cur = cur->_next;
			}
		}

		return nullptr;
	}

	pair<iterator,bool> Insert(const T& data)
	{
		Hash hf;
		KeyOfT kot;

		// 负载因子为1时扩容
		if (_n == _tables.size())
		{
			//size_t newSize = _tables.size() == 0 ? 10 : _tables.size() * 2;
			size_t newSize = GetNextPrime(_tables.size());// 部分源码中使用素数列

			vector<Node*> newtables;// 新建哈希桶
			newtables.resize(newSize, nullptr);
			// 将原来表的数据拷贝到新表中
			for (size_t i = 0; i < _tables.size(); i++)
			{
				// 记录当前哈希桶
				Node* cur = _tables[i];
				// 将桶中数据拷贝到新桶中
				while (cur)
				{
					Node* next = cur->_next;

					const K& key = kot(cur->_data);
					size_t index = hf(key)%newSize;

					cur->_next = newtables[index];
					newtables[index] = cur;

					cur = next;
				}
				_tables[i] = nullptr;
			}
			newtables.swap(_tables);
		}

		// 确定表中哈希桶的位置
		const K& key = kot(data);
		size_t index = hf(key)%_tables.size();
		Node* cur = _tables[index];
		// 遍历，查看桶中是否有该数据，如果有直接返回迭代器
		// （避免数据冗余）
		while (cur)
		{
			if (kot(cur->_data) == kot(data))
				return make_pair(iterator(cur,this),false);
			else
				cur = cur->_next;
		}
		// 将数据头插入哈希桶中
		Node* newnode = new Node(data);
		newnode->_next = _tables[index];
		_tables[index] = newnode;
		// 有效数据个数加一
		++_n;
		return make_pair(iterator(newnode, this), true);
	}
private:
	vector<Node*> _tables; // 存储链表
	size_t _n=0;// 哈希表中的有效元素个数
};

迭代器

// Forward declaration: the iterator stores a pointer to the table type.
template<class K, class T, class Hash, class KeyOfT>
class HashTable;

// Forward iterator over the hash table: a node pointer plus a pointer to the
// table that owns the node.
template<class K, class T, class Hash, class KeyOfT>
struct HTIterator
{
	typedef HashNode<T> Node;                    // node type
	typedef HashTable<K, T, Hash, KeyOfT> HT;    // table type
	typedef HTIterator<K, T, Hash, KeyOfT> Self; // this iterator type

	Node* _node; // current node
	HT* _ht;     // table the node belongs to

	// Builds an iterator that refers to `node` inside the table `ht`.
	HTIterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	// Two iterators differ when they refer to different nodes.
	bool operator!=(const Self& s)const
	{
		return _node != s._node;
	}

	// Two iterators are equal when they refer to the same node.
	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}

	// Dereference: reference to the element stored in the current node.
	T& operator*()
	{
		return _node->_data;
	}

	// Member access: address of the element stored in the current node.
	T* operator->()
	{
		return &_node->_data;
	}

	// Pre-increment: moves to the next element and returns this iterator by
	// reference, so no copy is made per step.
	// The next element is the following node of the current bucket or, if
	// there is none, the head of the next non-empty bucket. When no such
	// bucket exists the node pointer becomes null.
	Self& operator++()
	{
		// An iterator whose node is null has nothing to advance from.
		if (_node == nullptr)
		{
			return *this;
		}

		if (_node->_next) // not the last node of its bucket
		{
			_node = _node->_next;
			return *this;
		}

		// Recompute the bucket of the current node, then scan the buckets
		// after it.
		KeyOfT kot;
		Hash hf;
		const size_t bucketCount = _ht->_tables.size();
		size_t index = hf(kot(_node->_data)) % bucketCount + 1;

		_node = nullptr; // stays null if no later bucket is non-empty
		for (; index < bucketCount; ++index)
		{
			if (_ht->_tables[index])
			{
				_node = _ht->_tables[index];
				break;
			}
		}
		return *this;
	}
};

小倪同学 -_-

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
unordered_map和unordered_set的模拟实现

文章目录一级目录哈希表模板参数哈希表成员函数的实现构造函数拷贝构造函数赋值运算符重载析构函数一级目录unordered_map和unordered_set底层是用哈希桶实现的，下面是模拟实现哈希表和哈希桶的代码。哈希表#pragma once#include<vector>// 闭散列namespace close_hash{ enum Status { EMPTY, EXITS, DELETE }; template<class K, class
复制链接

扫一扫

专栏目录