【C++】---STL之用哈希桶模拟实现：unordered_set和unordered_map

小能软糖_

于 2024-09-03 16:00:29 发布

阅读量671

点赞数 12

分类专栏： C++ 文章标签：哈希算法 c++ 散列表

本文链接：https://blog.csdn.net/weixin_75128035/article/details/141830318

版权

C++ 专栏收录该内容

28 篇文章 2 订阅

订阅专栏

一、哈希桶节点的修改

如果我们要用哈希桶对unordered_set和unordered_map进行封装的话，我们面临一个问题那就是 unordered_set传给哈希桶的是K，而unordered_map传给哈希桶的是pair，同样我们之前也遇到过set和map的封装，是用的红黑树。
面对传给底层的数据类型不同怎么办？我们统一用T来代替：K和pair。

template <class T>
struct HashNode
{
	T _data;
	 HashNode<T>* _next;

	//构造
	 HashNode(const T& data)
	 	:_data(data)
	 	,_next(nullptr)
	 {
	 	
	 }

};

二、哈希表

关于哈希表的第一个类模板参数就是K，因为这个K不能去掉，因为K可以用来Find()函数等需要用到！

template<class K,class T,class KeyOfT,class Hash=HashFunc<K>>
class HashTable
{
	typedef HashNode<T> Node;
	// 因为__HashIterator要访问： HashTable的私有成员变量_tables[i] ，所以在class HashTable里面要加入友元！
	friend struct __HashIterator;
	
private:
	vector<Node*> _tables;// 指针数组！
	size_t _n;// 标记有效元素个数
	
public: 
	typedef __HashIterator<K,T,KeyOfT,Hash> iterator;
};

1、构造

	HashTable()
{
	_tables.resize(10,nullptr);// 先开10个空间
	_n=0;
};

2、拷贝构造

        //拷贝构造
		HashTable(const HashTable& ht)
		{
			_n = ht._n;//存储有效数据的个数一致
			_table.resize(ht._table.size());//开同样大小的空间
 
			//遍历ht，将ht的_table的每个结点都拷贝到_table中
			for (size_t i = 0; i < ht._table.size(); i++)
			{
				Node* cur = ht._table[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
		
					//头插到新表
					copy->_next = _table[i];//copy的下一个桶为_table[i]
					_table[i] = copy;//把copy作为当前位置的第一个桶
					cur = cur->_next;//cur往下移					
				}
			}
		
		}

3、赋值运算符重载

		//赋值运算符重载
		HashTable& operator=(HashTable ht)
		{
			_table.swap(ht._table);
			swap(_n, ht._n);
 
			return *this;
		}

4、析构

// 析构函数:针对于自定义类型(实际上就是链表的析构)

因为哈希桶：①哈希表②表下面挂的节点（节点是我们的自定义类型，对于自定义类型，我们需要自己写一个析构函数来销毁他）

~HashTable()
{
	for(size_t i=0;i<_tables.size();i++)
	{
		Node* cur= _tables[i];
		while(cur)
		{
			Node* next=cur->_next;
			delete cur;
			cur=next;
		}
		_tables[i]=nullptr;
	}
};

5、迭代器

iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
					return iterator(cur, this);
			}
			return end();
		}

iterator end()
		{
			return iterator(nullptr, this);
		}

const_iterator begin() const
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					// this -> const HashTable*
					return const_iterator(cur, this);
				}
			}

			return end();
		}

const_iterator end() const
		{
			return const_iterator(nullptr, this);
		}

6、插入

// 1、插入
		bool Insert(const T& data)
		{
			// 扩容：
			// 负载因子==1的时候就扩容！

			KeyOfT kot;
			Hash hs;
			if (Find(kot(data)))
				return false;

			// 方法二：遍历旧数组、直接将旧表的数据计算好位置，然后直接插入到新表。

			if (_n == _tables.size())// 扩容
			{
				vector<Node*> new_table(_tables.size() * 2, nullptr);


				//遍历旧数组
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;// 先保存一下 next

						// 头插到新表相应位置！

						// 老套路：先计算位置
						size_t hashi = cur->_kv.first % new_table.size();
						// 头插：不要搞错了：cur是旧表中将要插入的数据！
						// 头插思路：
						// ①new[hashi]就是桶的第一个节点，操作：只需要将要插入节点的_next指向new就好了
						// ②然后更新 桶的第一个位置！！！
					
						cur->_next= new_table[hashi]
						new_table[hashi] = cur;

						cur = next;// 继续对原数组进行遍历查找出所有带有桶节点的。

					}
					_tables[i] = nullptr;// 防止 有两个位置 指向同一个节点！对原数组的数据进行指向空
				}
				_tables.swap(new_table);

			}
			

			size_t hashi =hs(kot(data))% _tables.size();

			Node* newnode = new Node(data);
			// 头插
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			++_n;
			return true;
		}

7、查找

// 2、查找：
		Node* Find(const K& key)
		{

			Hash hs;
			
			size_t hashi = hs(key) % _tables.size();

			Node* cur = _tables[hashi];
			while (cur)
			{
				// 找到了就返回！
				if (cur->_data == key)
					return cur;
				// 没找到 就继续往下遍历！
				cur = cur->_next;
			}
			return nullptr;
		}

8、删除

// 3、删除：
		bool Erase(const K& key)
		{
			// 删除就相当于在单链表中进行删除，需要保存前一个位置:prev

			// 大致分两种情况：①删除头结点 ②删除中间 和 尾结点


			Hash hs;
			KeyOfT kot;

			size_t hashi = hs(kot(key)) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];

			while (cur)
			{
				if (cur->_data== key)
				{
					// 可以删除了

					// ①如果删除的是头结点
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

三、迭代器

迭代器需要前置声明HashTable，因为HashTable类中使用了__HTIterator迭代器，且__HTIterator中使用了HashTable类的指针，为什么要用指针呢？
因为C++编译器自上而下编译源文件的时候，对每一个数据的定义，需要知道定义的数据类型的大小。在预先声明语句class HashTable;之后，编译器已经知道HashTable是一个类，但是其中的数据却是未知的，因此HashTable类型的大小也不知道，这样就造成了编译失败；将__HTIterator中的_node更改为HashTable指针类型之后，由于在特定的平台上，指针所占的空间是一定的（在Win32平台上是4字节，在64位平台上是8字节），这样可以通过编译。

迭代器的参数包含哈希节点和哈希表：

	// 前置声明
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;
 
	// 迭代器
	template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>
	struct __HTIterator
	{
		typedef HashNode<T> Node;//哈希节点
		typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;//实现++、--
		typedef HashTable<K, T, KeyOfT, HashFunc> HT;//哈希表
		Node* _node;
		HT* _pht;
 
		__HTIterator(Node* node, HT* pht)
			:_node(node)//哈希节点
			, _pht(pht)//哈希表
		{}
    };

1、operator++()

迭代器最重要的部分呢~
在这里插入图片描述

Self& operator++()
		{

			// 1、一个桶没遍历完的情况！
			if (_node->_next)
			{
				_node = _node->_next;
			}
			// 2、一个桶遍历完了，寻找下一个有节点的桶！
			else
			{
				// 下一个有节点的桶 怎么找？就需要利用原来哈希表
				KeyOfT kot;
				Hash hs;
				size_t i = hs(kot(_node->_data)) % _tables.size();// 这是目前 遍历完一个桶的下标i

				++i;
				for (; i < _pht->_tables.size(); i++)
				{
					if (_pht->_tables[i])
					{
						break;
					}
					
				}

				// 此处的_tables[i]就是我们的目标：下一个不为空的桶
				if (i == _pht->_tables.size())
				{
					_node = nullptr;
				}
				else
				{
					_node =_pht->_tables[i];
				}

			}

			return *this;
		}

2、operator*()

T& operator*()
		{
			return _node->_data;
		}

3、operator->()

Ptr operator->()
		{
			return &_node->_data;
		}

4、operator！=()

bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

5、operator==()

		bool operator == (const Self& s)
		{
			return _node == s._node;
		}

四、封装unordered_set和unordered_map

1、封装unordered_set

unordered_set的成员就是哈希表，不过要传模板参数，SetKeyOfT就是传给哈希表的仿函数

#pragma once
#include "HashTable.h"
 
namespace yjl
{
	template<class K>
	class unordered_set
	{
 
	private:
		OpenHash::HashTable<K, K, SetKeyOfT> _ht ;
 
	};
}

（1）仿函数SetKeyOfT

		//set就取k
		struct SetKeyOfT
		{
			const K& operator()(const K& k)
			{
				return k;
			}
		};

（2）迭代器

对于unordered_set的迭代器，如果不加typename，类模板里面找迭代器的类型找不到，因为OpenHash::HashTable<K, K, SetKeyOfT>::iterator没有实例化，因此编译器并不知道它是类型还是成员函数或成员变量，编译无法通过。
用typename表明OpenHash::HashTable<K, K, SetKeyOfT>::iterator是一个类型，不是成员函数或成员变量，不需要等到实例化时才能确定。

	public:
		typedef typename OpenHash::HashTable<K, K, SetKeyOfT>::iterator iterator;
 
		iterator begin()
		{
			return _ht.begin();
		}
 
		iterator end()
		{
			return _ht.end();
		}
 
		pair<iterator, bool> Insert(const K& k)
		{
			return _ht.Insert(k);
		}

（3）实例

	void test_unordered_set1()
	{
		unordered_set<int> us;
		us.Insert(100);
		us.Insert(5);
		us.Insert(6);
		us.Insert(32);
		us.Insert(8);
		us.Insert(14);
		us.Insert(65);
		us.Insert(27);
		us.Insert(39);
 
		unordered_set<int>::iterator it = us.begin();
		while (it != us.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}

2、封装unordered_map

unordered_map的成员就是哈希表，不过要传模板参数，MapKeyOfT就是传给哈希表的仿函数

#pragma once
#include "HashTable.h"
 
namespace yjl
{
	template<class K,class V>
	class unordered_map
    {
 
    
    private:
		OpenHash::HashTable<K, pair<K, V>, MapKeyOfT> _ht;
 
	};

（1）仿函数MapKeyOfT

		//map就取kv的first
		struct MapKeyOfT
		{
			const K& operator()(const pair<K,V>& kv)
			{
				return kv.first;
			}
		};

（2）迭代器

	public:
		typedef typename OpenHash::HashTable<K, pair<K,V>, MapKeyOfT>::iterator iterator;
 
		iterator begin()
		{
			return _ht.begin();
		}
 
		iterator end()
		{
			return _ht.end();
		}
 
		pair<iterator, bool> Insert(const pair<K,V>& kv)
		{
			return _ht.Insert(kv);
		}
 
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			return ret.first->iterator;
		}

（3）实例

	void test_unordered_map1()
	{
		unordered_map<string,string> um;
		um.Insert(make_pair("spring", "春天"));
		um.Insert(make_pair("summer", "夏天"));
		um.Insert(make_pair("autumn", "秋天"));
		um.Insert(make_pair("winter", "冬天"));
 
		unordered_map<string,string>::iterator it = um.begin();
		while (it != um.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
	}

五、完整代码段

HashTable.h

#pragma once
#include<vector>
#include<iostream>
using namespace std;
 
namespace OpenHash
{
	//哈希仿函数
	template<class K>
	struct Hash
	{
		size_t operator()(const K& key)
		{
			return key;
		}
	};
 
	//string仿函数
	template<>
	struct Hash<string>//模板特化
	{
		size_t operator()(const string& s)
		{
			size_t value = 0;
			for (auto e : s)
			{
				value += e;
				value *= 131;
			}
 
			return value;
		}
	};
 
	template<class T>
	struct HashNode
	{
		HashNode<T>* _next;
		T _data;
 
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};
 
	//前置声明HashTable,因为HashTable类中使用了__HTIterator，且__HTIterator中使用了HashTable类的指针，为什么要用指针
	//C++编译器自上而下编译源文件的时候，对每一个数据的定义，总是需要知道定义的数据类型的大小。在预先声明语句class HashTable;之后，编译器已经知道HashTable是一个类，但是其中的数据却是未知的，因此HashTable类型的大小也不知道，这样就造成了编译失败；将__HTIterator中的_node更改为HashTable指针类型之后，由于在特定的平台上，指针所占的空间是一定的（在Win32平台上是4字节，在64位平台上是8字节），这样可以通过编译
		// 前置声明
	template<class K, class T, class KeyOfT, class HashFunc>
	class HashTable;
 
	// 迭代器
	template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>
	struct __HTIterator
	{
		typedef HashNode<T> Node;//哈希节点
		typedef __HTIterator<K, T, KeyOfT, HashFunc> Self;//实现++、--
		typedef HashTable<K, T, KeyOfT, HashFunc> HT;//哈希表
		Node* _node;
		HT* _pht;
 
		__HTIterator(Node* node, HT* pht)
			:_node(node)//哈希节点
			, _pht(pht)//哈希表
		{}
	
		Self& operator++()
		{
			//不是当前位置最后一个节点
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else//是当前位置最后一个节点
			{
				KeyOfT kot;
				HashFunc hf;
 
				size_t index = hf(kot(_node->_data)) % _pht->_table.size();
 
				//找哈希表中下一个不为空的位置
				++index;
				while (index < _pht->_table.size())
				{
					if (_pht->_table[index])//不为空
					{
						_node = _pht->_table[index];
						return *this;
					}
					else//为空
					{
						++index;
					}
				}
				_node = nullptr;//后面的所有位置都为空，_node置空，返回当前位置即可
			}
 
			return *this;
		}
 
		T& operator*()
		{
			return _node->_data;
		}
 
		T* operator->()
		{
			return &_node->_data;
		}
 
		bool operator != (const Self& s) const
		{
			return _node != s._node;
		}
 
		bool operator == (const Self& s) const
		{
			return _node == s._node;
		}
	};
 
	template<class K, class T, class KeyOfT, class HashFunc = Hash<K>>
	class HashTable
	{
		typedef HashNode<T> Node;
 
		template<class K,class T,class KeyOfT,class HashFunc>
		friend struct __HTIterator;
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;
	
		//构造函数,default指定生成默认构造函数
		HashTable() = default;
 
		//拷贝构造
		HashTable(const HashTable& ht)
		{
			_n = ht._n;//存储有效数据的个数一致
			_table.resize(ht._table.size());//开同样大小的空间
 
			//遍历ht，将ht的每个结点都拷贝到_table中
			for (size_t i = 0; i < ht._table.size(); i++)
			{
				Node* cur = ht._table[i];
				while (cur)
				{
					Node* copy = new Node(cur->_data);
		
					//头插到新表
					copy->_next = _table[i];//copy的下一个桶为_table[i]
					_table[i] = copy;//把copy作为当前位置的第一个桶
					cur = cur->_next;//cur往下移					
				}
			}
		
		}
 
		//赋值运算符重载
		HashTable& operator=(HashTable ht)
		{
			_table.swap(ht._table);
			swap(_n, ht._n);
 
			return *this;
		}
 
		//析构
		~HashTable()
		{
			for (size_t i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}
 
		iterator begin()
		{
			size_t i = 0;
			while (i < _table.size())
			{
				if (_table[i])
				{
					return iterator(_table[i], this);
				}
				++i;
			}
	
			return end();
		}
		
		iterator end()
		{
			return iterator(nullptr, this);
		}
	
		size_t GetNextPrime(size_t prime)
		{
			const int PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul,97ul,193ul,389ul,769ul,
				1543ul,3079ul,6151ul,12289ul,24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
				1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
				50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};
	
			size_t i = 0;
			for (; i < PRIMECOUNT; i++)
			{
				if (primeList[i] > prime)
				{
					return primeList[i];
				}
			}
	
			return primeList[i];
		}
 
		//插入
		pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			auto ret = Find(kot(data));
			if (ret != end())
			{
				return make_pair(ret,false);
			}
		
			//仿函数
			HashFunc hf;
		
			//负载因子为1时，进行增容
			if (_n == _table.size())
			{
				vector<Node*> newTable;
				newTable.resize(GetNextPrime(_table.size()));
		
				//取旧表中的结点，重新计算映射到新表中的位置，挂到新表中
				for (size_t i = 0; i < _table.size(); i++)
				{
					if (_table[i])
					{
						Node* cur = _table[i];
						while (cur)
						{
							Node* next = cur->_next;//保存下一个结点
							size_t index = hf(kot(cur->_data)) % newTable.size();//计算结映射到新表中的位置
		
							//头插
							cur->_next = newTable[index];//=nullptr,将cur->_next置空
							newTable[index] = cur;//将结点插入到新表
							cur = next;//哈希桶往下挪一个
						}
						_table[i] = nullptr;//当前哈希桶的第一个位置置空
					}
				}
				_table.swap(newTable);
			}
		
			//不需要增容时，头插
			size_t index = hf(kot(data)) % _table.size();
			Node* newNode = new Node(data);
		
			newNode->_next = _table[index];//让新节点newNode的next指向第一个桶
			_table[index] = newNode;//让新节点newNode做第一个桶
			++_n;//更新哈希表大小	
 
			return make_pair(iterator(newNode, this), true);
		}
		
		//查找
		iterator Find(const K& key)
		{
			//哈希表为空
			if (_table.size() == 0)
			{
				return end();
			}
		
			KeyOfT kot;
			HashFunc hf;
			size_t index = hf(key) % _table.size();//计算在哈希表中的位置
			
			//在哈希表当前位置的所有桶中找key
			Node* cur = _table[index];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur,this);
				}
				else
				{
					cur = cur->_next;
				}
			}
		
			return end();
		}
		
		//删除
		bool Erase(const K& key)
		{
			size_t index = hf(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[index];
		
			while (cur)
			{
				if (kot(cur->data) == key)//cur这个桶就是key
				{
					if (_table[index] == cur)//cur是第一个桶
					{
						_table[index] = cur->_next;
					}
					else//cur不是第一个桶
					{
						prev->_next = cur->_next;
					}
		
					--_n;//更新表大小
					delete cur;//删除节点
					return true;
				}
		
				prev = cur;
				cur = cur->_next;
			}
		
			return false;
		}
 
	private:
		vector<Node*> _table;
		size_t _n = 0;
	};
}

UnorderedSet.h

#pragma once
#include "HashTable.h"
 
namespace yjl
{
	template<class K>
	class unordered_set
	{
		//set就取k
		struct SetKeyOfT
		{
			const K& operator()(const K& k)
			{
				return k;
			}
		};
 
	public:
		//如果不加typename，类模板里面找迭代器的类型找不到，因为OpenHash::HashTable<K, K, SetKeyOfT>::iterator没有实例化，因此编译器并不知道它是类型还是成员函数或成员变量，编译无法通过
		//用typename表明OpenHash::HashTable<K, K, SetKeyOfT>::iterator是一个类型，不是成员函数或成员变量，不需要等到实例化时才能确定
		typedef typename OpenHash::HashTable<K, K, SetKeyOfT>::iterator iterator;
 
		iterator begin()
		{
			return _ht.begin();
		}
 
		iterator end()
		{
			return _ht.end();
		}
 
		pair<iterator, bool> Insert(const K& k)
		{
			return _ht.Insert(k);
		}
 
	private:
		OpenHash::HashTable<K, K, SetKeyOfT> _ht ;
 
	};
 
	void test_unordered_set1()
	{
		unordered_set<int> us;
		us.Insert(100);
		us.Insert(5);
		us.Insert(6);
		us.Insert(32);
		us.Insert(8);
		us.Insert(14);
		us.Insert(65);
		us.Insert(27);
		us.Insert(39);
 
		unordered_set<int>::iterator it = us.begin();
		while (it != us.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}
}

UnorderedMap.h

#pragma once
#include "HashTable.h"
 
namespace yjl
{
	template<class K,class V>
	class unordered_map
	{
		//map就取kv的first
		struct MapKeyOfT
		{
			const K& operator()(const pair<K,V>& kv)
			{
				return kv.first;
			}
		};
 
	public:
		typedef typename OpenHash::HashTable<K, pair<K,V>, MapKeyOfT>::iterator iterator;
 
		iterator begin()
		{
			return _ht.begin();
		}
 
		iterator end()
		{
			return _ht.end();
		}
 
		pair<iterator, bool> Insert(const pair<K,V>& kv)
		{
			return _ht.Insert(kv);
		}
 
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			return ret.first->iterator;
		}
 
	private:
		OpenHash::HashTable<K, pair<K, V>, MapKeyOfT> _ht;
 
	};
 
	void test_unordered_map1()
	{
		unordered_map<string,string> um;
		um.Insert(make_pair("spring", "春天"));
		um.Insert(make_pair("summer", "夏天"));
		um.Insert(make_pair("autumn", "秋天"));
		um.Insert(make_pair("winter", "冬天"));
 
		unordered_map<string,string>::iterator it = um.begin();
		while (it != um.end())
		{
			cout << it->first << ":" << it->second << endl;
			++it;
		}
	}
}

Test.cpp

#define  _CRT_SECURE_NO_WARNINGS  1
#include "HashTable.h"
#include "UnorderedSet.h"
#include "UnorderedMap.h"
 
int main()
{
	delia::test_unordered_set1();
	delia::test_unordered_map1();
 
	return 0;
}

在这里插入图片描述

小能软糖_

关注

12
点赞
踩
6

收藏

觉得还不错? 一键收藏
打赏
0
评论
【C++】---STL之用哈希桶模拟实现：unordered_set和unordered_map

将__HTIterator中的_node更改为HashTable指针类型之后，由于在特定的平台上，指针所占的空间是一定的（在Win32平台上是4字节，在64位平台上是8字节），这样可以通过编译。如果我们要用哈希桶对unordered_set和unordered_map进行封装的话，我们面临一个问题那就是 unordered_set传给哈希桶的是K，而unordered_map传给哈希桶的是pair，同样我们之前也遇到过set和map的封装，是用的红黑树。迭代器最重要的部分呢~
复制链接

扫一扫