【C++】unordered_set和unordered_map的封装（哈希）

最新推荐文章于 2024-09-27 11:26:03 发布

秦jh_

最新推荐文章于 2024-09-27 11:26:03 发布

阅读量2.4k

点赞数 123

分类专栏： C++ 文章标签：哈希算法 c++ 算法数据结构

本文链接：https://blog.csdn.net/qinjh_/article/details/139753559

版权

C++ 专栏收录该内容

24 篇文章 0 订阅

订阅专栏

🌈个人主页：秦jh_-CSDN博客
🔥 系列专栏：https://blog.csdn.net/qinjh_/category_12575764.html?spm=1001.2014.3001.5482

前言

💬 hello! 各位铁子们大家好哇。

今日更新了unordered_map和unordered_set封装的相关内容
🎉 欢迎大家关注🔍点赞👍收藏⭐️留言📝

key和pair

前面已经实现了哈希的底层，现用哈希进行封装。

unordered_set和unordered_map的封装和map、set大体思路一样。hash是底层，他并不知道传入的是k还是pair，但是上层的unordered_set和unordered_map知道。所以在hash多传入一个模板参数KeyOfT，这样再在map和set中分别实现取出key的逻辑即可。

仿函数hash

由于hash现在是底层，我们的仿函数不可能直接传给hash底层，所以得在unordered_set和unordered_map上传多一个模板参数，这样取模的仿函数就可以在外面传了。

迭代器

当遍历完一个桶后，准备找下一个桶时，就需要有哈希表，不然就找不到下一个桶，所以iterator需要传第二个参数：哈希表的指针。

operator++

当当前桶走完了，就要找下一个不为空的桶的第一个节点。循环结束有两种可能，一：所有桶都走完了。二：找到下一个不为空的桶。

HashTable.h

#pragma once 
#include<vector>

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key; //负数、指针都能转
	}
};

//特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t hash = 0;
		for (auto ch : key)
		{
			hash *= 131;
			hash += ch;
		}
		return hash;
	}
};

namespace open_address
{
	enum State
	{
		EMPTY,
		EXIST,
		DELETE
	};

	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;
		State _state = EMPTY;
	};

	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
	public:
		HashTable()
		{
			_tables.resize(10);
		}

		bool Insert(const pair<K, V>& kv)
		{
			if (Find(kv.first))  //不允许冗余
				return false;

			//扩容
			if (_n * 10 / _tables.size() >= 7)
			{
				//方法一
				//size_t newsize = _tables.size() * 2;    //用vector的话需要手动映射
				//vector<HashData<K, V>> newtables(newsize);

				旧表重新计算负载到新表
				//for(size_t i=0;i<_tables.size();i++)
				//{ }

				//方法二
				HashTable<K, V, Hash> newHT;
				newHT._tables.resize(_tables.size() * 2);

				//旧表重新计算负载到新表
				for (size_t i = 0; i < _tables.size(); i++)
				{
					if (_tables[i]._state == EXIST)
					{
						newHT.Insert(_tables[i]._kv);//用HashTable对象插入，可以复用Insert，不需要手动映射
					}								 //newHT已经是扩容好的了，就跳过扩容，直接来到探测部分
				}									 //新表插入好后，再跟旧表互换
				_tables.swap(newHT._tables);
			}
			Hash hs;
			size_t hashi = hs(kv.first) % _tables.size();  //不能模capacity,否则会得到比size大的数，而size后面的位置不能用[]得到
			//线性探测
			while (_tables[hashi]._state == EXIST)
			{
				++hashi;
				hashi %= _tables.size(); //如果往后找找不到，就回到前面继续找
			}

			_tables[hashi]._kv = kv;
			_tables[hashi]._state = EXIST;
			++_n;

			return true;
		}

		HashData<K, V>* Find(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			//线性探测
			while (_tables[hashi]._state != EMPTY)
			{
				if (_tables[hashi]._state == EXIST &&
					_tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];
				}
				++hashi;
				hashi %= _tables.size();
			}
			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}
			else
			{
				ret->_state = DELETE;
				--_n;
				return true;
			}
		}

	private:
		vector<HashData<K, V>> _tables;
		size_t _n = 0; //有效数据个数
	};

	void TestHT1()
	{
		int a[] = { 10001,11,55,24,19,12,31 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		cout << ht.Find(55) << endl;
		cout << ht.Find(31) << endl;

		ht.Erase(55);
		cout << ht.Find(55) << endl;
		cout << ht.Find(31) << endl;
	}

	void TestHT2()
	{
		int a[] = { 10001,11,55,24,19,12,31 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		ht.Insert(make_pair(32, 32));
		ht.Insert(make_pair(32, 32));
	}

	//如果key不支持强转成整形取模，就要自己提供转换成整形的仿函数
	void TestHT3()
	{
		HashTable<string, int> ht;
		ht.Insert(make_pair("sort", 1));
		ht.Insert(make_pair("left", 1));
		ht.Insert(make_pair("insert	", 1));

		//cout << StringHashFunc()("abcd") << endl;
		//cout << StringHashFunc()("aadd") << endl;

	}
}


namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	//前置声明
	//HashTable需要用到__HTIterator，__HTIterator也需要用到HashTable
	//所以只能用前置声明
	template<class K, class T, class KeyOfT, class Hash >
	class HashTable;

	//template<class K, class T,class KeyOfT, class Hash >
	//struct __HTIterator
	//{
	//	typedef HashNode<T> Node;
	//	typedef __HTIterator<K, T, KeyOfT, Hash> Self;

	//	Node* _node;
	//	HashTable<K, T, KeyOfT, Hash>* _pht;  //当前桶为空的时候需要找下一个桶，就需要表才能找到下一个桶

	//	__HTIterator(Node* node,HashTable<K,T,KeyOfT,Hash>* pht)
	//		:_node(node)
	//		,_pht(pht)
	//	{}

	//	T& operator*()
	//	{
	//		return _node->_data;
	//	}

	//	T* operator->()
	//	{
	//		return &_node->_data;
	//	}

	//	Self operator++()
	//	{
	//		if (_node->_next)
	//		{
	//			//当前桶没走完，找当前桶的下一个节点
	//			_node = _node->_next;
	//		}
	//		else
	//		{
	//			//当前桶走完了，找下一个不为空的桶的第一个节点
	//			KeyOfT kot;
	//			Hash hs;
	//			size_t i = hs(kot(_node->_data)) % _pht->_tables.size(); 
	//			++i;
	//			for (; i < _pht->_tables.size(); i++)
	//			{
	//				if (_pht->_tables[i])
	//					break;
	//			}

	//			if (i == _pht->_tables.size())
	//			{
	//				//所有桶都走完了，最后一个的下一个用nullptr标记
	//				_node = nullptr;
	//			}
	//			else
	//			{
	//				_node = _pht->_tables[i];
	//			}
	//		}
	//		return *this;
	//	}

	//	bool operator!=(const Self& s)
	//	{
	//		return _node != s._node;
	//	}
	//};


	template<class K ,class T,class KeyOfT, class Hash >
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		//__HTIterator需要访问HashTable的私有，所以用友元
		/*template<class K, class T, class KeyOfT, class Hash >
		friend struct __HTIterator;*/

		//内部类
		template<class Ptr, class Ref>
		struct __HTIterator 
		{
			typedef HashNode<T> Node;
			typedef __HTIterator Self;

			Node* _node;
			const HashTable* _pht;  //当前桶为空的时候需要找下一个桶，就需要表才能找到下一个桶

			__HTIterator(Node* node,const HashTable* pht)
				:_node(node)
				,_pht(pht)
			{}

			Ref operator*()
			{
				return _node->_data;
			}

			Ptr operator->()
			{
				return &_node->_data;
			}

			Self operator++()
			{
				if (_node->_next)
				{
					//当前桶没走完，找当前桶的下一个节点
					_node = _node->_next;
				}
				else
				{
					//当前桶走完了，找下一个不为空的桶的第一个节点
					KeyOfT kot;
					Hash hs;
					size_t i = hs(kot(_node->_data)) % _pht->_tables.size(); 
					++i;
					for (; i < _pht->_tables.size(); i++)
					{
						if (_pht->_tables[i])
							break;
					}

					if (i == _pht->_tables.size())
					{
						//所有桶都走完了，最后一个的下一个用nullptr标记
						_node = nullptr;
					}
					else
					{
						_node = _pht->_tables[i];
					}
				}
				return *this;
			}

			bool operator!=(const Self& s)
			{
				return _node != s._node;
			}
		};

		
		
		typedef __HTIterator<T*,T&> iterator;
		typedef __HTIterator<const T*,const T&> const_iterator;
		

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{	//this ->HashTable*			//begin写在HashTable里面，
					return iterator(cur, this);//this：返回的是哈希表的指针
				}
			}
			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		const_iterator begin()const
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{	//this ->const HashTable*			//begin写在HashTable里面，
					return const_iterator(cur, this);//this：返回的是哈希表的指针
				}
			}
			return end();
		}

		const_iterator end()const 
		{
			return const_iterator(nullptr, this);
		}

		HashTable()
		{
			_tables.resize(10, nullptr);
			_n = 0;
		}
		
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;

					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			iterator it = Find(kot(data));
			if (it!= end())
				return make_pair(it,false);

			Hash hs;
			//扩容
			//负载因子为1时扩容
			if (_n == _tables.size())
			{
				//HashTable<K, V> newHT;
				//newHT._tables.resize(_tables.size() * 2);

				旧表重新计算负载到新表
				//for (size_t i = 0; i < _tables.size(); i++)
				//{
				//	Node* cur = _tables[i];
				//	while(cur)
				//	{
				//		newHT.Insert(cur->_kv);
				//		cur = cur->_next;
				//	}								 
				//}									
				//_tables.swap(newHT._tables);

				vector<Node*> newTables(_tables.size() * 2, nullptr); 
				for (size_t i = 0; i < _tables.size(); i++) 
				{
					Node* cur = _tables[i];  
					while(cur)
					{ 
						Node* next = cur->_next;  
						//头插到新表的位置
						size_t hashi = hs(kot(cur->_data)) % newTables.size(); 
						cur->_next = newTables[hashi];   
						newTables[hashi] = cur;   

						cur = next; 
					}				 
					_tables[i] = nullptr; 
				}									
				_tables.swap(newTables);  
			}

			size_t hashi = hs(kot(data)) % _tables.size(); 
			Node* newnode = new Node(data); 
			//头插
			newnode->_next = _tables[hashi]; 
			_tables[hashi] = newnode; 
			++_n; 

			return make_pair(iterator(newnode,this),true);
		}

		iterator Find(const K& key)
		{
			KeyOfT kot;
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur,this);
				}
				cur = cur->_next;
			}
			return end();
		}

		bool Erase(const K& key)
		{
			KeyOfT kot; 
			Hash hs;
			size_t hashi = hs(key) % _tables.size(); 
			Node* prev = nullptr;
			Node* cur = _tables[hashi]; 
			while (cur) 
			{
				if (kot(cur->_data) == key)
				{
					//如果删除的是第一个
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

	private:
		vector<Node*> _tables;  //指针数组
		size_t _n;
		
		//vector<list<pair<K, V>>> _tables;
	};

	/*void TestHT1()
	{
		int a[] = { 10001,11,55,24,19,12,31,4,34,44 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(make_pair(e, e));
		}

		ht.Insert(make_pair(32, 32));
		ht.Insert(make_pair(32, 32));

		ht.Erase(31);
		ht.Erase(11);

	}

	void TestHT2()
	{
		HashTable<string, int> ht; 
		ht.Insert(make_pair("sort", 1));
		ht.Insert(make_pair("left", 1));
		ht.Insert(make_pair("insert	", 1));
	}*/

}

my_unordered_map.h

#pragma once


namespace qjh
{
	template<class K,class V, class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT 
		{
			const K& operator()(const pair<K, V>& kv) 
			{
				return kv.first; 
			} 
		};

	public:
		typedef typename hash_bucket::HashTable<K, pair<const K,V>, MapKeyOfT, Hash >::iterator iterator; 
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash >::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert(make_pair(key, V()));
			return ret.first->second;
		}

		pair<iterator, bool> insert(const pair<K, V>& key)
		{
			return _ht.Insert(key);
		}

	private:
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT,Hash > _ht;
		
	};
	

	void test_unordered_map1()
	{
		string arr[] = { "苹果","西瓜","苹果","西瓜","苹果","苹果","西瓜",
			"苹果","香蕉","苹果","香蕉","苹果","草莓","苹果","草莓" };
		unordered_map<string, int> countmap;
		for (auto& e : arr)
		{
			countmap[e]++;
		}

		unordered_map<string,int>::iterator it = countmap.begin();
		while (it != countmap.end())
		{
			//it->first += 'x';  //key不能修改
			it->second += 1;     //value可以修改
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;

		for (auto& kv : countmap)
		{
			cout << kv.first << ":" << kv.second << endl;
		}
		cout << endl;
	}
}

my_unordered_set.h

#pragma once

#include"HashTable.h"

namespace qjh
{
	template<class K,class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key) 
			{
				return key; 
			}
		};

	public:
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

	private:
		hash_bucket::HashTable<K,const K, SetKeyOfT, Hash> _ht;
	};

	void Func(const unordered_set<int>& s)
	{
		unordered_set<int>::iterator it = s.begin();
		while (it != s.end())
		{
			//*it = 1;
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}

	void test_unordered_set()
	{
		unordered_set<int> s;
		s.insert(31);
		s.insert(11);
		s.insert(5);
		s.insert(15);
		s.insert(25);

		unordered_set<int>::iterator it = s.begin();
		while (it != s.end())
		{
			//*it = 1;
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto e : s)
		{
			cout << e << " ";
		}
		cout << endl;
	}
}