unordered_set && unordered_map 的封装

---徐乾---

已于 2024-05-16 21:53:58 修改

阅读量256

点赞数 2

分类专栏： C++ 文章标签：数据结构 C++

于 2023-10-16 23:15:21 首次发布

本文链接：https://blog.csdn.net/m0_62229058/article/details/133579618

版权

C++ 专栏收录该内容

38 篇文章 0 订阅

订阅专栏

1. unordered_map和unordered_set的封装

1.1. 普通迭代器

1.2. unordered_set的完整实现

1.3. unordered_map的完整实现

1.4. 底层的哈希表 (开散列实现)

1. unordered_map和unordered_set的封装

可以看到：

unordered_set 底层用的哈希表的第二个模板参数是一个Key；
unordered_map 底层用的哈希表的第二个模板参数是一个pair。

因此在哈希表这一层，它并不知道自己的第二个模板参数究竟是什么：也许是 pair，也许是一个 K。但是对于 unordered_set 和 unordered_map 这一层来说，它们是知道自己传入的第二个模板参数是什么的：如果是 unordered_map，第二个参数就是 pair；如果是 unordered_set，第二个参数就是 K。

与红黑树的封装一样，这里需要让哈希表具有一个功能：用统一的方式从节点数据中取出 Key——对 unordered_set 来说数据本身就是 Key，对 unordered_map 来说则是 pair 中的 first。因此，此时的哈希表需要再增加一个类模板参数，这个模板参数是一个仿函数，哈希表通过它获得 set 中的 Key 和 map 中 pair 里的 Key。

具体如下：

#pragma once
#include "hush_bucket.h"

namespace Xq
{
	template<class K,class Hash = hash_func<K>>
	class unordered_set
	{
	public:
		struct unordered_set_key_of_data
		{
			const K& operator()(const K& key)
			{
				return key; 
			}
		};
	private:
		typedef Xq::hash_table<K, K, Hash, unordered_set_key_of_data> hash_table;
	private:
		hash_table _table;
	};
}

#pragma once
#include "hush_bucket.h"

namespace Xq
{
	template<class K, class V, class Hash = hash_func<K>>
	class unordered_map
	{	
	public:
		struct unordered_map_key_of_data
		{
			const K& operator()(const std::pair<K,V>& kv)
			{
				return kv.first;
			}
		};
	private:
		typedef Xq::hash_table<K, std::pair<K, V>, Hash, unordered_map_key_of_data> hash_table;
	private:
		hash_table _table;
	};
}

因为站在哈希表的角度，它并不知道它的节点要存储什么数据，也许是一个 Key，也许是一个 pair，故哈希表的节点数据类型不能写死，而应该是一个泛型，如下：

#pragma once
#include <iostream>
#include <utility>
#include <vector>
#include <time.h>
#include <string>

namespace Xq
{
	// hash_func maps a key to a size_t so the table can reduce it modulo the bucket count.
	// The primary template covers key types that convert directly to size_t.
	template<class K>
	struct hash_func
	{
		size_t operator()(const K& key) const
		{
			// The C-style cast is kept on purpose: unlike static_cast it also
			// accepts pointer keys, which the original version supported.
			return (size_t)key;
		}
	};

	// std::string cannot be cast to size_t, so it gets its own specialization:
	// a multiplicative hash that folds in one character at a time.
	template<>
	struct hash_func<std::string>
	{
		size_t operator()(const std::string& str) const
		{
			size_t ret = 0;
			for (auto ch : str)
			{
				ret *= 131;
				ret += ch;
			}
			return ret;
		}
	};

	// One node of a bucket's singly linked list. D is whatever the wrapping container
	// stores: K for unordered_set, std::pair<K, V> for unordered_map.
	template<class D>
	struct hash_table_node
	{
		struct hash_table_node<D>* _next;
		D _data;
		// Initializers are listed in declaration order (_next, then _data).
		hash_table_node(const D& data = D())
			: _next(nullptr)
			, _data(data)
		{}
	};

	// Open-hashing (separate chaining) hash table.
	//   K           - key type used for lookup
	//   D           - type stored in each node
	//   Hash        - functor turning a K into a size_t
	//   Key_Of_Data - functor extracting the K out of a stored D
	template <class K, class D, class Hash, class Key_Of_Data>
	class hash_table
	{
	private:
		typedef hash_table_node<D> node;

	public:
		static const size_t _table_size = 28;               // number of entries in _table_count_arr
		static const size_t _table_count_arr[_table_size];  // candidate bucket counts (all prime)

		hash_table() :_size(0){}

		// Deep copy: every node of `other` is cloned, preserving the order inside each bucket,
		// so the two tables never share nodes.
		hash_table(const hash_table& other)
			: _table(other._table.size(), nullptr)
			, _size(0)
		{
			try
			{
				for (size_t i = 0; i < other._table.size(); ++i)
				{
					node** tail = &_table[i];
					for (node* cur = other._table[i]; cur; cur = cur->_next)
					{
						*tail = new node(cur->_data);
						tail = &(*tail)->_next;
						++_size;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a half-built object; release manually.
				destroy_nodes();
				throw;
			}
		}

		// Copy-and-swap: `other` is already a private copy, so just take over its nodes.
		hash_table& operator=(hash_table other)
		{
			_table.swap(other._table);
			std::swap(_size, other._size);
			return *this;
		}

		// Releases every node still owned by the table.
		~hash_table()
		{
			destroy_nodes();
		}

		// Returns the smallest prime in _table_count_arr that is greater than `size`.
		// When `size` is already at or beyond the largest entry, the largest entry is returned.
		size_t get_prime_size(size_t size) const
		{
			for (size_t i = 0; i < _table_size; ++i)
			{
				if (_table_count_arr[i] > size)
					return _table_count_arr[i];
			}
			return _table_count_arr[_table_size - 1];
		}

		// Inserts `data` unless an element with the same key already exists.
		// Returns true when a new node was added, false for a duplicate key.
		bool insert(const D& data)
		{
			Hash hasher;
			Key_Of_Data kod;

			// Reject duplicates.
			if (find(kod(data))) return false;

			// Grow when the table is empty or the load factor has reached 1.
			if (_table.empty() || _size >= _table.size())
				grow();

			// Push the new node onto the front of its bucket.
			const size_t pos = hasher(kod(data)) % _table.size();
			node* newnode = new node(data);
			newnode->_next = _table[pos];
			_table[pos] = newnode;
			++_size;
			return true;
		}

		// Returns the node holding `key`, or nullptr when the key is absent.
		node* find(const K& key)
		{
			if (_size == 0) return nullptr;

			Hash hasher;
			Key_Of_Data kod;
			for (node* cur = _table[hasher(key) % _table.size()]; cur; cur = cur->_next)
			{
				if (kod(cur->_data) == key)
					return cur;
			}
			return nullptr;
		}

		// Removes the element with the given key.
		// Returns true when an element was removed, false when the key was absent.
		bool erase(const K& key)
		{
			if (_size == 0) return false;

			Hash hasher;
			Key_Of_Data kod;
			// `link` points at the pointer that refers to the current node, so the
			// head of the bucket and interior nodes are unlinked the same way.
			node** link = &_table[hasher(key) % _table.size()];
			while (*link)
			{
				node* cur = *link;
				if (kod(cur->_data) == key)
				{
					*link = cur->_next;
					delete cur;
					--_size;
					return true;
				}
				link = &cur->_next;
			}
			return false;
		}

	private:
		// Moves every node into a larger bucket array; no node is copied or reallocated.
		void grow()
		{
			const size_t new_count = get_prime_size(_table.size());
			// Already at the largest supported size: keep the current buckets.
			if (new_count <= _table.size()) return;

			Hash hasher;
			Key_Of_Data kod;
			std::vector<node*> new_table(new_count, nullptr);
			for (size_t i = 0; i < _table.size(); ++i)
			{
				while (_table[i])
				{
					// Detach the head of the old bucket and push it onto its new bucket.
					node* next = _table[i]->_next;
					const size_t pos = hasher(kod(_table[i]->_data)) % new_table.size();
					_table[i]->_next = new_table[pos];
					new_table[pos] = _table[i];
					_table[i] = next;
				}
			}
			_table.swap(new_table);
		}

		// Deletes all nodes and leaves every bucket empty.
		void destroy_nodes()
		{
			for (size_t i = 0; i < _table.size(); ++i)
			{
				node* cur = _table[i];
				while (cur)
				{
					node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_size = 0;
		}

	private:
		std::vector<node*> _table;
		size_t _size;  // number of stored elements
	};

	// Out-of-class definition of the static prime table. Default template arguments
	// are not allowed here, so the parameter list repeats the class's without them.
	template<class K, class D, class Hash, class Key_Of_Data>
	const size_t hash_table<K, D, Hash, Key_Of_Data>::_table_count_arr[hash_table<K, D, Hash, Key_Of_Data>::_table_size] =
	{
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};
}

1.1. 普通迭代器

第一步我们完成了，接下来应该实现我们的普通迭代器

// The iterator below stores a pointer to its owning hash_table, which is defined
// later; names are resolved top-down, so the template has to be declared first.
template <class K, class D, class Hash, class Key_Of_Data>
class hash_table;

// Forward iterator over all elements of a hash_table. It walks the current bucket's
// list and, when that list ends, moves on to the next non-empty bucket.
// A null _node represents the end position.
template <class K, class D, class Hash, class Key_Of_Data>
struct _hash_table_iterator
{
	// NOTE(review): re-using the name hash_table for this member typedef is accepted by
	// some compilers, but GCC is known to reject it ("changes meaning") - confirm on the
	// target toolchain; qualifying the right-hand side with the enclosing namespace avoids it.
	typedef hash_table<K, D, Hash, Key_Of_Data> hash_table;
	typedef hash_table_node<D> node;
	typedef _hash_table_iterator<K, D, Hash, Key_Of_Data> Self;

	_hash_table_iterator(hash_table* hpt, node* node) :_table_ptr(hpt), _node(node){}

	// Reference to the stored element.
	D& operator*() const
	{
		return _node->_data;
	}
	// Address of the stored element.
	D* operator->() const
	{
		return &(operator*());
	}

	// Comparisons take const references so a temporary such as end() can be compared:
	// `it != table.end()` did not compile with the previous non-const parameters.
	bool operator!=(const Self& s) const
	{
		return _node != s._node;
	}

	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}

	// Pre-increment. Incrementing the end position leaves the iterator unchanged
	// instead of dereferencing a null node.
	Self& operator++()
	{
		if (!_node) return *this;

		// Still inside the current bucket's list.
		if (_node->_next)
		{
			_node = _node->_next;
			return *this;
		}

		// Current bucket exhausted: scan forward for the next non-empty bucket.
		Hash hasher;
		Key_Of_Data kod;
		const size_t bucket_count = _table_ptr->_table.size();
		size_t pos = hasher(kod(_node->_data)) % bucket_count + 1;
		while (pos < bucket_count && !_table_ptr->_table[pos])
		{
			++pos;
		}
		_node = (pos < bucket_count) ? _table_ptr->_table[pos] : nullptr;
		return *this;
	}

	// Post-increment: advances and returns the previous position.
	Self operator++(int)
	{
		Self previous(*this);
		++(*this);
		return previous;
	}

	// The owning table is needed to hop between buckets; _node is the current element.
	hash_table* _table_ptr;
	node* _node;
};

1.2. unordered_set的完整实现

#pragma once
#include "hush_bucket.h"

namespace Xq
{
	// Set facade over the open-hashing table declared in "hush_bucket.h".
	// Every operation forwards to the underlying hash_table, which stores K as both
	// key and data.
	template<class K,class Hash = hash_func<K>>
	class unordered_set
	{
	public:
		// Key extractor passed to the hash table: the element of a set is already the key.
		// Const-qualified so it can also be invoked through a const functor object.
		struct unordered_set_key_of_data
		{
			const K& operator()(const K& key) const
			{
				return key; 
			}
		};
		typedef typename Xq::_hash_table_iterator<K, K, Hash, unordered_set_key_of_data> iterator;

		// Inserts `key`; the bool is false when the key was already present, in which
		// case the iterator refers to the existing element.
		std::pair<iterator,bool> insert(const K& key)
		{
			return _table.insert(key);
		}

		// Returns an iterator to `key`, or end() when it is absent.
		// The key is now forwarded to the table (it was previously dropped, which did not compile).
		iterator find(const K& key)
		{
			return _table.find(key);
		}

		// Erases `key` and returns whatever iterator the table's erase yields.
		// The key is now forwarded to the table (it was previously dropped, which did not compile).
		iterator erase(const K& key)
		{
			return _table.erase(key);
		}

		iterator begin()
		{
			return _table.begin();
		}

		iterator end()
		{
			return _table.end();
		}

	private:
		// The second template argument fixes the type stored in the table's nodes.
		typedef Xq::hash_table<K, K, Hash, unordered_set_key_of_data> hash_table;
	private:
		hash_table _table;
	};
}

1.3. unordered_map的完整实现

#pragma once
#include "hush_bucket.h"

namespace Xq
{
	template<class K, class V, class Hash = hash_func<K>>
	class unordered_map
	{	
	public:
		struct unordered_map_key_of_data
		{
			const K& operator()(const std::pair<K,V>& kv)
			{
				return kv.first;
			}
		};

		typedef typename Xq::_hash_table_iterator<K, std::pair<K, V>, Hash, unordered_map_key_of_data> iterator;

		std::pair<iterator,bool> insert(const std::pair<K, V>& kv)
		{
			return _table.insert(kv);
		}

		V& operator[](const K& key)
		{
			return _table.insert(std::make_pair(key, V())).first->second;
		}

		iterator find(const K& key)
		{
			return _table.find(key);
		}

		iterator erase(const K& key)
		{
			return _table.erase(key);
		}

		iterator begin()
		{
			return _table.begin();
		}

		iterator end()
		{
			return _table.end();
		}

	private:
		// 第二个模板参数确定哈希表里面存放的数据类型
		typedef Xq::hash_table<K, std::pair<K, V>, Hash, unordered_map_key_of_data> hash_table;
	private:
		hash_table _table;
	};
}

1.4. 底层的哈希表 (开散列实现)

#pragma once
#include <iostream>
#include <utility>
#include <vector>
#include <time.h>
#include <string>

namespace Xq
{
	// hash_func maps a key to a size_t so the table can reduce it modulo the bucket count.
	// The primary template covers key types that convert directly to size_t.
	template<class K>
	struct hash_func
	{
		size_t operator()(const K& key) const
		{
			// The C-style cast is kept on purpose: unlike static_cast it also
			// accepts pointer keys, which the original version supported.
			return (size_t)key;
		}
	};

	// std::string cannot be cast to size_t, so it gets its own specialization:
	// a multiplicative hash that folds in one character at a time.
	template<>
	struct hash_func<std::string>
	{
		size_t operator()(const std::string& str) const
		{
			size_t ret = 0;
			for (auto ch : str)
			{
				ret *= 131;
				ret += ch;
			}
			return ret;
		}
	};

	// One node of a bucket's singly linked list. D is whatever the wrapping container
	// stores: K for unordered_set, std::pair<K, V> for unordered_map.
	template<class D>
	struct hash_table_node
	{
		struct hash_table_node<D>* _next;
		D _data;
		// Initializers are listed in declaration order (_next, then _data).
		hash_table_node(const D& data = D())
			: _next(nullptr)
			, _data(data)
		{}
	};

	// The iterator stores a pointer to its owning hash_table, which is defined further
	// down; names are resolved top-down, so the template has to be declared first.
	template <class K, class D, class Hash, class Key_Of_Data>
	class hash_table;

	// Forward iterator over all elements of a hash_table. It walks the current bucket's
	// list and, when that list ends, moves on to the next non-empty bucket.
	// A null _node represents the end position.
	template <class K, class D, class Hash, class Key_Of_Data>
	struct _hash_table_iterator
	{
		// The right-hand side is namespace-qualified so that declaring a member called
		// hash_table does not change the meaning of a name already used in this class.
		typedef Xq::hash_table<K, D, Hash, Key_Of_Data> hash_table;
		typedef hash_table_node<D> node;
		typedef _hash_table_iterator<K, D, Hash, Key_Of_Data> Self;

		_hash_table_iterator(hash_table* hpt, node* node) :_table_ptr(hpt), _node(node){}

		// Reference to the stored element.
		D& operator*() const
		{
			return _node->_data;
		}
		// Address of the stored element.
		D* operator->() const
		{
			return &(operator*());
		}

		// Comparisons take const references so a temporary such as end() can be compared:
		// `it != table.end()` did not compile with the previous non-const parameters.
		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

		// Pre-increment. Incrementing the end position leaves the iterator unchanged
		// instead of dereferencing a null node.
		Self& operator++()
		{
			if (!_node) return *this;

			// Still inside the current bucket's list.
			if (_node->_next)
			{
				_node = _node->_next;
				return *this;
			}

			// Current bucket exhausted: scan forward for the next non-empty bucket.
			Hash hasher;
			Key_Of_Data kod;
			const size_t bucket_count = _table_ptr->_table.size();
			size_t pos = hasher(kod(_node->_data)) % bucket_count + 1;
			while (pos < bucket_count && !_table_ptr->_table[pos])
			{
				++pos;
			}
			_node = (pos < bucket_count) ? _table_ptr->_table[pos] : nullptr;
			return *this;
		}

		// Post-increment: advances and returns the previous position.
		Self operator++(int)
		{
			Self previous(*this);
			++(*this);
			return previous;
		}

		// The owning table is needed to hop between buckets; _node is the current element.
		hash_table* _table_ptr;
		node* _node;
	};

	// Open-hashing (separate chaining) hash table.
	//   K           - key type used for lookup
	//   D           - type stored in each node
	//   Hash        - functor turning a K into a size_t
	//   Key_Of_Data - functor extracting the K out of a stored D
	template <class K, class D,class Hash,class Key_Of_Data>
	class hash_table
	{
	private:
		typedef hash_table_node<D> node;
		// The iterator reads _table directly when it moves to the next bucket.
		friend struct _hash_table_iterator<K, D, Hash, Key_Of_Data>;
	public:
		// Public so callers can name the type returned by begin()/find()/insert().
		typedef _hash_table_iterator<K, D, Hash, Key_Of_Data> iterator;

		static const size_t _table_size = 28;               // number of entries in _table_count_arr
		static const size_t _table_count_arr[_table_size];  // candidate bucket counts (all prime)

		hash_table() :_size(0){}

		// Deep copy: every node of `other` is cloned, preserving the order inside each bucket.
		// Without it the implicit copy would share nodes and both destructors would delete them.
		hash_table(const hash_table& other)
			: _table(other._table.size(), nullptr)
			, _size(0)
		{
			try
			{
				for (size_t i = 0; i < other._table.size(); ++i)
				{
					node** tail = &_table[i];
					for (node* cur = other._table[i]; cur; cur = cur->_next)
					{
						*tail = new node(cur->_data);
						tail = &(*tail)->_next;
						++_size;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a half-built object; release manually.
				destroy_nodes();
				throw;
			}
		}

		// Copy-and-swap: `other` is already a private copy, so just take over its nodes.
		hash_table& operator=(hash_table other)
		{
			_table.swap(other._table);
			std::swap(_size, other._size);
			return *this;
		}

		// Releases every node still owned by the table.
		~hash_table()
		{
			destroy_nodes();
		}

		// Iterator to the first node of the first non-empty bucket, or end() when empty.
		iterator begin()
		{
			for (size_t i = 0; i < _table.size(); ++i)
			{
				if (_table[i])
					return iterator(this, _table[i]);
			}
			return end();
		}

		// The end position is represented by a null node.
		iterator end()
		{
			return iterator(this, nullptr);
		}

		// Returns the smallest prime in _table_count_arr that is greater than `size`.
		// When `size` is already at or beyond the largest entry, the largest entry is returned.
		size_t get_prime_size(size_t size) const
		{
			for (size_t i = 0; i < _table_size; ++i)
			{
				if (_table_count_arr[i] > size)
					return _table_count_arr[i];
			}
			return _table_count_arr[_table_size - 1];
		}

		// Inserts `data` unless an element with the same key already exists.
		// Returns (iterator to the element, true) on insertion and
		// (iterator to the existing element, false) for a duplicate key.
		std::pair<iterator,bool> insert(const D& data)
		{
			Hash hasher;
			Key_Of_Data kod;

			// Reject duplicates.
			iterator existing = find(kod(data));
			if (existing._node) return std::make_pair(existing, false);

			// Grow when the table is empty or the load factor has reached 1.
			if (_table.empty() || _size >= _table.size())
				grow();

			// Push the new node onto the front of its bucket.
			const size_t pos = hasher(kod(data)) % _table.size();
			node* newnode = new node(data);
			newnode->_next = _table[pos];
			_table[pos] = newnode;
			++_size;
			return std::make_pair(iterator(this, newnode), true);
		}

		// Returns an iterator to the element with the given key, or end() when absent.
		iterator find(const K& key)
		{
			if (_size == 0) return end();

			Hash hasher;
			Key_Of_Data kod;
			for (node* cur = _table[hasher(key) % _table.size()]; cur; cur = cur->_next)
			{
				if (kod(cur->_data) == key)
					return iterator(this, cur);
			}
			return end();
		}

		// Index of the first non-empty bucket after `cur_pos`,
		// or size_t(-1) when every later bucket is empty.
		size_t get_effective_next_node(size_t cur_pos) const
		{
			++cur_pos;
			while (cur_pos < _table.size() && !_table[cur_pos])
			{
				++cur_pos;
			}
			return cur_pos < _table.size() ? cur_pos : static_cast<size_t>(-1);
		}

		// Removes the element with the given key and returns an iterator to the element
		// that followed it in iteration order. Returns end() when the key is absent or
		// when the removed element was the last one.
		iterator erase(const K& key)
		{
			if (_size == 0) return end();

			Hash hasher;
			Key_Of_Data kod;
			const size_t pos = hasher(key) % _table.size();

			// `link` points at the pointer that refers to the current node, so the
			// head of the bucket and interior nodes are unlinked the same way.
			node** link = &_table[pos];
			while (*link && !(kod((*link)->_data) == key))
			{
				link = &(*link)->_next;
			}
			if (!*link) return end();  // key not present

			node* victim = *link;
			node* following = victim->_next;
			*link = following;
			delete victim;
			--_size;

			// Nothing left after it in this bucket: continue with the next non-empty bucket.
			if (!following)
			{
				const size_t next_pos = get_effective_next_node(pos);
				if (next_pos != static_cast<size_t>(-1))
					following = _table[next_pos];
			}
			return iterator(this, following);
		}

	private:
		// Moves every node into a larger bucket array; no node is copied or reallocated.
		void grow()
		{
			const size_t new_count = get_prime_size(_table.size());
			// Already at the largest supported size: keep the current buckets.
			if (new_count <= _table.size()) return;

			Hash hasher;
			Key_Of_Data kod;
			std::vector<node*> new_table(new_count, nullptr);
			for (size_t i = 0; i < _table.size(); ++i)
			{
				while (_table[i])
				{
					// Detach the head of the old bucket and push it onto its new bucket.
					node* next = _table[i]->_next;
					const size_t pos = hasher(kod(_table[i]->_data)) % new_table.size();
					_table[i]->_next = new_table[pos];
					new_table[pos] = _table[i];
					_table[i] = next;
				}
			}
			_table.swap(new_table);
		}

		// Deletes all nodes and leaves every bucket empty.
		void destroy_nodes()
		{
			for (size_t i = 0; i < _table.size(); ++i)
			{
				node* cur = _table[i];
				while (cur)
				{
					node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_size = 0;
		}

	private:
		std::vector<node*> _table;
		size_t _size;  // number of stored elements
	};

	// Out-of-class definition of the static prime table. Default template arguments
	// are not allowed here, so the parameter list repeats the class's without them.
	template<class K, class D, class Hash, class Key_Of_Data>
	const size_t hash_table<K, D, Hash, Key_Of_Data>::_table_count_arr[hash_table<K, D, Hash, Key_Of_Data>::_table_size] =
	{
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};
}