C++之unordered_map，unordered_set模拟实现

最新推荐文章于 2024-10-17 15:47:06 发布

搬砖狗(●—●)

最新推荐文章于 2024-10-17 15:47:06 发布

阅读量130

点赞数 1

分类专栏： C++ 文章标签： c++ 开发语言

本文链接：https://blog.csdn.net/2303_77100822/article/details/132904900

版权

C++ 专栏收录该内容

31 篇文章 0 订阅

订阅专栏

unordered_map，unordered_set模拟实现

哈希表源代码
哈希表模板参数的控制
仿函数增加
正向迭代器实现
unordered_set实现
unordered_map实现
map/set 与 unordered_map/unordered_set对比
哈希表调整后代码

哈希表源代码

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		//所有类型都强转为size_t类型
		return (size_t)key;
	}
};

//模板特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t val = 0;
		for (auto ch : key)
		{
			val *= 131;
			val += ch;
		}

		return val;
	}
};
namespace HashBucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;

		//构造函数
		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};

	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:

		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		inline size_t __stl_next_prime(size_t n)
		{
			static const size_t __stl_num_primes = 28;
			static const size_t __stl_prime_list[__stl_num_primes] =
			{
				53, 97, 193, 389, 769,
				1543, 3079, 6151, 12289, 24593,
				49157, 98317, 196613, 393241, 786433,
				1572869, 3145739, 6291469, 12582917, 25165843,
				50331653, 100663319, 201326611, 402653189, 805306457,
				1610612741, 3221225473, 4294967291
			};

			for (size_t i = 0; i < __stl_num_primes; ++i)
			{
				if (__stl_prime_list[i] > n)
				{
					return __stl_prime_list[i];
				}
			}

			return -1;
		}
		bool Insert(const pair<K, V>& kv)
		{
			//如果该键值对存在，就返回false
			if (Find(kv.first))
			{
				return false;
			}

			Hash hash;
			//如果负载因子为1就扩容
			if (_size == _tables.size())
			{
				//创建一个新的哈希表
				vector<Node*> newTables;
				size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();

				//将每个元素初始化为空
				newTables.resize(__stl_next_prime(_tables.size()), nullptr);

				//将旧表结点插入到新表当中
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];

					while (cur)
					{
						//记录cur的下一个结点
						Node* next = cur->_next;
						//计算相应的哈希桶编号
						size_t hashi = hash(cur->_kv.first) % newTables.size();

						//将旧表结点移动值新表
						cur->_next = newTables[hashi];
						newTables[hashi] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newTables);
			}
			//计算哈希桶编号
			size_t hashi = hash(kv.first) % _tables.size();

			//插入结点
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			//元素个数++
			_size++;
			return true;
		}
		//查找
		Node* Find(const K& key)
		{
			//哈希表为空就返回空
			if (_tables.size() == 0)
			{
				return nullptr;
			}
			Hash hash;
			//计算哈希地址
			size_t hashi = hash(key) % _tables.size();
			Node* cur = _tables[hashi];
			//遍历哈希桶
			while (cur)
			{
				if ((cur->_kv.first) == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}
		//删除
		bool Erase(const K& key)
		{
			//哈希表大小为0，删除失败
			if (_tables.size() == 0)
			{
				return false;
			}
			Hash hash;
			//计算哈希地址
			size_t hashi = hash(key) % _tables.size();

			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			//遍历哈希桶，寻找删除结点是否存在
			while (cur)
			{
				if (hash(hash(cur->_kv.first)) == key)
				{
					if (prev)
					{
						prev->_next = cur->_next;
					}
					else
					{
						_tables[hashi] = cur->_next;
					}
					//删除该结点
					delete cur;
					_size--;

					return true;
				}

				prev = cur;
				cur = cur->_next;
			}
			//删除结点不存在，返回false
			return false;
		}

		size_t Size()
		{
			return _size;
		}

		size_t TableSize()
		{
			return _tables.size();
		}

		size_t BucketNum()
		{
			size_t num = 0;
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					num++;
				}
			}
			return num;
		}
	private:
		vector<Node*> _tables;
		size_t _size = 0;
	};
}

哈希表模板参数的控制

unordered_set属于K模型，unordered_map属于KV模型，但是在底层上我们都是用一个哈希表来实现的，所以我们需要将哈希表的第二个参数设置为T。

template<class K, class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		//构造函数
		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};
	template<class K, class T, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
	//......
	private:
	vector<Node*> _tables;
	size_t _size = 0;
};

T模板参数可能只是键值Key，也可能是由Key和Value共同构成的键值对。如果是unordered_set容器，那么它传入底层红黑树的模板参数就是Key和Key：

template<class K, class Hash = HashFunc<K>>
class unorder_set
{
public:
	//...
private:
	HashBucket::HashTable<K, K, Hash> _ht;
};

如果是unordered_map容器，那么它传入底层红黑树的模板参数就是Key和Value：

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{
public:
	//...
private:
	HashBucket::HashTable<K, pair<K, V>, Hash> _ht;
};

在这里插入图片描述

仿函数增加

对于unordered_set容器，我们需要进行键值比较就是对key值进行比较，也就是直接比较T就可以了，但是对于unordered_map容器来说，我们需要比较的是键值对<key，value>中的key，我们需要先将key提取出来，在进行比较。

所以，我们需要在上层unordered_set和unordered_map中各提供一个仿函数，根据传入的T类型分开进行比较操作：

map仿函数：

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	//...
private:
	HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

set仿函数：

template<class K, class Hash = HashFunc<K>>
class unorder_set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	//...
private:
	HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

在这里插入图片描述

正向迭代器实现

哈希表只存在正向迭代器，哈希表的正向迭代器实际上是对整个哈希表进行了封装：

//前置声明
template<class K, class T, class Hash, class KeyOfT>
class HashTable;

template<class K, class T, class Hash, class KeyOfT>
struct __HashIterator
{
	typedef HashNode<T> Node;
	typedef HashTable <K, T, Hash, KeyOfT> HT;
	typedef __HashIterator <K, T, Hash, KeyOfT> Self;

	
	Node* _node;
	HT* _pht;
}

*运算符重载

解引用操作就是返回单链表某个结点的数据：

T& operator*()
{
	return _node->_data;
}

->运算符重载

->操作就是返回数据的地址：

T* operator->()
{
	return &_node->_data;
}

++运算符重载

哈希表中++其实就是寻找当前哈希桶中的该结点下一个结点，如果一个哈希桶中已经寻找完，就去下一个哈希桶中进行寻找，直到找到为止；

代码如下：

Self& operator++()
{
	//寻找该结点下一个结点点
	if (_node->_next)
	{
		//下一个结点不为空，就指向下一个结点
		_node = _node->_next;
	}
	else
	{
		Hash hash;
		KeyOfT kot;

		//为空就计算该哈希桶所处位置的哈希地址
		size_t i = hash(kot(_node->_data)) % _pht->_tables.size();

		//地址++就计算出下一个桶的位置
		i++;

		//继续循环寻找
		for (; i < _pht->_tables.size(); i++)
		{
			if (_pht->_tables[i])
			{
				_node = _pht->_tables[i];
				break;
			}
		}
		//找完整个哈希表，就指向nullptr
		if (i == _pht->_tables.size())
		{
			_node = nullptr;
		}
	}

	return *this;
}

!= 和 == 运算符重载

!= 和 ==就是判断是不是同一个结点：

//!=
bool operator!=(const Self& s)
{
	return _node != s._node;
}
//==
bool operator==(const Self& s)
{
	return _node == s._node;
}

begin()与end()实现

begin函数返回哈希表当中第一个不为nullptr位置的正向迭代器。
end函数返回哈希表当中最后一个位置下一个位置的正向迭代器，这里直接用空指针构造一个正向迭代器。

class HashTable
{
	typedef HashNode<T> Node;

	template<class K, class T, class Hash, class KeyOfT>
	friend struct __HashIterator;
public:
	typedef __HashIterator <K, T, Hash, KeyOfT> iterator;

	iterator begin()
	{
		//从前往后遍历整个数组
		for (size_t i = 0; i < _tables.size(); i++)
		{
			//找到不为空的位置并返回该位置迭代器
			if (_tables[i])
			{
				return iterator(_tables[i], this);
			}
		}
		//最后返回end();
		return end();
	}

	iterator end()
	{
		//返回一个为空的位置的迭代器
		return iterator(nullptr, this);
	}
}

unordered_set实现

template<class K, class Hash = HashFunc<K>>
class unordered_set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename HashBucket::HashTable <K, K, Hash, SetKeyOfT>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	pair<iterator, bool> insert(const K& key)
	{
		return _ht.Insert(key);
	}
private:
	HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

unordered_map实现

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	typedef typename HashBucket::HashTable <K, pair<K, V>, Hash, MapKeyOfT>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	pair<iterator, bool> insert(const K& key)
	{
		_ht.Insert(key);
	}

	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}
private:
	HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

map/set 与 unordered_map/unordered_set对比

map/set 底层是使用红黑树实现的，unordered_map/unordered_set底层是用哈希表进行实现的，两者的底层实现是不同的，对于少量的数据，他们的增删查改没有区别，但是对于大量的数据unordered系列是要更胜一筹的，特别是对于查找来说，unordered系列基本可以一直保持高效率；

哈希表调整后代码

#pragma once

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		//所有类型都强转为size_t类型
		return (size_t)key;
	}
};

//模板特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t val = 0;
		for (auto ch : key)
		{
			val *= 131;
			val += ch;
		}

		return val;
	}
};

namespace HashBucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		//构造函数
		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	//前置声明
	template<class K, class T, class Hash, class KeyOfT>
	class HashTable;

	template<class K, class T, class Hash, class KeyOfT>
	struct __HashIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable <K, T, Hash, KeyOfT> HT;
		typedef __HashIterator <K, T, Hash, KeyOfT> Self;

		
		Node* _node;
		HT* _pht;

		//构造函数
		__HashIterator(Node* node, HT* pht)
			:_node(node)
			,_pht(pht)
		{}

		T& operator*()
		{
			return _node->_data;
		}

		T* operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			//寻找该结点下一个结点点
			if (_node->_next)
			{
				//下一个结点不为空，就指向下一个结点
				_node = _node->_next;
			}
			else
			{
				Hash hash;
				KeyOfT kot;

				//为空就计算该哈希桶所处位置的哈希地址
				size_t i = hash(kot(_node->_data)) % _pht->_tables.size();

				//地址++就计算出下一个桶的位置
				i++;

				//继续循环寻找
				for (; i < _pht->_tables.size(); i++)
				{
					if (_pht->_tables[i])
					{
						_node = _pht->_tables[i];
						break;
					}
				}
				//找完整个哈希表，就指向nullptr
				if (i == _pht->_tables.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		bool operator==(const Self& s)
		{
			return _node == s._node;
		}
	};

	template<class K, class T, class Hash, class KeyOfT>
	class HashTable
	{
		typedef HashNode<T> Node;

		template<class K, class T, class Hash, class KeyOfT>
		friend struct __HashIterator;
	public:
		typedef __HashIterator <K, T, Hash, KeyOfT> iterator;

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return iterator(_tables[i], this);
				}
			}

			return end();
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		inline size_t __stl_next_prime(size_t n)
		{
			static const size_t __stl_num_primes = 28;
			static const size_t __stl_prime_list[__stl_num_primes] =
			{
				53, 97, 193, 389, 769,
				1543, 3079, 6151, 12289, 24593,
				49157, 98317, 196613, 393241, 786433,
				1572869, 3145739, 6291469, 12582917, 25165843,
				50331653, 100663319, 201326611, 402653189, 805306457,
				1610612741, 3221225473, 4294967291
			};

			for (size_t i = 0; i < __stl_num_primes; ++i)
			{
				if (__stl_prime_list[i] > n)
				{
					return __stl_prime_list[i];
				}
			}

			return -1;
		}
		pair<iterator, bool> Insert(const T& data)
		{
			Hash hash;
			KeyOfT kot;
			//如果该键值对存在，就返回false
			iterator ret = Find((kot(data)));
			if (ret != end())
			{
				return make_pair(ret, false);
			}
			//如果负载因子为1就扩容
			if (_size == _tables.size())
			{
				//创建一个新的哈希表
				vector<Node*> newTables;
				size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();

				//将每个元素初始化为空
				newTables.resize(__stl_next_prime(_tables.size()), nullptr);

				//将旧表结点插入到新表当中
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];

					while (cur)
					{
						//记录cur的下一个结点
						Node* next = cur->_next;
						//计算相应的哈希桶编号
						size_t hashi = hash(kot(cur->_data)) % newTables.size();

						//将旧表结点移动值新表
						cur->_next = newTables[hashi];
						newTables[hashi] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newTables);
			}
			//计算哈希桶编号
			size_t hashi = hash(kot(data)) % _tables.size();

			//插入结点
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;

			//元素个数++
			_size++;
			return make_pair(iterator(newnode, this), true);
		}
		//查找
		iterator Find(const K& key)
		{
			//哈希表为空就返回空
			if (_tables.size() == 0)
			{
				return end();
			}
			Hash hash;
			KeyOfT kot;
			//计算哈希地址
			size_t hashi = hash(key) % _tables.size();
			Node* cur = _tables[hashi];
			//遍历哈希桶
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}
				cur = cur->_next;
			}
			return end();
		}
		//删除
		bool Erase(const K& key)
		{
			//哈希表大小为0，删除失败
			if (_tables.size() == 0)
			{
				return false;
			}
			Hash hash;
			//计算哈希地址
			size_t hashi = hash(key) % _tables.size();

			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			//遍历哈希桶，寻找删除结点是否存在
			while (cur)
			{
				if (hash(kot(cur->_data)) == key)
				{
					if (prev)
					{
						prev->_next = cur->_next;
					}
					else
					{
						_tables[hashi] = cur->_next;
					}
					//删除该结点
					delete cur;
					_size--;

					return true;
				}

				prev = cur;
				cur = cur->_next;
			}
			//删除结点不存在，返回false
			return false;
		}
	private:
		vector<Node*> _tables;
		size_t _size = 0;
	};
}