[C++]STL---unordered_set与unordered_map的模拟实现

最新推荐文章于 2024-09-12 02:00:25 发布

小呆瓜历险记

最新推荐文章于 2024-09-12 02:00:25 发布

阅读量966

点赞数 33

文章标签： c++ 开发语言容器 STL

本文链接：https://blog.csdn.net/m0_58963318/article/details/138205470

版权

前言

unordered_set与set的区别：

set是基于红黑树实现的有序集合，而unordered_set是基于哈希表实现的无序集合；
set中的元素是按照特定的排序规则进行排序的，而unordered_set中的元素是无序的；
unordered_set通过哈希表实现，查找元素的效率较高，平均时间复杂度为O(1)，而set通过红黑树实现，查找元素的效率较低，平均时间复杂度为O(logN)；
unordered_set通常占用更多的内存，因为需要维护哈希表的结构，而set通常占用较少的内存；
unordered_set的迭代器为单向迭代器，只能向前迭代，不支持反向迭代，而set为双向迭代器；

unordered_map与map的区别：

map是基于红黑树实现的有序键值对容器，而unordered_map是基于哈希表实现的无序键值对容器；
map中的键值对是按照键的特定排序规则进行排序的，而unordered_map中的键值对是无序的；
unordered_map通过哈希表实现，查找键值对的效率较高，平均时间复杂度为O(1)，而map通过红黑树实现，查找键值对的效率较低，平均时间复杂度为O(logN)；
unordered_map通常占用更多的内存，因为需要维护哈希表的结构，而map通常占用较少的内存；
unordered_map的迭代器为单向迭代器，只能向前迭代，不支持反向迭代，而map为双向迭代器；

哈希桶的改造

unordered_map与unordered_set底层皆为哈希桶，因此底层采用一个哈希桶并且让它能够满足unordered_map和unordered_set的基本需求 ( 即采用一个哈希桶适配出unordered_set与unordered_map )，因此需要对哈希桶进行改造；

unordered_map为Key-Value模型，unordered_set为Key模型，因此首先修改链表节点中的数据类型，数据类型修改为T，如此T既可以接收键值对，也可以只接收键值；

template<class T>
struct HashNode
{
	HashNode<T>* _next;
	T _data;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

哈希表中插入Insert() 需要根据键值key判断待插入的数据是否存在，查找Find() 需要根据键值key判断待查找的数据是否存在，删除Erase() 需要根据键值Key确定待删除数据的位置，但是unordered_map数据类型为键值对pair<K，V>，需要将键值Key从键值对pair<K，V>取出，因此HashTable需要增加一个模板参数KeyOfT，这个模板参数接收分别由unordered_set、unordered_map传递的仿函数，此仿函数的作用为取出键值key（由于unordered_set与unorder_map类中传递数据类型是确定的，因此unordered_set与unorder_map类中实现仿函数取出各自的键值Key）；

思考：HashTable的模板参数Hash是否应该存在缺省值？

当数据类型为自定义类型（日期类、string类、......）当使用者使用unordered_set与unordered_map时，若HashTable（unordered_map与unordered_set的底层结构）中没有提供将某种数据类型转换为整型仿函数，需要使用者手动实现将此类型的数据转换为整型的仿函数，使用者显然不可以动手去修改底层代码；因此HashTable种Hash不应该存在缺省值，应该由哈希表的上层unordered_map和unordered_set传递，如此使用者也可以对于自定义类型的数据自定义Hash仿函数实现类型转换为整型；

unordered_map和unordered_set框架

//HashBucket.h文件
template<class T>
struct HashNode
{
	HashNode<T>* _next;
	T _data;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

template<class K, class T,class KeyOfT, class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
    //...
private:
	vector<Node*> _tables;
	size_t _n;//记录哈希表中实际存放的数据个数
};

//Unordered_Set.h文件
template<class K, class Hash = HashFunc<K>>
class Unordered_Set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:

private:
	HashTable<K, K, SetKeyOfT, Hash> _ht;
};

//Unordered_Map文件
template<class K,class V,class Hash=HashFunc<K>>
class Unordered_Map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:

private:
	HashTable<K,pair<K,V>,MapKeyOfT,Hash>
};

哈希桶的初步改造

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

// 特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (auto e : s)
		{
			hash += e;
			hash *= 31;
		}
		return hash;
	}
};

	template<class T>
	struct HashNode
	{
		HashNode<T>* _next;
		T _data;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

template<class K, class T,class KeyOfT, class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
	//构造函数
	HashTable(size_t n = 10)
	{
		_tables.resize(n, nullptr);
		_n = 0;
	}
	//析构函数
	~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur != nullptr)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_tables[i] = nullptr;
		}
	}
	//拷贝构造函数  ht2(ht1)
	HashTable(const HashTable& ht)
	{
		Hash hs;
		KeyOfT kot;
		 //开辟相同大小的空间
		_tables.resize(ht._tables.size());
		//遍历旧表,头插到新表
		for (size_t i = 0; i < ht._tables.size(); i++)
		{
			Node* cur = ht._tables[i];
			while (cur != nullptr)
			{
				Node* next = cur->_next;

				//计算新表的插入位置
				size_t hashi = hs(kot(cur->_data)) % _tables.size();

				cur->_next = _tables[hashi];
				_tables[hashi] = cur;
				cur = next;
			}
		}
		_n = ht._n;
	}

	//赋值运算符重载 ht2=ht1
	HashTable& operator=(HashTable ht)
	{
		_tables.swap(ht._tables);
		swap(_n, ht._n);

		return *this;
	}

	Node* Find(const K& key)
	{
		KeyOfT kot;
		Hash hs;
		size_t hashi = hs(key) % _tables.size();
		Node* cur = _tables[hashi];
		while (cur != nullptr)
		{
			//仿函数对象kot获取结点中的键值
			if (kot((cur->_data)) == key)
			{
				return cur;
			}
			cur = cur->_next;
		}
		return nullptr;
	}


	bool Insert(const T& data)
	{
		//仿函数对象kot获取data中的键值key
		KeyOfT kot;
		//仿函数对象hs将data中的键值key转换为整型
		Hash hs;

		Node* ret = Find(kot(data));
		if (ret != nullptr)
		{
			return false;
		}

		if (_n == _tables.size())
		{
			vector<Node*> _newtables(_tables.size() * 2);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur != nullptr)
				{
					Node* nextnode = cur->_next;
					//首先仿函数(KeyOfT)对象kot获取数据data的键值key
					//最后仿函数(Hash)对象hs将键值key的类型转换为整型
					size_t hashi = hs(kot(cur->_data)) % _newtables.size();
					cur->_next = _newtables[hashi];
					_newtables[hashi] = cur;
					cur = nextnode;
				}
				_tables[i] = nullptr;
			}
			_tables.swap(_newtables);
		}

		//首先仿函数(KeyOfT)对象kot获取数据data的键值key
		//最后仿函数(Hash)对象hs将键值key的类型转换为整型
		size_t hashi = hs(kot(data)) % _tables.size();
		Node* newnode = new Node(data);

		//单链表头插
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		++_n;
		return true;
	}

	bool Erase(const K& key)
	{
		Hash hs;
		KeyOfT kot;
		size_t hashi = hs(key) % _tables.size();
		Node* cur = _tables[hashi];
		Node* prev = nullptr;
		while (cur != nullptr)
		{
			//仿函数(KeyOfT)对象kot获取数据data的键值key确定待删除数据的位置
			if (kot((cur->_data)) == key)
			{
				//删除
				if (prev != nullptr)
				{
					prev->_next = cur->_next;
				}
				else
				{
					_tables[hashi] = cur->_next;
				}
				delete cur;

				--_n;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}
		return false;
	}
private:
	vector<Node*> _tables;
	size_t _n;
};

迭代器的模拟实现

所有容器必须具有迭代器，使用迭代器遍历容器中的数据；因此需要给哈希桶增加迭代器以供unordered_set与unordered_map使用；

思考：迭代器it的++如何实现？

++it操作：

若it不是某个桶的最后一个元素，则it指向这个桶的下一个结点；
若it为某个桶的最后一个元素，则it指向下一个桶的头节点；

若实现++it的操作，不仅需要结点指针记录当前结点，还需要一个哈希桶的指针，方便查找下一个桶；

template<class K, class T, class KeyOfT, class Hash>
struct __HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HTIterator<K, T, KeyOfT, Hash> Self;

	Node* _node;//记录当前结点
	HT* _ht;//哈希表指针用于查找下一个桶

	__HTIterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}
};

operator++()

Self& operator++()
{
	//当前桶未遍历结束,指向桶中下一个结点
	if (_node->_next != nullptr)
	{
		_node = _node->_next;
	}
	//当前桶已遍历结束,寻找下一个桶
	else
	{
		//首先确定当前桶在哈希表中的位置
		Hash hs;
		KeyOfT kot;
		size_t hashi = hs(kot(_node->_data)) % _ht->_tables.size();
		//查找下一个桶
		++hashi;
		while (hashi < _ht->_tables.size())
		{
			//找到下一个桶,it指向桶的头节点
			if (_ht->_tables[hashi] != nullptr)
			{
				_node = ht->_tables[hashi];
				break;//++结束
			}
			++hashi;
		}
		//跳出循环具有两种情形
		//case1:未找到哈希表中的下一个桶,采用空指针作为迭代器的end()
		//case2:找到哈希表中的下一个桶的头节点,返回迭代器
		if (hashi == _ht->_tables.size())
		{
			//case1
			_node = nullptr;
		}
		
	}
        //case2
		return *this;
}

类互相typedef时的前置声明

迭代器中一个成员变量为哈希表指针，而哈希表中需要给迭代器提供出口，哈希表与迭代器的定义必然存在先后顺序，本文先定义迭代器，后定义哈希表（若先定义哈希表，后定义迭代器也会面临同样的问题），此时迭代器中在重定义哈希表时必然找不到哈希表的定义，由于编译器只会向上查找而不会向下查找，所以必须在__HTIterator类前面先声明HashTable类，这种操作叫做前置声明；

友元声明

迭代器it ++时，使用哈希表指针访问_tables，而HashTable中的_tables为私有成员，类外不可以被访问，本文采用友元声明解决此问题，类模板的友元声明需要写模板参数，类名前面加friend关键字；

迭代器的出口

将哈希表中第一个桶的头节点作为遍历的起始位置，将空指针作为迭代器遍历的终止位置；

iterator end()
{
	return iterator(nullptr, this);
}

iterator begin()
{
	for (size_t i = 0; i < _tables.size(); i++)
	{
		// 哈希表中第一个桶的第一个节点
		if (_tables[i])
		{
			return iterator(_tables[i], this);
		}
	}
	return end();
}

插入Insert()

Insert()函数返回类型修改为pair<iterator, bool>，为实现unordered_map中的operator[]重载做准备；

键值对<iterator，bool>其中iterator代表新插入结点的迭代器，bool值表示插入是否成功；
若新结点键值key原先存在，则返回哈希表中原本存在结点的迭代器，并且插入失败，返回false；
若新结点键值key原先不存在，则插入结点，返回新插入结点在哈希表中的迭代器，返回true；

pair<iterator, bool> Insert(const T& data)
{
	KeyOfT kot;
	Hash hs;

	iterator ret = Find(kot(data));
	if (ret != end())
	{
		//键值key原先已存在,插入失败,返回已存在结点的迭代器并且返回false
		return make_pair(ret, false);
	}

	if (_n == _tables.size())
	{
		vector<Node*> _newtables(_tables.size() * 2);
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur != nullptr)
			{
				Node* nextnode = cur->_next;

				size_t hashi = hs(kot(cur->_data)) % _newtables.size();
				cur->_next = _newtables[hashi];
				_newtables[hashi] = cur;
				cur = nextnode;
			}
			_tables[i] = nullptr;
		}
		_tables.swap(_newtables);
	}

	size_t hashi = hs(kot(data)) % _tables.size();
	Node* newnode = new Node(data);

	newnode->_next = _tables[hashi];
	_tables[hashi] = newnode;
	++_n;

	//键值key原先不存在,插入成功,返回新结点的迭代器并且返回true
	return make_pair(newnode, true);
}

查找Find(）

//查找到返回哈希表指针与当前哈希表的结点指针
//若查找不到返回哈希表指针与空指针
iterator Find(const K& key)
{
	KeyOfT kot;
	Hash hs;
	size_t hashi = hs(key) % _tables.size();
	Node* cur = _tables[hashi];
	while (cur != nullptr)
	{
		//仿函数对象kot获取结点中的键值
		if (kot((cur->_data)) == key)
		{
			return iterator(cur, this);
		}
		cur = cur->_next;
	}
	return iterator(nullptr, this);
}

哈希表的最终改造

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

// 特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (auto e : s)
		{
			hash += e;
			hash *= 31;
		}
		return hash;
	}
};

template<class T>
struct HashNode
{
	HashNode<T>* _next;
	T _data;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{}
};

//哈希表前置声明
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K, class T, class KeyOfT, class Hash>
struct __HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HTIterator<K, T, KeyOfT, Hash> Self;

	Node* _node;//记录当前结点
	HT* _ht;//哈希表指针用于查找下一个桶

	__HTIterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	Self& operator++()
	{
		//当前桶未遍历结束,指向桶中下一个结点
		if (_node->_next != nullptr)
		{
			_node = _node->_next;
		}
		//当前桶已遍历结束,寻找下一个桶
		else
		{
			//首先确定当前桶在哈希表中的位置
			Hash hs;
			KeyOfT kot;
			size_t hashi = hs(kot(_node->_data)) % _ht->_tables.size();
			//查找下一个桶
			++hashi;
			while (hashi < _ht->_tables.size())
			{
				//找到下一个桶,it指向桶的头节点
				if (_ht->_tables[hashi] != nullptr)
				{
					_node = ht->_tables[hashi];
					break;//++结束
				}
				++hashi;
			}
			//跳出循环具有两种情形
			//case1:未找到哈希表中的下一个桶,采用空指针作为迭代器的end()
			//case2:找到哈希表中的下一个桶的头节点,返回迭代器
			if (hashi == _ht->_tables.size())
			{
				//case1
				_node = nullptr;
			}
			//case2
			return *this;
		}
	}

	T& operator*()
	{
		return _node->_data;
	}

	T* operator->()
	{
		return &_node->_data;
	}

	bool operator!=(const Self& sl) const
	{
		return _node != sl._node;
	}

	bool operator==(const Self& sl) const
	{
		return _node == sl._node;
	}
};

template<class K, class T,class KeyOfT, class Hash>
class HashTable
{
	template<class K, class T, class KeyOfT, class Hash>
	friend struct __HTIterator;

	typedef HashNode<T> Node;
public:
	typedef __HTIterator<K, T, KeyOfT, Hash> iterator;

	iterator end()
	{
		return iterator(nullptr, this);
	}

	iterator begin()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			// 哈希表中第一个桶的第一个节点
			if (_tables[i])
			{
				return iterator(_tables[i], this);
			}
		}
		return end();
	}

	//构造函数
	HashTable(size_t n = 10)
	{
		_tables.resize(n, nullptr);
		_n = 0;
	}
	//析构函数
	~HashTable()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			while (cur != nullptr)
			{
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_tables[i] = nullptr;
		}
	}

	//拷贝构造函数  ht2(ht1)
	HashTable(const HashTable& ht)
	{
		Hash hs;
		KeyOfT kot;
		 //开辟相同大小的空间
		_tables.resize(ht._tables.size());
		//遍历旧表,头插到新表
		for (size_t i = 0; i < ht._tables.size(); i++)
		{
			Node* cur = ht._tables[i];
			while (cur != nullptr)
			{
				Node* next = cur->_next;

				//计算新表的插入位置
				size_t hashi = hs(kot(cur->_data)) % _tables.size();

				cur->_next = _tables[hashi];
				_tables[hashi] = cur;
				cur = next;
			}
		}
		_n = ht._n;
	}

	//赋值运算符重载 ht2=ht1
	HashTable& operator=(HashTable ht)
	{
		_tables.swap(ht._tables);
		swap(_n, ht._n);

		return *this;
	}
	//查找到返回哈希表指针与当前哈希表的结点指针
	//若查找不到返回哈希表指针与空指针
	iterator Find(const K& key)
	{
		KeyOfT kot;
		Hash hs;
		size_t hashi = hs(key) % _tables.size();
		Node* cur = _tables[hashi];
		while (cur != nullptr)
		{
			//仿函数对象kot获取结点中的键值
			if (kot((cur->_data)) == key)
			{
				return iterator(cur, this);
			}
			cur = cur->_next;
		}
		return iterator(nullptr, this);
	}


	pair<iterator,bool> Insert(const T& data)
	{
		KeyOfT kot;
		Hash hs;

		iterator ret = Find(kot(data));
		if (ret != end())
		{
			//键值key原先已存在,插入失败,返回已存在结点的迭代器并且返回false
			return make_pair(ret, false);
		}

		if (_n == _tables.size())
		{
			vector<Node*> _newtables(_tables.size() * 2);
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur != nullptr)
				{
					Node* nextnode = cur->_next;
				
					size_t hashi = hs(kot(cur->_data)) % _newtables.size();
					cur->_next = _newtables[hashi];
					_newtables[hashi] = cur;
					cur = nextnode;
				}
				_tables[i] = nullptr;
			}
			_tables.swap(_newtables);
		}

		size_t hashi = hs(kot(data)) % _tables.size();
		Node* newnode = new Node(data);

		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		++_n;

		//键值key原先不存在,插入成功,返回新结点的迭代器并且返回true
		return make_pair(newnode, true);
	}

	bool Erase(const K& key)
	{
		Hash hs;
		KeyOfT kot;
		size_t hashi = hs(key) % _tables.size();
		Node* cur = _tables[hashi];
		Node* prev = nullptr;
		while (cur != nullptr)
		{
			//仿函数(KeyOfT)对象kot获取数据data的键值key确定待删除数据的位置
			if (kot((cur->_data)) == key)
			{
				//删除
				if (prev != nullptr)
				{
					prev->_next = cur->_next;
				}
				else
				{
					_tables[hashi] = cur->_next;
				}
				delete cur;

				--_n;
				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}
		}
		return false;
	}
private:
	vector<Node*> _tables;
	size_t _n;
};

unordered_set的模拟实现

#include "HashBucket.h"
template<class K, class Hash = HashFunc<K>>
class Unordered_Set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename HashTable<K, K, SetKeyOfT, Hash>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	//插入
	pair<iterator, bool> insert(const K& key)
	{
		return _ht.Insert(key);
	}
	//查找
	iterator find(const K& key)
	{
		return _ht.Find(key);
	}
	//删除
	bool erase(const K& key)
	{
		return _ht.Erase(key);
	}
private:
	HashTable<K, K, SetKeyOfT, Hash> _ht;
};

unordered_map的模拟实现

#include "HashBucket.h"
template<class K,class V,class Hash=HashFunc<K>>
class Unordered_Map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	typedef typename HashTable<K, pair<K, V>, MapKeyOfT, Hash>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}
	//插入
	pair<iterator,bool> insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}
	//查找
	iterator find(const K& key)
	{
		return _ht.Find(key);
	}
	//删除
	bool erase(const K& key)
	{
		return _ht.Erase(key);
	}

	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = insert(make_pair(key, V()));
		return ret.first->second;
	}

private:
	HashTable<K, pair<K, V>, MapKeyOfT, Hash> _ht;
};