封装unordered_set&&unordered_map

最新推荐文章于 2024-06-11 08:56:11 发布

Hiland.

最新推荐文章于 2024-06-11 08:56:11 发布

阅读量428

点赞数 1

分类专栏： # C++ 文章标签： c++

本文链接：https://blog.csdn.net/Britney_dark/article/details/130635667

版权

C++ 专栏收录该内容

36 篇文章 3 订阅

订阅专栏

注：实现unordered系列容器是为了学习，因此并非将全部接口实现，所以只实现部分接口

哈希表的改造

模板参数列表的改造

K：关键码类型
T：不同容器T的类型不同，如果是unordered_map，T代表一个键值对；如果是unordered_set，V为K
KeyOfT：因为V的类型不同，通过T取key的方式就不同
HashFunc：哈希函数仿函数对象类型，哈希函数使用除留余数法，需要将Key转换为整形数字才能取模

template<class K, class T, class KeyOfT, class HashFunc = DefaultfHash<T>>
class HashTable;

增加迭代器操作

    //声明节点类，否则迭代器无法识别
	template<class T>
	struct HashNode;
	//声明哈希表结构，否则迭代器无法识别
	template<class K, class T, class KeyOfT, class HashFunc>
	struct HashTable;

    //迭代器实现
	template<class K, class T, class KeyOfT, class HashFunc>
	class __HTIterator
	{
		typedef HashNode<T> Node;//节点
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;//返回迭代器
	public:
		Node* _node;
		HashTable<K, T, KeyOfT, HashFunc>* _pht;//跳向下一个桶需要
		
        //构造函数
		__HTIterator()
		{}
		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			, _pht(pht)
		{}

		iterator& operator++()
		{
			//分别处理同一个桶中迭代和跳向下一个桶迭代
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				size_t hashi = HashFunc()(KeyOfT()(_node->_data)) % _pht->_table.size();
				++hashi;
				for (; hashi < _pht->_table.size(); ++hashi)
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
				}
				//走到末尾
				if (hashi == _pht->_table.size())
					_node = nullptr;
			}

			return *this;
		}
        //返回数据
		T& operator*()
		{
			return _node->_data;
		}
        //返回数据的地址
		T* operator->()
		{
			return &_node->_data;
		}

		bool operator==(const iterator& it) const
		{
			return _node == it._node;
		}

		bool operator!=(const iterator& it) const
		{
			return _node != it._node;
		}
	};

增加通过T获取value操作

	//K针对find和erase需要使用
	//T是针对上层unordered_set传入的是K而unordered_map传入的是pair<K, V>做出更高维度泛型的处理
	//KeyOfT是提取出vaule中的unordered_set和unordered_map中的K
	//HashFunc哈希函数
	template<class K, class T, class KeyOfT, class HashFunc>
	struct HashTable
	{
		typedef HashNode<T> Node;
		//迭代器实现中需要访问哈希表中的私有成员
		template<class K, class T, class KeyOfT, class HashFunc>
		friend class __HTIterator;
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;
		//析构函数需要自己实现，vector存的是内置类型，不会释放节点，只会释放表
		~HashTable()
		{
			for (int i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					if (cur)
						delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}

		//需要寻找第一个存储数据的位置
		iterator begin()
		{
			for (int i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				while (cur)
					if (cur)
						return iterator(cur, this);
			}

			return iterator(nullptr, this);
		}

		//末尾为nullptr
		iterator end()
		{
			return iterator(nullptr, this);
		}

		pair<iterator, bool> Insert(const T& data)
		{
			//不存入key值相同数据
			iterator pos = Find(KeyOfT()(data));
			if (pos != end())
				return make_pair(pos, false);

			//负载因子大小和表的大小一样大的时候扩容
			if (_table.size() == _n)
			{
				size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
				vector<Node*> newtable;
				newtable.resize(newsize, nullptr);
				//遍历旧表，将旧表中的节点插入到新表中
				for (int i = 0; i < _table.size(); ++i)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;//存储next方便更新cur
						//寻找该节点在新表中的位置
						size_t hashi = HashFunc()(KeyOfT()(cur->_data)) % newsize;
						//单链表的头插
						cur->_next = newtable[hashi];//将待插入节点的next指向待插入位置的第一个节点
						newtable[hashi] = cur;//将待插入节点插入到待插入位置
						cur = next;//更新cur
					}
					_table[i] = nullptr;//将旧表位置置空
				}
				_table.swap(newtable);//将处理好的数据交换给旧表
			}

			//经过上面处理后将数据存入
			size_t hashi = HashFunc()(KeyOfT()(data));
			hashi %= _table.size();
			//将节点头插到链表中
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;

			++_n;

			return make_pair(iterator(newnode, this), true);
		}

		iterator Find(const K& key)
		{
			//表空无法查找
			if (_table.size() == 0)
				return iterator(nullptr, this);

			size_t hashi = HashFunc()(key);
			hashi %= _table.size();

			//从计算到位置处开始查询该处链表中是否存在该key
			Node* cur = _table[hashi];
			while (cur)
			{
				if (KeyOfT()(cur->_data) == key)
					return iterator(cur, this);
				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			//表为空无法删除
			if (_table.size() == 0)
				return false;

			size_t hashi = HashFunc()(key);
			hashi %= _table.size();

			//找到对应位置后进行单链表的删除
			Node* cur = _table[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				//单链表的头删和普通删除
				if (KeyOfT()(cur->_data) == key)
				{
					if (prev == nullptr)
						_table[hashi] = cur->_next;
					else
						prev->_next = cur->_next;
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		vector<Node*> _table;//每个位置存储链表
		size_t _n = 0;//负载因子
	};

以上是对哈希表的改造，将以上的实现和修改放在一起

HashTable.h的修改

#pragma once
#include <vector>
using namespace std;

//哈希函数
template<class K>
struct DefaultHash
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct DefaultHash<string>
{
	size_t operator()(const string& key)
	{
		size_t hashi = 0;
		for (auto& ch : key)
			hashi = hashi * 131 + ch;
		return hashi;
	}
};

namespace Bucket
{
	//声明节点类，否则迭代器无法识别
	template<class T>
	struct HashNode;
	//声明哈希表结构，否则迭代器无法识别
	template<class K, class T, class KeyOfT, class HashFunc>
	struct HashTable;

	//迭代器实现
	template<class K, class T, class KeyOfT, class HashFunc>
	class __HTIterator
	{
		typedef HashNode<T> Node;
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;
	public:
		Node* _node;
		HashTable<K, T, KeyOfT, HashFunc>* _pht;//跳向下一个桶需要
		
		__HTIterator()
		{}

		__HTIterator(Node* node, HashTable<K, T, KeyOfT, HashFunc>* pht)
			:_node(node)
			, _pht(pht)
		{}

		iterator& operator++()
		{
			//分别处理同一个桶中迭代和跳向下一个桶迭代
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				size_t hashi = HashFunc()(KeyOfT()(_node->_data)) % _pht->_table.size();
				++hashi;
				for (; hashi < _pht->_table.size(); ++hashi)
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
				}
				//走到末尾
				if (hashi == _pht->_table.size())
					_node = nullptr;
			}

			return *this;
		}

		T& operator*()
		{
			return _node->_data;
		}

		T* operator->()
		{
			return &_node->_data;
		}

		bool operator==(const iterator& it) const
		{
			return _node == it._node;
		}

		bool operator!=(const iterator& it) const
		{
			return _node != it._node;
		}
	};

	//节点的实现
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	//K针对find和erase需要使用
	//T是针对上层unordered_set传入的是K而unordered_map传入的是pair<K, V>做出更高维度泛型的处理
	//KeyOfT是提取出vaule中的unordered_set和unordered_map中的K
	//HashFunc哈希函数
	template<class K, class T, class KeyOfT, class HashFunc>
	struct HashTable
	{
		typedef HashNode<T> Node;
		//迭代器实现中需要访问哈希表中的私有成员
		template<class K, class T, class KeyOfT, class HashFunc>
		friend class __HTIterator;
	public:
		typedef __HTIterator<K, T, KeyOfT, HashFunc> iterator;
		//析构函数需要自己实现，vector存的是内置类型，不会释放节点，只会释放表
		~HashTable()
		{
			for (int i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					if (cur)
						delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}

		//需要寻找第一个存储数据的位置
		iterator begin()
		{
			for (int i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				while (cur)
					if (cur)
						return iterator(cur, this);
			}

			return iterator(nullptr, this);
		}

		//末尾为nullptr
		iterator end()
		{
			return iterator(nullptr, this);
		}

		pair<iterator, bool> Insert(const T& data)
		{
			//不存入key值相同数据
			iterator pos = Find(KeyOfT()(data));
			if (pos != end())
				return make_pair(pos, false);

			//负载因子大小和表的大小一样大的时候扩容
			if (_table.size() == _n)
			{
				size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
				vector<Node*> newtable;
				newtable.resize(newsize, nullptr);
				//遍历旧表，将旧表中的节点插入到新表中
				for (int i = 0; i < _table.size(); ++i)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;//存储next方便更新cur
						//寻找该节点在新表中的位置
						size_t hashi = HashFunc()(KeyOfT()(cur->_data)) % newsize;
						//单链表的头插
						cur->_next = newtable[hashi];//将待插入节点的next指向待插入位置的第一个节点
						newtable[hashi] = cur;//将待插入节点插入到待插入位置
						cur = next;//更新cur
					}
					_table[i] = nullptr;//将旧表位置置空
				}
				_table.swap(newtable);//将处理好的数据交换给旧表
			}

			//经过上面处理后将数据存入
			size_t hashi = HashFunc()(KeyOfT()(data));
			hashi %= _table.size();
			//将节点头插到链表中
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;

			++_n;

			return make_pair(iterator(newnode, this), true);
		}

		iterator Find(const K& key)
		{
			//表空无法查找
			if (_table.size() == 0)
				return iterator(nullptr, this);

			size_t hashi = HashFunc()(key);
			hashi %= _table.size();

			//从计算到位置处开始查询该处链表中是否存在该key
			Node* cur = _table[hashi];
			while (cur)
			{
				if (KeyOfT()(cur->_data) == key)
					return iterator(cur, this);
				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			//表为空无法删除
			if (_table.size() == 0)
				return false;

			size_t hashi = HashFunc()(key);
			hashi %= _table.size();

			//找到对应位置后进行单链表的删除
			Node* cur = _table[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				//单链表的头删和普通删除
				if (KeyOfT()(cur->_data) == key)
				{
					if (prev == nullptr)
						_table[hashi] = cur->_next;
					else
						prev->_next = cur->_next;
					delete cur;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		vector<Node*> _table;//每个位置存储链表
		size_t _n = 0;//负载因子
	};
}

值得一提的是对于扩容，为了提高效率有将容量扩至素数这种说法，但是并没有很好的理论支持，stl3.0源码SGI版本的有采用这种方式，但是由于并没有很好的理论的支持，这里不做处理

unordered_set的封装

#pragma once
#include "HashTable.h"

namespace mystl
{
	template<class K, class HashFunc = DefaultHash<K>>
	struct Unordered_Set
	{
		//提供K
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};

	public:
		typedef typename Bucket::HashTable<K, K, SetKeyOfT, HashFunc>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

		iterator find(const K& key)
		{
			return _ht.Find(key);
		}

		bool erase(const K& key)
		{
			return _ht.Erase(key);
		}

	private:
		Bucket::HashTable<K, K, SetKeyOfT, HashFunc> _ht;
	};
}

全是调用的HashTable的函数，没有什么很复杂的实现

unordered_map的封装

#pragma once
#include "HashTable.h"

namespace mystl
{
	template<class K, class V, class HashFunc = DefaultHash<K>>
	struct Unordered_Map
	{
		//提供K
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:
		typedef typename Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		pair<iterator, bool> Insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}

		bool Erase(const K& key)
		{
			return _ht.Erase(key);
		}

        //实现operator[]
		V& operator[](const K& key)
		{
			//调用insert，不存在插入key，否则不插入key
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			return ret.first->second;//调用operator->取数据中的value
		}

	private:
		Bucket::HashTable<K, pair<K, V>, MapKeyOfT, HashFunc> _ht;
	};
}