C++进阶-哈希封装（unordered_map和unordered_set）

bingo691

于 2024-07-30 18:33:13 发布

阅读量362

点赞数 4

分类专栏： C++进阶文章标签： c++ 哈希算法

本文链接：https://blog.csdn.net/weixin_64423718/article/details/140802666

版权

C++进阶专栏收录该内容

16 篇文章 0 订阅

订阅专栏

1. 哈希封装思路

首先，内层用的是哈希表，哈希表使用泛型编程实现；外层的unordered_map和unordered_set只需给予相应的模板参数，之后由编译器自动实例化出对应的哈希表。

2. 哈希封装的模拟实现

主要由三个头文件进行封装：My_unordered_map.h、My_unordered_set.h和HashTable.h，分别装有My_unordered_map、My_unordered_set和哈希表。注意：后文通过hash_bucket::HashTable来使用哈希表，因此HashTable.h中的代码需要放在namespace hash_bucket中（下面的代码片段省略了该命名空间的包裹）。

2.1 HashTable.h

除留余数法的仿函数：把各种类型的key转换为可以取余的整型（size_t）

// Default hash functor: turns a key into a size_t that the table can reduce
// modulo its bucket count (division-remainder hashing).
// The generic version just casts the key, so it only works for key types the
// cast below accepts (integral-like keys).
template<class K>
struct HashFunc
{
	// const-qualified so the functor can also be invoked through a const object
	size_t operator()(const K& key) const
	{
		return (size_t)key;
	}
};

// Specialization for std::string using the BKDR string hash.
// Multiplying the running value by 131 before adding each character makes the
// result depend on character order, so permutations of the same letters
// (e.g. "abcd" / "bcad") do not trivially collide the way a plain sum would.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& key) const
	{
		size_t hash = 0;
		// Read each byte as unsigned char: plain char may be signed, in which case
		// non-ASCII bytes would be sign-extended and the hash would differ
		// between platforms.
		for (unsigned char ch : key)
		{
			hash = hash * 131 + ch;
		}

		return hash;
	}
};

2.1.1 节点数据类

用来初始化节点，定义节点数据

// One node of a bucket's singly linked chain.
template<class T>
struct HashNode
{
	T _data;                    // payload stored in this node
	HashNode* _next = nullptr;  // next node in the same chain; null marks the tail

	// Copies `data` into the node; a fresh node is not linked to anything yet.
	HashNode(const T& data)
		: _data(data)
	{}
};

2.1.2 迭代器类

operator++()

1. 节点是当前桶（表索引位置）下的最后一个节点
a. 向后寻找下一个不为空的桶，找到则指向该桶的第一个节点
b. 没找到，则直接将节点指针置空（即到达end()）

2. 节点不是当前桶下的最后一个节点，直接走到下一个节点

// Pre-increment: move to the next element of the table.
	// The iterator keeps a pointer to the table because, once the current node is
	// the tail of its bucket, the next element can only be found by scanning the
	// bucket array:
	//   - the node has a successor in its chain -> step to it;
	//   - otherwise look for the first non-empty bucket after the current one,
	//     and become the end iterator (null node) when there is none.
	// The current node is dereferenced, so this must not be called on End().
	Self& operator++()
	{
		Node* following = _node->_next;

		if (following == nullptr)
		{
			Hash hasher;
			KeyOfT keyOf;
			const size_t bucketCount = _pht->_tables.size();

			// Recompute the bucket of the current node and start right after it.
			size_t idx = hasher(keyOf(_node->_data)) % bucketCount + 1;
			while (idx < bucketCount && _pht->_tables[idx] == nullptr)
			{
				++idx;
			}

			// Either the head of the next non-empty bucket, or null (end).
			if (idx < bucketCount)
			{
				following = _pht->_tables[idx];
			}
		}

		_node = following;
		return *this;
	}

当前节点是所在桶的最后一个节点时，需要在表中向后查找下一个不为空的桶，因此迭代器必须能访问当前的哈希表，于是在成员中引入一个指向表的指针：
const HashTable<K, T, KeyOfT, Hash>* _pht;// 指向所属哈希表的指针
而哈希表类中也要用到迭代器，两个类互相依赖，所以需要在迭代器类的前面先对哈希表类进行前置声明


// __HTIterator keeps a pointer to a HashTable as a member, and HashTable in turn
// uses __HTIterator, so the two class templates depend on each other.
// __HTIterator is defined first, therefore HashTable must be forward-declared
// here; otherwise the name HashTable could not be found inside the iterator.
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

迭代器类的总体代码

// Forward iterator over the elements of a separate-chaining HashTable.
// Template parameters:
//   K      - key type
//   T      - element type stored in each node
//   KeyOfT - functor that extracts the key from a T
//   Hash   - functor that converts a key into a size_t
//   Ref    - type returned by operator*  (T& or const T&)
//   Ptr    - type returned by operator-> (T* or const T*)
template<class K, class T, class KeyOfT, class Hash, class Ref, class Ptr>
struct __HTIterator
{
public:
	// Node type of the table being iterated
	typedef HashNode<T> Node;

	// This iterator's own type
	typedef __HTIterator<K, T, KeyOfT, Hash, Ref, Ptr> Self;

	// node: current element (nullptr represents the end iterator)
	// pht:  table that owns the node; needed to hop between buckets in operator++
	__HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
		:_node(node),
		_pht(pht)
	{}

	// Dereference: yields the element stored in the current node (Ref).
	// const-qualified because it does not modify the iterator itself.
	Ref operator*() const
	{
		return _node->_data;
	}

	// Member access: yields the address of the current element (Ptr).
	Ptr operator->() const
	{
		return &_node->_data;
	}

	// Two iterators differ when they point at different nodes.
	bool operator!=(const Self& t) const
	{
		return _node != t._node;
	}

	// Two iterators are equal when they point at the same node.
	bool operator==(const Self& t) const
	{
		return _node == t._node;
	}

	// Pre-increment: move to the next element of the table.
	// The table pointer is required because, once the current node is the tail
	// of its bucket, the next element can only be found by scanning the bucket
	// array:
	//   1. the node is the last one in its bucket:
	//      a. a later non-empty bucket exists -> go to its first node
	//      b. none exists                    -> become the end iterator (null node)
	//   2. the node is not the last one in its bucket -> step to its successor
	// The current node is dereferenced, so this must not be called on the end iterator.
	Self& operator++()
	{
		// Case 2: there is a successor in the same chain
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else// Case 1: tail of the current bucket
		{
			Hash hs;
			KeyOfT kot;
			// Recompute which bucket the current node lives in
			size_t i = hs(kot(_node->_data)) % _pht->_tables.size();

			++i;
			// Look for the next bucket whose head pointer is not null
			for (; i < _pht->_tables.size(); i++)
			{
				if (_pht->_tables[i])
				{
					break;
				}
			}

			if (i == _pht->_tables.size())
			{
				// Scanned to the end of the array without finding an element
				_node = nullptr;
			}
			else
			{
				// First node of the bucket that was found
				_node = _pht->_tables[i];
			}

		}

		return *this;
	}

	// Post-increment: advances the iterator and returns its previous value.
	Self operator++(int)
	{
		Self before = *this;
		++(*this);
		return before;
	}

private:
	Node* _node;// current node (nullptr for the end iterator)
	const HashTable<K, T, KeyOfT, Hash>* _pht;// table being iterated; const so it can be built from a const table's `this`
};

注意：
由于返回const迭代器的Begin和End都是const成员函数，而const成员函数中的this是指向const对象的指针（const HashTable*），不能转换为非const的表指针，所以迭代器类中保存的表指针必须加上const，这样才能用this来构造迭代器

2.1.3 哈希表类

注意：

_tables是哈希表类的私有成员，迭代器类通过表指针无法直接访问它，所以要把迭代器类声明为哈希表类的友元

// 哈希表
//			Key	   Value  转换为可以比较数据的key   哈希强制转换为可取余的值
template< class K, class T, class KeyOfT, class Hash>
class HashTable
{
public:

	// 由于_tables在HashTable类中是私有成员，无法在__HTIterator类被访问
	// 所以将__HTIterator变成HashTable的友元
	template<class K, class T, class KeyOfT, class Hash, class Ref, class Ptr>
	friend struct __HTIterator;

	// 重定义节点
	typedef HashNode<T> Node;

	// 引入普通迭代器
	typedef typename __HTIterator<K, T, KeyOfT, Hash, T&, T*> Iterator;

	// 引入const迭代器
	typedef typename __HTIterator<K, T, KeyOfT, Hash, const T&,const T*> Const_Iterator;


	// Begin()
	// 找到第一个指向不为空的索引的指针
	Iterator Begin()
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			if (cur)
			{
				return Iterator(cur, this);
			}
		}

		// 没找到
		return Iterator(nullptr, this);
	}

	// End()
	// 直接返回空
	Iterator End()
	{
		return Iterator(nullptr, this);
	}

	// const迭代器
	// Begin()
	// 找到第一个指向不为空的索引的指针
	Const_Iterator Begin() const
	{
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];
			if (cur)
			{
				return Const_Iterator(cur, this);
			}
		}

		// 没找到
		return Const_Iterator(nullptr, this);
	}

	// End()
	// 直接返回空
	Const_Iterator End() const
	{
		return Const_Iterator(nullptr, this);
	} 
	

	// 构造函数
	HashTable()
	{
		_tables.resize(10, nullptr);// 提前开辟好空间
		_n = 0;
	}

	// 析构函数
	~HashTable()
	{
		// vector的数组有自己的析构函数
		// 我们只有写相应的Node*节点的销毁
		// 遍历整个vector，全部删除节点
		for (size_t i = 0; i < _tables.size(); i++)
		{
			Node* cur = _tables[i];

			while (cur)
			{
				Node* next = cur->_next;

				delete cur;

				cur = next;
			}

			_tables[i] = nullptr;
		}
	}

	// 插入
	pair<Iterator, bool> Insert(const T& data)
	{
		KeyOfT kot;// 将数据类型转换为可以比较大小的数据类型
		// 不能冗余
		Iterator it = Find(kot(data));// 查找
		if (it != End())// 找到
		{
			return make_pair(it, false);
		}
		
		// 扩容
		// 负载因子为1，理想状态下，平均每个桶下面挂一个数据
		Hash hs;// 将数据转换为可取余的数据类型
		
		// 第二种扩容，建立一个Node*数据的数组
		if (_n == _tables.size())
		{
			vector<Node*> newTables(_tables.size() * 2, nullptr);

			for (size_t i = 0; i < _tables.size(); i++)
			{
				// 取旧表中的每一个头节点指针
				Node* cur = _tables[i];

				while (cur)
				{
					// 记录旧表的cur的下一节点位置，cur被移动到新表中时，将下面的节点与数组的指针相连
					Node* next = cur->_next;

					// 头插到新表的位置
					// 先判断旧表数据在新表的哪个位置
					size_t hashi = hs(kot(cur->_data)) % newTables.size();

					// 旧表数据插入到新表中
					cur->_next = newTables[hashi];
					newTables[hashi] = cur;

					cur = next;// 之后将旧表的节点数据向前移动一个
				}

				_tables[i] = nullptr;// 旧表当前节点指针，全部插入到新表完后，将旧表的指针指向空
			}

			// 旧表数据全部插入到新表中后
			// 交换两表的数据
			_tables.swap(newTables);

		}


		// 查找相应的数据存放在数组的相对位置
		size_t hashi = hs(kot(data)) % _tables.size();
		Node* newnode = new Node(data);

		// 头插新节点
		// _tables[hashi]只是一个指针，并不是哨兵位
		newnode->_next = _tables[hashi];
		_tables[hashi] = newnode;
		++_n;

		return make_pair(Iterator(newnode, this), true);
	}

	// 查找
	Iterator Find(const K& key)
	{
		// 运用仿函数对象，才能进行哈希的比较
		Hash hs;
		KeyOfT kot;

		// 查找相应的数据存放在数组的相对位置
		size_t hashi = hs(key) % _tables.size();

		// 数组相应位置的节点数据
		Node* cur = _tables[hashi];

		// 链表查找
		while (cur)
		{
			// 相等，找到了
			if (kot(cur->_data) == key)
			{
				return Iterator(cur, this);
			}
			else// 不相等，则就继续向下查找
			{
				cur = cur->_next;
			}
		}

		// 找完了，没找到
		return Iterator(nullptr, this);

	}


	// 删除
	// 找到相应的key
	bool Erase(const K& key)
	{
		Hash hs;

		// 先找到key在数组中的相应位置
		size_t hashi = hs(key) % _tables.size();

		// 删除节点
		// 提前记录前一节点的指针
		Node* prev = nullptr;
		Node* cur = _tables[hashi];

		while (cur)
		{
			// 如果找到相应的节点时
			if (cur->_kv.first == key)
			{
				// 开始删除
				// 删除头节点
				if (prev == nullptr)
				{
					_tables[hashi] = cur->_next;
				}
				else// 删除非头节点
				{
					prev->_next = cur->_next;
				}

				delete cur;

				return true;
			}
			else
			{
				prev = cur;
				cur = cur->_next;
			}

		}

		return false;
	}
private:
	vector<Node*> _tables;
	size_t _n;

	//vector<list<pair<K, V>>> _tables; 不用这种，这种不好套迭代器
};

2.2 My_unordered_map.h

#pragma once
#include"HashTable.h"

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	// 仿函数
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};

public:

	// 引入普通迭代器
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;


	iterator begin()
	{
		return _ht.Begin();
	}

	iterator end()
	{
		return _ht.End();
	}

	// 插入
	pair<iterator, bool> Insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}

	// 查找
	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}


	// 删除
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}

	// []运算符重载
	// 返回值为V
	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = Insert(make_pair(key,V()));
		return ret.first->second;// 调用operator->
		
	}

private:
	hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;

};

测试用例

void test_unordered_map()
{
	string arr[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜",
"苹果", "香蕉", "苹果", "香蕉","苹果","草莓", "苹果","草莓" };
	unordered_map<string, int> countMap;
	for (auto& e : arr)
	{
		countMap[e]++;
	}

	unordered_map<string, int>::iterator it = countMap.begin();
	while (it != countMap.end())
	{
		//it->first += 'x'; // key不能修改
		//it->second += 1;  // value可以修改
		cout << it->first << ":" << it->second << endl;
		++it;
	}
	cout << endl;

	for (auto& kv : countMap)
	{
		cout << kv.first << ":" << kv.second << endl;
	}
	cout << endl;
}

2.3 My_unordered_set.h

#pragma once
#include"HashTable.h"

template<class K, class Hash = HashFunc<K>>
class unordered_set
{
public:
	// 仿函数
	// 类型不同，转换为可以比较大小的数据
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};

	// 引入普通迭代器
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;

	// const迭代器
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Const_Iterator const_iterator;

	// begin()
	iterator begin()
	{
		return _ht.Begin();
	}

	// end()
	iterator end()
	{
		return _ht.End();
	}

	// begin()
	const_iterator begin() const
	{
		return _ht.Begin();
	}

	// end()
	const_iterator end() const
	{
		return _ht.End();
	}


	// 插入
	pair<iterator, bool> Insert(const K& key)
	{
		return _ht.Insert(key);
	}

	// 查找
	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}

	// 删除
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}

private:
	hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
};


//void Func(const unordered_set<int>& s)
//{
//	unordered_set<int>::iterator it = s.begin();
//	while (it != s.end())
//	{
//		//*it = 1;
//		cout << *it << " ";
//		++it;
//	}
//	cout << endl;
//}

测试用例

void test_unordered_set()
{
	unordered_set<int> s;
	s.Insert(31);
	s.Insert(11);
	s.Insert(5);
	s.Insert(15);
	s.Insert(25);

	unordered_set<int>::iterator it = s.begin();
	while (it != s.end())
	{
		//*it = 1;
		cout << *it << " ";
		++it;
	}
	cout << endl;

	for (auto e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}