c++进阶——哈希表

筑基.

于 2024-09-08 22:34:40 发布

阅读量872

点赞数 26

分类专栏： c++ 文章标签： c++ 散列表哈希算法

本文链接：https://blog.csdn.net/2401_83052144/article/details/140835980

版权

c++ 专栏收录该内容

20 篇文章 1 订阅

订阅专栏

在这里插入图片描述

嗨喽大家好呀，今天阿鑫给大家带来的是c++进阶——哈希表，好久不见啦，下面让我们进入本节博客的内容吧！

c++进阶——哈希表

枚举的介绍
unordered系列的底层结构
哈希表的改造

哈希是一种思想(映射)，哈希表(值和存储位置建立的映射关系)是一种数据结构。

1.枚举

某些具有相同类别的变量没有支持的类型，用枚举来自定义一组命名的整数常量（0，1，2）来解决这一问题
与结构体的具有多个成员不同，每个枚举类型的变量都只能选择一个成员变量，但都属于自定义类型

注意：成员只能是枚举常量，这些常量在定义时会被赋予整数值（默认为从0开始的递增整数，但可以显式指定）。是一组整数值，但是有名字，提高了代码的可读性（0，1，2三个数可以表示三种颜色）
简单说：三个名字表示三种情况，但是这三个名字底层都是整数值

在这里插入图片描述

#include <iostream>  
  
// 定义一个枚举类型Weekday，用于表示星期几  
enum Weekday {  
    Monday,  
    Tuesday,  
    Wednesday,  
    Thursday,  
    Friday,  
    Saturday,  
    Sunday  
};  
  
int main() {  
    // 声明一个Weekday类型的变量today，并将其初始化为Wednesday  
    Weekday today = Wednesday;  
  
    // 使用switch语句根据today的值执行不同的操作  
    switch (today) {  
        case Monday:  
            std::cout << "今天是星期一" << std::endl;  
            break;  
        case Tuesday:  
            std::cout << "今天是星期二" << std::endl;  
            break;  
        case Wednesday:  
            std::cout << "今天是星期三" << std::endl;  
            break;  
        case Thursday:  
            std::cout << "今天是星期四" << std::endl;  
            break;  
        case Friday:  
            std::cout << "今天是星期五" << std::endl;  
            break;  
        case Saturday:  
            std::cout << "今天是星期六" << std::endl;  
            break;  
        case Sunday:  
            std::cout << "今天是星期日" << std::endl;  
            break;  
    }  
  
    return 0;  
}

在这里插入图片描述

2. unordered系列的底层结构

unordered系列的关联式容器之所以效率比较高，是因为其底层使用了哈希结构。
在这里插入图片描述

2.1常见的哈希函数

1.直接定址法（用key的值映射一个绝对位置或相对位置）
优点：快，没有冲突
缺点：只适合范围集中
在这里插入图片描述
2.除留余数法（key模表的大小后,都可以映射到表中一个位置）
hashi = key%N（N是表的大小）
哈希冲突：不同的值取模后的值，映射到了表中的相同位置

2.2解决哈希冲突的方法

闭散列：开放地址法->去其他位置(规则)找一个空的位置
开散列：哈希桶

在这里插入图片描述

#pragma once
#include<vector>
enum State//状态标记
{
	EXIST,
	EMPTY,
	EDLETE
};
template<class K,class T>
struct HashData
{
	pair<K, V> _kv;
	State _state = EMPTY;
};

template<class K, class V>
class HashTable
{
private:
	vector<HashData<K, V>> _tables;
	size_t _n =0//表中存储数据个数
};

在这里插入图片描述

扩容时进行了Insert的复用

bool Insert(const pair<K, V>& kv)
		{
		    如果key已经存在则插入失败
			if (Find(kv.first))
				return false;

			// 扩容
			if (_n * 10 / _tables.size() >= 7)
			{
				//vector<HashData<K, V>> newTables(_tables.size() * 2);
				 遍历旧表， 将所有数据映射到新表
				 ...
				//_tables.swap(newTables);

				HashTable<K, V, Hash> newHT;
				newHT._tables.resize(_tables.size() * 2);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					if (_tables[i]._state == EXIST)
					{
						newHT.Insert(_tables[i]._kv);
					}
				}

				_tables.swap(newHT._tables);
			}

			Hash hs;
			size_t hashi = hs(kv.first) % _tables.size();
			while (_tables[hashi]._state == EXIST)
			{
				++hashi;
				hashi %= _tables.size();
			}

			_tables[hashi]._kv = kv;
			_tables[hashi]._state = EXIST;
			++_n;

			return true;
		}

由于在实现Erase时，只是单纯将某个位置的状态标记置空，并没有抹除数据，所以在Find时，需要同时满足状态标记为存在以及key值相等才能返回true。

HashData<K, V>* Find(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			while (_tables[hashi]._state != EMPTY)
			{
				if (_tables[hashi]._state == EXIST
					&& _tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];
				}

				++hashi;
				hashi %= _tables.size();
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}
			else
			{
				ret->_state = DELETE;
				return true;
			}
		}

哈希表需要支持类型之间的相互转换，因此需要提供一个哈希函数
当仿函数实现的功能相同，但是接受对象的类型不同时，就需要实现仿函数的特化

可以和整形之间相互转化

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

类似string类型不能和整形之间相互转换，需要支持一个特化的HashFunc

template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t hash = 0;
		for (auto e : key)
		{
			hash *= 31;
			hash += e;
		}

		return hash;
	}
}

在这里插入图片描述

namespace hash_bucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;

		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};

	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		HashTable()
		{
			_tables.resize(10, nullptr);
		}
	private:


		vector<Node*> _tables;  // 指针数组
		size_t _n = 0;			// 表中存储数据个数
	};

在这里插入图片描述
和闭散列直接插入(先new节点，再delete节点)不同，节点的重复利用可以节省时间

bool Insert(const pair<K, V>& kv)
		{
			Hash hs;
			size_t hashi = hs(kv.first) % _tables.size();

			// 负载因子==1扩容
			if (_n == _tables.size())
			{
				/*HashTable<K, V> newHT;
				newHT._tables.resize(_tables.size() * 2);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						newHT.Insert(cur->_kv);
						cur = cur->_next;
					}
				}

				_tables.swap(newHT._tables);*/

				vector<Node*> newtables(_tables.size() * 2, nullptr);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						// 旧表中节点，挪动新表重新映射的位置
						size_t hashi = hs(cur->_kv.first) % newtables.size();
						// 头插到新表
						cur->_next = newtables[hashi];
						newtables[hashi] = cur;
						
						cur = next;
					}

					_tables[i] = nullptr;
				}

				_tables.swap(newtables);
			}

			// 头插
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return true;
		}

节点的删除分为头节点和其他节点
头节点：指针数组中的指针指向头节点的下一个节点
其余节点：当前节点的前一个节点指向当前节点的下一个节点，释放当前节点
在这里插入图片描述

bool Erase(const K& key)
{
	Hash hs;
	size_t hashi = hs(key) % _tables.size();
	Node* prev = nullptr;
	Node* cur = _tables[hashi];
	while (cur)
	{
		if (cur->_kv.first == key)
		{
			if (prev == nullptr)
			{
				_tables[hashi] = cur->_next;

			}
			else//此时说明销毁节点不是头节点
			{
				prev->_next = cur->_next;
			}
			delete cur;
			--_n;
			return true;
		}
		prev = cur;
		cur = cur->_next;
		
	}
	return false;
}

下面给出一份完整的闭散列解决哈希冲突的实现代码

namespace hash_bucket
{
	template<class K, class V>
	struct HashNode
	{
		pair<K, V> _kv;
		HashNode<K, V>* _next;

		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};

	template<class K, class V, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		HashTable()
		{
			_tables.resize(10, nullptr);
		}

		~HashTable()
		{
			// 依次把每个桶释放
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		bool Insert(const pair<K, V>& kv)
		{
			Hash hs;
			size_t hashi = hs(kv.first) % _tables.size();

			// 负载因子==1扩容
			if (_n == _tables.size())
			{
				/*HashTable<K, V> newHT;
				newHT._tables.resize(_tables.size() * 2);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while(cur)
					{
						newHT.Insert(cur->_kv);
						cur = cur->_next;
					}
				}

				_tables.swap(newHT._tables);*/

				vector<Node*> newtables(_tables.size() * 2, nullptr);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;

						// 旧表中节点，挪动新表重新映射的位置
						size_t hashi = hs(cur->_kv.first) % newtables.size();
						// 头插到新表
						cur->_next = newtables[hashi];
						newtables[hashi] = cur;
						
						cur = next;
					}

					_tables[i] = nullptr;
				}

				_tables.swap(newtables);
			}

			// 头插
			Node* newnode = new Node(kv);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return true;
		}

		Node* Find(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}

				cur = cur->_next;
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables .size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					--_n;
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		//vector<list<pair<K, V>>> _tables;  // 指针数组
		//struct Bucket
		//{
		//	list<pair<K, V>> _lt;
		//	map<K, V> _m;
		//	size_t _bucketsize;   // >8 map  <=8 list
		//};
		//vector<Bucket> _tables;

		vector<Node*> _tables;  // 指针数组
		size_t _n = 0;			// 表中存储数据个数
	};