哈希表（hashTable）_哈希一对多-CSDN博客

本文链接：https://blog.csdn.net/qq_67693066/article/details/135673734

哈希表（hashTable）

unordered_map和unordered_set
哈希映射
哈希碰撞
- 闭散列
- 开散列（哈希桶）
字符串哈希函数

我们之前了解了红黑树，今天我们来了解另一种支持快速查找的数据结构——哈希表。

unordered_map和unordered_set

我们之前学习过map和set，其实unordered_map、unordered_set和map、set的用法大同小异：

#include<iostream>
#include<unordered_map>
#include<unordered_set>
#include<map>
#include<set>
#include<string>
#include<stdlib.h>
using namespace std;

// Demo of the ordered containers: fills a set with ten pseudo-random values
// in [1, 100] and a map with four string keys, then prints both containers.
int main()
{
	// Seed rand() from the current time so each run draws different numbers.
	srand(time(0));

	// std::set: equal values are stored once, so fewer than ten may be printed.
	set<int> numbers;
	int drawsLeft = 10;
	while (drawsLeft-- > 0)
	{
		const int drawn = rand() % 100 + 1;
		numbers.insert(drawn);
	}

	for (set<int>::const_iterator it = numbers.begin(); it != numbers.end(); ++it)
	{
		cout << *it << " " ;
	}
	cout << endl;

	// std::map keyed by string; the keys are inserted out of order on purpose.
	map<string, int> scores;
	scores.insert(make_pair("cccc", 89));
	scores.insert(make_pair("aaaa", 21));
	scores.insert(make_pair("ffff", 2));
	scores.insert(make_pair("bbbb", 12));

	for (map<string, int>::const_iterator it = scores.begin(); it != scores.end(); ++it)
	{
		cout << it->first << "->" << it->second << endl;
	}
	cout << endl;
}

map和set的结果是有序的：
在这里插入图片描述
我们来看看unordered_map和unordered_set的结果：

	// Seed rand() from the current time.
	srand(time(0));
	// unordered_set and unordered_map (same operations as the set/map demo)
	unordered_set<int> My_set;

	// Insert ten pseudo-random values in [1, 100].
	for (int i = 0; i < 10; i++)
	{
		My_set.insert(rand() % 100 + 1);
	}

	// Print the stored values separated by spaces.
	for (auto e : My_set)
	{
		cout << e << " ";
	}

	cout << endl;

	// Same four key/value pairs as the map demo.
	unordered_map<string, int> My_map;
	My_map.insert(make_pair("cccc", 89));
	My_map.insert(make_pair("aaaa", 21));
	My_map.insert(make_pair("ffff", 2));
	My_map.insert(make_pair("bbbb", 12));

	// Print every entry as key->value, one per line.
	for (auto e : My_map)
	{
		cout << e.first << "->" << e.second << endl;
	}
	cout << endl;

在这里插入图片描述
一般情况下，unordered_map和unordered_set的查找、插入性能要比map和set好得多：
前者底层是哈希表，平均时间复杂度为O(1)，也不需要维护元素的有序性；后者底层是红黑树，时间复杂度为O(logN)。

哈希映射

哈希表最基本的一个概念就是哈希映射，什么意思呢？就是通过某种关系，使两个对象产生唯一的联系。这种关系可以是自然的，也可以是人为规定的。

比如说：学生有学号，有姓名，我们可以通过直接让学号和姓名绑定，实现学号对应姓名这种映射。
还有我们常见的数组，其实某种意义上也是一种哈希映射，数组里面的内容是挨着存放的（0,1,2,3,4…），这样可以实现数组下标查找对应内容的功能。我们也可以自己规定：单数的数组下标存放数字，双数的不做处理。这样也可以实现某两个事物之间产生唯一的联系。

通过哈希映射，可以通过这种关系，实现对数据的快速查找。

哈希碰撞

哈希碰撞（也叫哈希冲突）的意思是：不同的关键字通过同一种映射关系，被映射到了同一个位置，也就是一个位置对应了多个关键字。我们来举个例子：
我现在有一组数：

// Sample keys. With index = value % 10, 22 and 12 both land on slot 2 and
// 44 and 34 both land on slot 4, so this data set produces collisions.
int a[] = {1,22,44,55,773,12,34,56};

我现在规定：数组里面的数模10之后的结果，就是该数字存储位置的下标。例如22和12模10都得2，44和34模10都得4，它们会争用同一个位置，这就产生了哈希冲突。

在这里插入图片描述
为了解决哈希冲突，我们得想办法处理有冲突的元素：
解决哈希冲突两种常见的方法是：闭散列和开散列

闭散列

闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到冲突位置中的“下一个” 空位置中去。

那么如何找下一个位置呢？

线性探测：从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止。

我们可以根据这个，模拟一个简单的哈希表

#pragma once
#include<iostream>
#include<vector>
#include<string>
using namespace std;

// Closed hashing (open addressing with linear probing): a teaching
// implementation of a key/value hash table stored in one flat vector.
namespace My_HashTable
{
	// State of one slot in the table.
	enum Staus
	{
		EMPTY,  // never held an element; a lookup stops when it reaches one
		EXIST,  // currently holds a live element
		DELETE  // tombstone: the element was erased; lookups keep probing past it
	};

	// One slot: the stored key/value pair plus the slot state.
	template<class K,class V>
	struct HashData
	{
		std::pair<K, V> _kv; // key/value pair
		Staus _s = EMPTY;    // explicit default so a fresh slot is always EMPTY
	};

	// Default hash: converts the key itself to size_t (intended for
	// integer-like keys).
	template<class K>
	struct HashFunc
	{
		size_t operator()(const K& key) const
		{
			return (size_t)key;
		}
	};

	// String hash: multiply-by-31 accumulation over the characters.
	// The accumulator is size_t so that overflow wraps instead of being
	// undefined behaviour (the accumulator used to be a signed int).
	template<>
	struct HashFunc<std::string>
	{
		size_t operator()(const std::string& key) const
		{
			size_t hash = 0;
			for (auto e : key)
			{
				hash *= 31;
				hash += e;
			}
			return hash;
		}
	};

	// Hash table using linear probing.
	//   K    - key type
	//   V    - mapped value type
	//   Hash - functor turning a K into a size_t
	template<class K, class V,class Hash = HashFunc<K>>
	class HashTable
	{
	public:

		// Starts with ten empty slots.
		HashTable()
		{
			_tables.resize(10);
		}

		// Looks up `key`.
		// Returns a pointer to the live slot holding it, or nullptr when the
		// key is not present. The pointer is invalidated by the next insert
		// that grows the table.
		HashData<K, V>* Find(const K& key)
		{
			Hash hf;
			const size_t capacity = _tables.size();
			size_t index = hf(key) % capacity;

			// Visit each slot at most once: when every slot is EXIST or
			// DELETE there is no EMPTY slot to stop on, and an unbounded
			// loop would never terminate.
			for (size_t probes = 0; probes < capacity; ++probes)
			{
				HashData<K, V>& slot = _tables[index];
				if (slot._s == EMPTY)
				{
					return nullptr;
				}
				if (slot._s == EXIST && slot._kv.first == key)
				{
					return &slot;
				}
				index = (index + 1) % capacity; // wrap around to the front
			}
			return nullptr;
		}

		// Inserts `kv`.
		// Returns true when the pair was stored, false when the key is
		// already present (the stored value is left untouched).
		bool insert(const std::pair<K,V>& kv)
		{
			if (Find(kv.first) != nullptr)
			{
				return false;
			}

			// Grow once the load factor reaches 0.7. Integer arithmetic
			// avoids comparing floating-point values for exact equality.
			if (_n * 10 >= _tables.size() * 7)
			{
				// Same Hash as this table, so custom hashers keep working.
				HashTable<K, V, Hash> bigger;
				bigger._tables.resize(_tables.size() * 2);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					if (_tables[i]._s == EXIST)
					{
						bigger.insert(_tables[i]._kv);
					}
				}
				// _n already equals the number of live elements, so only the
				// storage needs to be exchanged; tombstones are dropped.
				_tables.swap(bigger._tables);
			}

			Hash hf;
			size_t index = hf(kv.first) % _tables.size();

			// Linear probing: take the first slot that is not live. A free
			// slot always exists because the load factor is kept below 1.
			while (_tables[index]._s == EXIST)
			{
				index = (index + 1) % _tables.size();
			}

			_tables[index]._kv = kv;
			_tables[index]._s = EXIST;
			++_n;

			return true;
		}

		// Removes `key` by turning its slot into a tombstone.
		// Returns true when an element was removed, false when the key was
		// not present.
		bool Erease(const K& key)
		{
			HashData<K,V>* ret = Find(key);
			if (ret == nullptr) // Find only ever returns live slots
			{
				return false;
			}

			ret->_s = DELETE;
			--_n;
			return true;
		}

		// Prints every slot as "[key]->STATE->value", one per line.
		void PrintHashTable() const
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				const HashData<K, V>& slot = _tables[i];
				const char* label = nullptr;
				switch (slot._s)
				{
				case EXIST:
					label = "EXSIT";
					break;
				case EMPTY:
					label = "EMPTY";
					break;
				case DELETE:
					label = "DELETE";
					break;
				}
				if (label == nullptr)
				{
					continue;
				}
				std::cout << "[" << slot._kv.first << "]" << "->" << label << "->" << slot._kv.second << std::endl;
			}
		}

	private:
		std::vector<HashData<K,V>> _tables; // slot storage
		size_t _n = 0; // number of live (EXIST) elements
	};


	// Integer keys: insert, erase one key, insert another, then dump the table.
	void Test1()
	{
		int a[] = { 11,2,44,24,54,7,9,12,32,94,95 };
		HashTable<int,int> hash;

		for (auto e : a)
		{
			hash.insert(std::make_pair(e, e));
		}

		hash.Erease(24);

		hash.insert(std::make_pair(4, 4));
		hash.PrintHashTable();

	}

	// String keys: counts how many times each word occurs.
	void Test2()
	{
		std::string arr[] = { "香蕉","苹果","樱桃","苹果","梨子","西瓜" };
		HashTable<std::string, int> hash;
		for (const auto& e : arr)
		{
			HashData<std::string, int>* ret = hash.Find(e);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				hash.insert(std::make_pair(e, 1));
			}
		}

		hash.PrintHashTable();
	}
}

开散列（哈希桶）

开散列（也叫做拉链法、哈希桶法），就是用一个指针数组存放各条链表的头结点，映射到同一个下标的元素都挂在同一条链表上：

在这里插入图片描述
哈希桶最大的好处就是，当发生哈希冲突的时候，不会占用其他位置，就会自己原地解决。

#pragma once
#include<iostream>
#include<vector>
#include<string>

// Open hashing (separate chaining): a teaching implementation of a key/value
// hash table in which every bucket is a singly linked list of nodes.
namespace Hash_buckets
{
	// Default hash: converts the key itself to size_t (intended for
	// integer-like keys).
	template<class K>
	struct HashFuc
	{
		size_t operator()(const K& key) const
		{
			return (size_t)key;
		}
	};

	// String hash: multiply-by-31 accumulation over the characters.
	template<>
	struct HashFuc<std::string>
	{
		size_t operator()(const std::string& key) const
		{
			size_t hash = 0;
			for (auto e : key)
			{
				hash *= 31;
				hash += e;
			}
			return hash;
		}
	};

	// One node of a bucket's linked list.
	template<class K,class V>
	struct Hash_Node
	{
		Hash_Node* _next;    // next node in the same bucket, or nullptr
		std::pair<K, V> _kv; // stored key/value pair

		// Builds an unlinked node holding a copy of `kv`.
		Hash_Node(const std::pair<K,V>& kv)
			:_next(nullptr)
			,_kv(kv)
		{

		}
	};

	// Hash table using separate chaining.
	//   K    - key type
	//   V    - mapped value type
	//   Hash - functor turning a K into a size_t
	template<class K, class V, class Hash = HashFuc<K>>
	class HashTable
	{
	public:
		typedef Hash_Node<K, V> _Node;

		// Starts with ten empty buckets.
		HashTable()
		{
			_tables.resize(10);
		}

		// Frees every node in every bucket.
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				_Node* cur = _tables[i];

				while (cur)
				{
					_Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		// The table owns its nodes through raw pointers; a member-wise copy
		// would make two tables delete the same nodes, so copying is disabled.
		HashTable(const HashTable&) = delete;
		HashTable& operator=(const HashTable&) = delete;

		// Inserts `kv`.
		// Returns true when the pair was stored, false when the key is
		// already present (the stored value is left untouched).
		bool Insert(const std::pair<K,V>& kv)
		{
			Hash hf;

			if (Find(kv.first))
				return false;

			// Grow when there are as many elements as buckets (load factor 1).
			if (_n == _tables.size())
			{
				// Relink the existing nodes into a table twice as large.
				// Nothing is reallocated or copied, and the same Hash is used,
				// so custom hashers keep working.
				std::vector<_Node*> grown(_tables.size() * 2, nullptr);

				for (size_t i = 0; i < _tables.size(); i++)
				{
					_Node* cur = _tables[i];
					while (cur)
					{
						_Node* next = cur->_next;
						size_t target = hf(cur->_kv.first) % grown.size();
						cur->_next = grown[target]; // push at the front of the new bucket
						grown[target] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(grown);
			}

			size_t hashi = hf(kv.first) % _tables.size();
			_Node* newnode = new _Node(kv);

			// Push at the front of the bucket.
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return true;

		}

		// Prints the number of buckets, then every element bucket by bucket.
		void ShowHash() const
		{
			std::cout << "共有" << _tables.size() << "条链" << std::endl;
			for (size_t i = 0; i < _tables.size(); i++)
			{
				const _Node* cur = _tables[i];
				while (cur)
				{
					std::cout << "第" << i << "链上的元素有" << cur->_kv.first <<"共"<<cur->_kv.second<<"个" << std::endl;
					cur = cur->_next;
				}
				std::cout << "######################" << std::endl;
			}
		}

		// Looks up `key`.
		// Returns the node holding it, or nullptr when the key is not present.
		_Node* Find(const K& key)
		{
			Hash hf;
			size_t hashi = hf(key) % _tables.size();

			_Node* cur = _tables[hashi];

			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
				cur = cur->_next;
			}

			return nullptr;
		}

		// Removes `key`.
		// Returns true when a node was unlinked and freed, false when the key
		// was not present.
		bool Erase(const K& key)
		{
			Hash hf;

			size_t hashi = hf(key) % _tables.size();
			_Node* prve = nullptr;
			_Node* cur = _tables[hashi];

			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prve == nullptr)
					{
						_tables[hashi] = cur->_next; // removing the bucket head
					}
					else
					{
						prve->_next = cur->_next;
					}

					delete cur;
					--_n;
					// Return right away: `cur` must not be touched after delete.
					return true;
				}
				prve = cur;
				cur = cur->_next;
			}

			return false;
		}
	private:
		std::vector <_Node*> _tables; // bucket heads
		size_t _n = 0; // number of stored elements
	};

	// Integer keys, including repeats that Insert rejects.
	void TestHT1()
	{
		HashTable<int, int> hash;
		int a[] = { 1,20,20,20,30,40,2,3,4,55,23,89,51,23,44 };

		for (auto e : a)
		{
			hash.Insert(std::make_pair(e, e));
		}

		hash.ShowHash();

	}

	// String keys: counts how many times each word occurs.
	void TestHT2()
	{
		std::string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };
		HashTable<std::string, int> ht;
		for (auto& e : arr)
		{
			Hash_Node<std::string, int>* ret = ht.Find(e);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				ht.Insert(std::make_pair(e, 1));
			}
		}
		ht.ShowHash();
	}
}

字符串哈希函数

如果大家看的仔细的话，我们的测试用例里有字符串：

// Test data: fruit names, several of them repeated.
std::string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };

之前我们插入的是数值，可以直接用该值取模来确定位置，为插入、删除服务。但是字符串不能直接参与取模运算，所以需要先把字符串转化成一个整数，再用这个整数进行后续操作。

一开始我们想到的是把字符串中的ASCII码相加：

// Naive string hash: add up the character codes of the string.
// Addition ignores order, so strings made of the same characters in a
// different order produce the same sum.
string str = "hello My World";

int hash = 0; // running sum of the character codes
for(auto e : str)
{
  hash += e;
}

最后得到的hash就可以作为该字符串的哈希值，为我们后面的操作服务。但是这样的算法没有考虑字符的顺序。比如abcdef和bcdeaf，两个字符串累加后得到的数值是一样的，还是会引发哈希冲突，该怎么办呢？

所以就有一大堆的学者去研究这个问题，得到了许多比较高效的字符串hashcode算法，具体的大家可以在网站上进行搜索，这里不再赘述
（如果大家搜过的话，目前常见的字符串hash算法有BKDRHash，APHash，DJBHash，JSHash，RSHash，SDBMHash，PJWHash，ELFHash等等）

而我们这里用的是BKDR算法（每处理一个字符，先把已有的哈希值乘以31，再加上当前字符）：

    // Hash functors that turn a key into a size_t.
	template<class K> // generic keys: the key converted to size_t is the hash
	struct HashFuc
	{
		size_t operator()(const K& key)
		{
			const size_t asNumber = (size_t)key;
			return asNumber;
		}
	};

	template<>
	struct HashFuc<std::string> // strings: fold all characters into one number
	{
		size_t operator()(const std::string& key)
		{
			size_t acc = 0;
			// Each step scales the running value by 31 before adding the next
			// character, so the result depends on character order.
			for (std::string::const_iterator it = key.begin(); it != key.end(); ++it)
			{
				acc = acc * 31 + *it;
			}
			return acc;
		}
	};