数据结构 — 哈希表的理解与简单实现

最新推荐文章于 2020-11-05 19:50:28 发布

Dawn_sf

最新推荐文章于 2020-11-05 19:50:28 发布

阅读量744

点赞数 2

分类专栏：数据结构文章标签：哈希表哈希存储哈希表的实现

本文链接：https://blog.csdn.net/Dawn_sf/article/details/78518267

版权

数据结构专栏收录该内容

21 篇文章 2 订阅

订阅专栏

哈希表的理解与简单实现

我们刚刚认识完树的结构，比如我们的AVL树，红黑树都是查找效率很高的数据结构.但是我又发现有一种更好的搜索方式->哈希表当我

接触到Hash表的效率的时候我是不相信的. 因为不论哈希表中有多少种数据，插入和删除的时间复杂度都接近O(1).很多种应用场景效率

都是优于树的操作.树的操作最好的也是log(N)，并且！哈希表写起来并没有红黑树那么错综复杂. 所以哈希算法算是一个不错的算法

，也容易掌握. 哈希表其实是一个数组，数组中的每一个元素称为一个箱子.箱子里面存放的是键值对.它通过一个关键值的函数将所需的

数据映射到表中的位置来访问数据.这个映射函数叫做散列函数，存放记录的数组叫做散列表.所以哈希表的存储过程如下:

1.根据Key计算出它的哈希值M.

2.假设箱子的个数为N,那么这个键值对应该放到第(M%N)个箱子中.

3.如果该箱子中已经有了键值对，就使用开放寻址法或者拉链法解决冲突.

构建哈希表的几种方法:

1.直接定址法 -- 取关键字的某个线性函数为散列地址，Hash(Key) = Key 或 Hash(Key) = A*Key + B，其中A、B为常数

2.除留余数法 -- 取关键字被某个不大于散列表长度的数P除后所得的余数为散列地址. Hash(Key) = Key % P;

3.平方取中法 -- 选择一个m位数Ni作为种子，对它做平方运算，结果若不足2m位，在前面补零.在这个数中选取中间的m位，即第(m/2+1)位至第(m/2+m)位的数.将结果作为Ni+1

4.折叠法 -- 所谓折叠法是将关键字分割成位数相同的几部分(最后一部分位数可以不同)，然后取这几部分的叠加和作为散列地址,此方法为折叠法.

5.随机数法 -- 不解释..

哈希表还有一个重要的属性: 负载因子，它用来衡量哈希表的空/满程度，一定程度也可以体现查询的效率，计算公式为:

负载因子 = 总键值对数/散列表长度

负载因子越大，意味着哈希表越满，越容易导致冲突，性能也就越低. 因此一般来讲，负载因子控制在0.7-0.8之间效率最高，

当负载因子大于这个常数时，哈希表将开始自动扩容.

这里有人就不知道哈希冲突是什么东西啦! 根据key即经过一个函数f(key)得到的结果的作为地址去存放当前的key value键值对(这个

是 hashmap的存值方式)，但是却发现算出来的地址上已经有人先来了。就是说这个地方要挤一挤啦。这就是所谓的hash冲突. 一般面对

哈希冲突会有这几种方法!

1.处理哈希冲突的闭散列方法 -- 开放定址法

1.线性探测:就是当这个位置已经有人的时候，直接在它的下一个位置存储. 如果下一个位置还有人那就一直往后走直到找到第一个

为空的位置.

2.二次探测:当这个位置有人后，不再逐个往后找，而是按平方步长探测：第i次探测的位置为 Hash_i(key) = (hash(key) + i^2) % 表长 (i = 1, 2, 3, ...)，直到找到第一个为空的位置.

2.处理哈希冲突的拉链法/开链法(哈希桶)

所谓的哈希桶就是，整个哈希表的vector当中存储的一个链表头指针. 当有数据映射这个位置之后，对该数据进行头插操作.这样就会有

效的解决哈希冲突.当然当你的一个映射位置下面节点个数过多的时候，也可以使用红黑树结构来代替该节点位置的链表结构.这样就防止

极端情况效率严重下降.

其实优化哈希冲突还会有很多方法，比如素数表. 因为使用素数对齐做哈希表的容量，这样可以让映射更分散进而降低哈希冲突. 素数表为:

// Table of primes used as hash-table capacities, in ascending order.
// _PrimeSize is not defined in this snippet; the full listings below declare
// it as 28, which matches the 28 entries here.
static const unsigned long _PrimeList[_PrimeSize] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};

我们介绍完它的优化方法和基本概念. 那么我们就需要过来简单的实现它,首先实现哈希表需要考虑几个问题。这里我们使用的构

建hash表的方式为除留余数法，我们的hash表可以存储任何数据所以当我们存储字符串的时候会出现一点小小的问题. 这里hash表的

创造者们早都帮我们想好了,这里网上

有很多hash字符串算法供我们使用，我们只需要使用hash字符串算法把字符串转换成可以模除的整数.所以我们需要使用仿函数的知识，

对字符串模板类型做特殊处理. 这里我们使用模板的特化，将Hash字符串算法加入到特化版本去. 然后我这里放一个链接里面就是大多

数的模板算法 <-戳这里. 对于模板、仿函数一类的嵌套使用，我们以前已经很多次用到过，也有很多图帮我们理解，这里就不再多说了.并且在

Hash表的实现上面，我打算使用开放定址和哈希桶两种方法来解决我们的Hash冲突. 所以会有两段代码.我现在贴出来.

开放定址法代码:

#include<iostream>
#include<Windows.h>
#include<string>
#include<vector>
using namespace std;

// Returns the first prime in the built-in capacity table that is strictly
// greater than `value`. When no entry is larger, the last (largest) entry is
// returned instead.
static size_t GetNextPrime(size_t value)
{
	// Prime capacities, ascending. Using a prime as the table size spreads
	// keys more evenly under modulo hashing, which lowers the collision rate.
	const int primeCount = 28;
	static const unsigned long primes[primeCount] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};

	// Walk forward until an entry exceeds `value`; never step past the last
	// entry, so it doubles as the fallback result.
	const unsigned long* const last = primes + (primeCount - 1);
	const unsigned long* cur = primes;
	while (cur != last && !(*cur > value))
	{
		++cur;
	}

	return *cur;
}

// Default hash functor: the key itself, converted to size_t, is the hash
// value. Suitable for integral key types; other key types need their own
// specialization (or a custom functor passed to the table).
template<class K>
struct __HashFunc
{
	// const-qualified so the functor can also be invoked through a const
	// object or reference; hashing never modifies the functor.
	size_t operator()(const K& key) const
	{
		return static_cast<size_t>(key);
	}
};


// Hash functor specialization for string keys: turns the text into an integer
// (BKDR string hash) so that it can be reduced modulo the table size.
template<>
struct __HashFunc<string>
{
	// BKDR hash of a NUL-terminated string: hash = hash * seed + byte for
	// every byte, with the top bit cleared at the end.
	// `str` must not be null.
	static size_t BKDRHash(const char * str)
	{
		const unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		for (; *str; ++str)
		{
			// Go through unsigned char so bytes >= 0x80 contribute the same
			// value whether plain char is signed or unsigned on the platform.
			hash = hash * seed + static_cast<unsigned char>(*str);
		}
		return (hash & 0x7FFFFFFF);
	}

	// const-qualified so the functor can be invoked through a const object.
	size_t operator()(const string& key) const
	{
		return BKDRHash(key.c_str());
	}
};

// Occupancy state of a single slot in the open-addressing hash table.
enum State
{
	EXIST,		// slot currently holds a stored key/value pair (set by Insert)
	DELETe,		// slot held a pair that was removed (set by Remove)
				// NOTE(review): presumably spelled this way to avoid the DELETE
				// macro pulled in by <Windows.h> -- confirm
	EMPTY		// slot has never been used; the default state, and the point
				// at which Find stops probing
};

// One slot of the open-addressing table: the stored key/value pair together
// with the slot's occupancy state.
template<class K, class V>
struct HashNode
{
	// A freshly constructed slot is marked EMPTY; the pair is default-constructed.
	HashNode() : _state(EMPTY) {}

	pair<K, V> _kv;		// key in .first, mapped value in .second
	State _state;		// EXIST / DELETe / EMPTY
};

template<class K, class V, class HashFunc = __HashFunc<K>>
class HashTable
{
public:

	typedef typename HashNode<K, V> Node;

	HashTable()
		:_n(0)
	{}

	bool Insert(pair<K, V>& kv)
	{
		_CheckCapacity();

		size_t i = 1;
		size_t index = _HashFunc(kv.first, _tables.size());
		while (_tables[index]._state == EXIST)
		{
			if (_tables[index]._kv.first == kv.first)
			{
				return false;
			}

			++index;
			if (index == _tables.size())
			{
				index = 0;
			}

		}

		_tables[index]._kv = kv;
		_tables[index]._state = EXIST;
		++_n;

		return true;
	}

	HashNode<K, V>* Find(const K& key)
	{
		size_t index = _HashFunc(key, _tables.size());
		while (_tables[index]._state != EMPTY)
		{
			if (_tables[index]._kv.first == key)
			{
				return &_tables[index];
			}

			++index;
			if (index == _tables.size())
			{
				index = 0;
			}

		}
		return NULL;
	}

	bool Remove(const K& key)
	{
		HashNode<K, V>* node = Find(key);
		if (node)
		{
			node->_state = DELETe;
			return true;
		}
		else
		{
			return false;
		}
	}

	size_t Size()
	{
		return _n;
	}

	size_t Capacity()
	{
		return _tables.size();
	}

protected:
	void _CheckCapacity()
	{
		if (_tables.empty())
		{
			_tables.resize(GetNextPrime(0));
			return;
		}

		if (_n * 10 / _tables.size() >= 7)
		{
			size_t newSize = GetNextPrime(_tables.size());
			HashTable<K, V, HashFunc> newHaseTable;
			newHaseTable._tables.resize(newSize);
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i]._state == EXIST)
				{
					newHaseTable.Insert(_tables[i]._kv);
				}
			}
			_tables.swap(newHaseTable._tables);
		}
	}


	size_t _HashFunc(const K& key, size_t size)
	{
		HashFunc hash;
		return hash((key) % size);
	}
private:
	vector<Node> _tables;
	size_t _n;
};

// Smoke test for the open-addressing table: inserts keys 5..8 (each mapped to
// itself), then removes 5 and 6.
void Test()
{
	HashTable<int, int> T;

	// Pass named pairs rather than make_pair() temporaries: binding a
	// temporary to a non-const reference parameter is accepted only as an
	// MSVC extension, while an lvalue works with either parameter form.
	for (int key = 5; key <= 8; ++key)
	{
		pair<int, int> kv(key, key);
		T.Insert(kv);
	}

	T.Remove(5);
	T.Remove(6);
	system("pause");	// keep the Windows console window open

}

哈希桶法代码:

#include<iostream>
#include<Windows.h>
#include<string>
#include<vector>
using namespace std;

// Picks a table capacity: the first prime in the built-in list that is
// strictly greater than `value`, or the largest listed prime when none is.
static size_t GetNextPrime(size_t value)
{
	// Ascending prime capacities; a prime table size reduces hash collisions.
	const int kPrimeCount = 28;
	static const unsigned long kPrimes[kPrimeCount] =
	{
		53ul, 97ul, 193ul, 389ul, 769ul,
		1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
		1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
		50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};

	// Skip every entry that is not greater than `value`, but stop on the
	// final entry regardless, since it is also the fallback answer.
	int slot = 0;
	while (slot < kPrimeCount - 1 && kPrimes[slot] <= value)
	{
		++slot;
	}

	return kPrimes[slot];
}

// Default hash functor: uses the key, converted to size_t, as its own hash
// value. Intended for integral keys; other key types need a specialization
// or a custom functor.
template<class K>
struct __HashFunc
{
	// const-qualified: hashing does not modify the functor, and this lets it
	// be called through a const object or reference.
	size_t operator()(const K& key) const
	{
		return static_cast<size_t>(key);
	}
};


// Hash functor specialization for string keys: converts the text to an
// integer with the BKDR string hash so it can be taken modulo the table size.
template<>
struct __HashFunc<string>
{
	// BKDR hash of a NUL-terminated string: hash = hash * seed + byte for
	// each byte, with the top bit cleared at the end.
	// `str` must not be null.
	static size_t BKDRHash(const char * str)
	{
		const unsigned int seed = 131; // 31 131 1313 13131 131313
		unsigned int hash = 0;
		for (; *str; ++str)
		{
			// Convert via unsigned char so bytes >= 0x80 hash identically
			// regardless of whether plain char is signed on the platform.
			hash = hash * seed + static_cast<unsigned char>(*str);
		}
		return (hash & 0x7FFFFFFF);
	}

	// const-qualified so the functor can be invoked through a const object.
	size_t operator()(const string& key) const
	{
		return BKDRHash(key.c_str());
	}
};

// Node of a bucket's singly linked chain: one key/value pair plus the link to
// the next node in the same bucket.
template<class K, class V>
struct HashNode
{
	// Copies `kv` into the node; the node starts out unlinked.
	HashNode(const pair<K, V>& kv)
		: _kv(kv), _next(NULL)
	{}

	pair<K, V> _kv;		// key in .first, mapped value in .second
	HashNode* _next;	// next node in this bucket, or NULL at the end
};

template<class K, class V, class HashFunc = __HashFunc<K>>
class HashTable
{
	friend struct HashTableIterator<K, V, HashFunc>;
public:

	typedef typename HashNode<K, V> Node;
	typedef typename HashTableIterator<K, V, HashFunc> Iterator;

	Iterator Begin()
	{
		size_t index = 0;
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i] != NULL)
			{
				return Iterator(_tables[i], this);
			}
		}
		return Iterator(NULL, this);
	}

	Iterator End()
	{
		return Iterator(NULL, this);
	}

	HashTable()
		:_n(0)
	{}

	bool Insert(pair<K, V>& kv)
	{
		_CheckCapacity();

		size_t i = 1;
		size_t index = _HashFunc(kv.first, _tables.size());

		Node* newNode = new Node(kv);

		if (_tables[index] == NULL)
		{
			_tables[index] = newNode;
		}
		else
		{
			Node* cur = _tables[index];

			while (cur)
			{
				if (cur->_kv.first == kv.first)
				{
					return false;
				}
				cur = cur->_next;
			}

			newNode->_next = _tables[index];
			_tables[index] = newNode;
		}
		++_n;

		return true;
	}


	HashNode<K, V>* Find(const K& key)
	{
		size_t index = _HashFunc(key, _tables.size());

		if (_tables[index] != NULL)
		{
			Node* cur = _tables[index];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
		}

		return NULL;
	}

	bool Remove(const K& key)
	{
		HashNode<K, V>* node = Find(key);
		if (node)
		{
			size_t index = _HashFunc(key, _tables.size());
			Node* cur = _tables[index];
			if (_tables[index] == NULL)
				return false;

			else if (_tables[index]->_next == NULL)
			{
				delete node;
				_tables[index] = NULL;
			}
			else
			{
				while (cur)
				{
					if (cur->_next == node)
					{
						break;
					}
					cur = cur->_next;
				}
				cur->_next = node->_next;
				delete node;
			}
			--_n;
			return true;
		}
		return false;
	}

	size_t Size()
	{
		return _n;
	}

	size_t Capacity()
	{
		return _tables.size();
	}

protected:
	//当然我们在这里使用传统的Insert是没有问题的. 但是优秀的程序员需要优化代码. 那么我可不可把这个桶上面的节点
	//直接拿出来头插到对应节点的位置.
	void _CheckCapacity()
	{
		if (_tables.empty())
		{
			_tables.resize(GetNextPrime(0));
			return;
		}

		if (_n * 10 / _tables.size() >= 7)
		{
			size_t newSize = GetNextPrime(_tables.size());
			HashTable<K, V, HashFunc> newHaseTable;
			newHaseTable._tables.resize(newSize);
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				Node* next = cur->_next;
				while (cur)
				{
					next = cur->_next;

					size_t index = _HashFunc(cur->_kv.first, newHaseTable._tables.size());

					cur->_next = newHaseTable._tables[index];
					newHaseTable._tables[index] = cur;

					cur = next;
				}
				_tables[i] = NULL;
			}
			_tables.swap(newHaseTable._tables);
		}
	}


	size_t _HashFunc(const K& key, size_t size)
	{
		HashFunc hash;
		return (hash(key) % size);
	}
private:
	vector<Node*> _tables;
	size_t _n;
};

// Smoke test for the chained table. Keys 5 and 58 land in the same bucket of
// the initial 53-bucket table (58 % 53 == 5), so this covers chaining and the
// removal of a node that is not its chain's head.
void Test()
{
	HashTable<int, int> T;

	// Insert named pairs rather than make_pair() temporaries: binding a
	// temporary to a non-const reference parameter is accepted only as an
	// MSVC extension, while an lvalue works with either parameter form.
	const int entries[][2] = { { 5, 5 }, { 58, 7 }, { 6, 6 }, { 7, 7 }, { 8, 8 } };
	for (size_t i = 0; i < sizeof(entries) / sizeof(entries[0]); ++i)
	{
		pair<int, int> kv(entries[i][0], entries[i][1]);
		T.Insert(kv);
	}

	T.Remove(5);
	T.Remove(6);

}