HashTable二次探测

最新推荐文章于 2023-02-08 23:15:00 发布

MoonsQu

最新推荐文章于 2023-02-08 23:15:00 发布

阅读量597

点赞数

文章标签： Hash二次探测 hashtable 二次探测

本文链接：https://blog.csdn.net/Moons_is_me/article/details/51558600

版权

先来了解一下哈希表的二次探测的原理

当前容量为：10，依次插入 89、18、49、58、9。

定义两个size_t 类型的变量 index，i = 0；

index = 89 % 10 = 9；查看下标为9的位置为空，所以89放在下标为9的位置；

index = 18 % 10 = 8；查看下标为8的位置为空，所以18放在下标为8的位置；

index = 49 % 10 = 9；查看下标为9的位置不为空，i++；index = index + i * i = 10；如果index大于或等于当前的容量，则index = index - 容量；再查看index的位置是否为空。当前index = 0，该位置为空，将49放到下标为0的位置；i = 0；

index = 58 % 10 = 8；查看下标为8的位置不为空，i++；index = index + i * i = 9；如果index大于或等于当前容量，则index = index - 容量；再查看index的位置是否为空。当前index = 9，不为空，index要先被重置为8；i++；index = index + i * i = 12；12大于当前的容量，所以index = 12 - 10 = 2；再查看index的位置是否为空，当前index = 2，该位置为空，将58放到下标为2的位置；i = 0；

数字9和上面的同理，不再赘述。

下面来看看哈希表的增删查

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
using namespace std;

// Occupancy marker kept for each slot of the hash table.
enum State
{
	EMPTY = 0,   // slot has not been used; the constructor starts every slot here
	EXIST = 1,   // slot currently holds an inserted key/value pair
	DELETE = 2,  // slot's entry was removed by Delete()
};

// One key/value pair as stored in a single slot of the hash table.
template <typename K, typename V>
struct HashTableNode
{
	K _key;    // key the entry is looked up by
	V _value;  // value associated with _key
};

// Default hash functor: the hash of a key is the key itself converted to
// size_t, so K must be implicitly convertible to size_t (e.g. an integer type).
template <class K>
struct __HashFunc
{
	// Returns `key` converted to size_t.
	// const-qualified so the functor can be invoked through a const object.
	size_t operator()(const K& key) const
	{
		return key;
	}
};

// Specialization for std::string keys: the hash is the sum of the character
// values. Strings made of the same characters in a different order
// (e.g. "abcd" and "bcda") therefore hash to the same value.
template<>
struct __HashFunc<std::string>
{
	// Returns the sum of all characters of `key`; 0 for an empty string.
	size_t operator()(const std::string& key) const
	{
		size_t value = 0;
		for (size_t i = 0; i < key.size(); ++i)
		{
			value += key[i];
		}
		return value;
	}
};

template<class K, class V, class HashFunc = __HashFunc<K>>
class HashTable
{
	typedef HashTableNode<K, V> Node;
public:
	HashTable(size_t capacity = 10)
		:_tables(new Node[capacity])
		, _states(new State[capacity])
		, _size(0)
		, _capacity(capacity)
	{
		for (size_t i = 0; i < _capacity; ++i)
		{
			_states[i] = EMPTY;
		}
	
	}
	bool Insert(const K& key, const V& value)
	{
		_CheckCapacity();
		size_t index = _HashFunc(key);
		size_t Cindex = index;
		int i = 0;
		while (_states[index] == EXIST)
		{
			if (index >= _capacity)
			{
				index = index - _capacity;
			}
			if (_tables[index]._key == key)
			{
				return false;
			}
			i++;
			index = Cindex;
			index = index + i*i;

		}
		_tables[index]._key = key;
		_tables[index]._value = value;
		_states[index] = EXIST;
		++_size;
		return true;
	}

	void Delete(const K& key)
	{
		Node* ret = Find(key);
		size_t index = ret - _tables;
		_states[index] = DELETE;
		_size--;
	}

	Node*  Find(const K& key)
	{
		size_t index = _HashFunc(key);
		const size_t Cindex = index;
		size_t i = 0;
		while (_states[index] != EMPTY)
		{
			if (index > _capacity)
			{
				index = index - _capacity;
			}
			if (_tables[index]._key == key)
			{
				if (_states[index] == DELETE)
					return NULL;
				return _tables+index;
			}
			i++;
			index = Cindex;
			index = index + i*i;
		}
		return NULL;
	}
	size_t _HashFunc(const K& key)
	{
		__HashFunc<K> hashFunc;
		size_t num = hashFunc(key) % _capacity;
		return num;
		//return __HashFunc(key) % _capacity;
	}
	void Print()
	{
		size_t index = 0;
		while (index < _capacity)
		{
			if (_states[index]==EXIST)
			cout << "["<<index<<"]" << ":" << "key:" << _tables[index]._key << "    value:" << _tables[index]._value << endl;
			index++;

		}
		cout << endl;
	}


protected:
	void _CheckCapacity()
	{
		if (_size * 10 / _capacity >= 7)
		{
			HashTable<K, V> tmp(2 * _capacity);
			for(size_t i = 0; i < _capacity; ++i)
			{
				if (_states[i] == EXIST)
				{
					tmp.Insert(_tables[i]._key,_tables[i]._value);
				}
			}
			this->Swap(tmp);
		}

	}
	void Swap(const HashTable<K, V>& ht)
	{
		swap(_capacity, (size_t)ht._capacity);
		swap(_size, (size_t)ht._size);
		swap(_tables, (Node*)ht._tables);
		swap(_states, (State*)ht._states);
	}
protected:
	Node* _tables;
	State* _states;
	size_t _size;
	size_t _capacity;
	
};

void TestDict()
{
	HashTable<string, string> dict;
	dict.Insert("dict", "字典");
	dict.Insert("hash", "哈希");
	dict.Insert("function", "函数");
	dict.Insert("abcd", "函数");
	dict.Insert("bcda", "函数");

	dict.Print();

	dict.Delete("bcda");
	dict.Print();

	HashTableNode<string, string>* ret = dict.Find("hash");
	if (ret)
	{
		cout << "hash的中文：" << ret->_value << endl;
	}
	else
	{
		cout << "找不到" << endl;
	}

	HashTable<int, int> h;
	h.Insert(4, 6);
	h.Print();
	HashTableNode<int, int>* ret1 = h.Find(4);
	if (ret1)
	{
		cout << "4：" << ret1->_value << endl;
	}
	else
	{
		cout << "找不到" << endl;
	}
}

// Program entry point: runs the demo, then executes the shell command
// "pause" before exiting with status 0.
int main()
{
	TestDict();
	// NOTE(review): "pause" is presumably the Windows console built-in that
	// waits for a key press — confirm the intended platform.
	system("pause");
}

对哈希表中的每个位置，我用枚举类型定义了三种状态：EMPTY、EXIST、DELETE；

值得注意的是，有了_CheckCapacity()，哈希表是永远不会满的；

在_CheckCapacity()函数中用 _size * 10 / _capacity >= 7 作为判断条件，是因为散列表有一个载荷因子 α = 填入表中的元素个数 / 散列表的长度；α 超过0.8，则说明哈希冲突多，CPU 缓存的命中率低。所以我将 α 的值控制在0.7以下，以提高效率。

在hash表中有一种键类型是string，要用到模板的特化（这里是对仿函数 __HashFunc 针对 string 做的类模板全特化）。

dict是一个英汉字典的模拟。