数据结构之哈希表二（用开散列法实现哈希表）

最新推荐文章于 2024-10-06 16:53:57 发布

hj605635529

最新推荐文章于 2024-10-06 16:53:57 发布

阅读量1.4k

点赞数 1

分类专栏： C/C++ 文章标签：散列函数数据结构面试哈希表

本文链接：https://blog.csdn.net/hj605635529/article/details/70705736

版权

C/C++ 专栏收录该内容

36 篇文章 0 订阅

订阅专栏

前面我们讲了哈希冲突的闭散列（开放定址法）的两种方法，现在我们来介绍另一种方法：开散列法。

通常，每个桶中的同义词子表都很短，设有n个关键码通过某一个散列函数，存放到散列表中的m个桶中，那么每一个桶中的同义词子表的平均长度为n/m。这样以搜索平均长度为n/m的同义词子表代替了搜索长度为n的顺序表，搜索效率快的多。

应用开散列法处理溢出，需要增设链接指针，似乎增加了存储开销。事实上，由于闭散列法必须保持大量的空闲空间以确保搜索效率，如二次探查法要求装载因子a <= 0.5 而表项所占空间又比指针大的多，所以使用开散列法反而比开地址法节省存储空间。

开散列（拉链法）和闭散列（直接定址法）的区别：

（1）拉链法没有堆积现象即非同义词不会发生冲突，减少了平均查找长度。
（2）拉链法删除节点是真正的删除，而直接定址法删除元素是在删除元素的节点做删除标记。
（3）对于多元素的哈希表，拉链法比直接定址法节约空间，因为直接定址法要冲突少，就需要装填因子较小，二次探测要求装填因子<0.5,浪费大量空间，但是拉链法的装填因子可以>1
（4）拉链法中各链表的节点是动态申请的，更适合于表前无法确定表长的情况。

开散列代码：

#pragma  once
#include<iostream>
using namespace std;
#include <cassert>
#include<vector>


template<class K,class V>
struct HashTableNode
{
	HashTableNode(const K &key,const V &value)
		:_key(key)
		,_value(value)
		,_next(NULL)
	{  }

	HashTableNode<K,V> * _next;
	K _key;
	V _value;
};


template<class K,class V>
class HashTableBucket
{
public:
	HashTableBucket(size_t capacity = 10)
		:_size(0)
	{
		capacity = GetNextPrim(capacity);  //采用素数减少冲突。
		_table.resize(capacity);  //创建capacity个元素，每个元素都初始化为0(NULL).
		/*for (size_t idx = 0; idx < capacity; ++idx)
		{
		_table[id] = NULL;
		}*/    //这个和上面那句话应该是重复的。
	}

	bool Insert(const K& key,const V& value) //节点头插
	{
		//扩容
		CheckCapacity();
		size_t idex = HashFun(key);
		HashTableNode<K,V> *pCur = _table[idex];
	
		//插入的时候先查找这个元素是不是存在，
		while (pCur)
		{
			if (pCur->_key == key)
			{
				return false;
			}
	
			pCur = pCur->_next;
		}

		HashTableNode<K,V> *newNode = new HashTableNode<K,V>(key,value);
		newNode->_next = _table[idex];
		_table[idex] = newNode;
		_size++;

		return true;
	}

	bool Find(const K & key)
	{
		size_t idx = HashFun(key);

		HashTableNode<K,V> *pCur = _table[idx];
		while (pCur)
		{
			if (pCur->_key == key)
			{
				return true;
			}
			pCur = pCur->_next;
		}

		return false;
	}

	bool Remove(const K & key)
	{
		size_t index = HashFun(key);

		HashTableNode<K,V> *pCur = _table[index];
		HashTableNode<K,V> *pPre = NULL;

		while (pCur)
		{
			if (pCur->_key == key)
			{
				HashTableNode<K,V> *pDel = pCur;
				if (pCur == _table[index]) //删除的是第一个节点，头指向需要修改。
				{
					_table[index] = pCur->_next;
				}
				else
				{
					pPre->_next = pDel->_next;
					
				}
				delete pDel;
				_size--;
				return true;
				
			}

			pPre = pCur;
			pCur = pCur->_next;
		}

		return false;
	}

void Clear()
{
	for (size_t idx = 0; idx < _table.size(); ++idx)
	{
		HashTableNode<K,V> *pCur = _table[idx];

		while (pCur)
		{
			HashTableNode<K,V> *pDel = pCur;
			pCur = pCur->_next;
			delete pDel;
		}
		_table[idx] = NULL;
	
	}
	_size = 0;
}

~HashTableBucket()
{
	Clear();
}



private:

	size_t HashFun(const K& key)
	{
		return key % _table.capacity();
	}

	void CheckCapacity()
	{
		if (_size == _table.size())
		{
			HashTableBucket<K,V> temp(GetNextPrim(_size)); //创建一个更大容量临时对象

			for (size_t idx = 0; idx < _table.size(); ++idx)  //将原来哈希表中的元素插入到新的哈希表中。
			{
				HashTableNode<K,V> *pCur = _table[idx];  //这里的插入不是创建新的节点，而是修改原来的节点的指向。
				HashTableNode<K,V> *pPre = NULL;

				while (pCur)
				{
					pPre = pCur;
					pCur = pCur->_next;

					size_t idx = temp.HashFun(pPre->_key); //这里如果不是调用temp.HashFun，就会调用原来的哈希函数。
					pPre->_next = temp._table[idx];  //头插到新的哈希表中。
					temp._table[idx] = pPre;
					temp._size++;
				}
				_table[idx] = NULL;   //把原来的链表都摘下来后，得将相对应的桶置空。
			}

			swap(_table,temp._table);
			swap(_size,temp._size);
		}
	}

	size_t GetNextPrim(size_t prev) 
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList [_PrimeSize] =
		{
			53ul,         97ul,         193ul,       389ul,       769ul,
			1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
			49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
			1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
			50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		}; //素数表

		for (size_t idx = 0; idx < _PrimeSize; ++idx)
		{
			if (prev < _PrimeList[idx])
			{
				return _PrimeList[idx];
			}
		}
		return -1;
	}

private:
	vector<HashTableNode<K,V>* > _table; //哈希桶，每一个桶中的元素都是一个指针，指向一个单链表。
	size_t _size;
};

#include "hashtable的开散列.hpp"



void test3()
{
	HashTableBucket<int,int> ht;
	ht.Insert(1,1);
	ht.Insert(21,2);
	ht.Insert(31,3);
	ht.Insert(41,4);
	ht.Insert(3,5);
	ht.Insert(6,3);
	//ht.Remove(21);
	//ht.Remove(41);
	ht.Remove(3);

	cout << ht.Find(31)<<endl;
	cout << ht.Find(44)<<endl;

	
}


//void test3Capacity()
//{
//	HashTableBucket<int,int> ht;
//	ht.Insert(1,1);
//	ht.Insert(21,2);
//	ht.Insert(31,3);
//	ht.Insert(41,4);
//	ht.Insert(3,5);
//	ht.Insert(6,3);
//	ht.Insert(9,8);
//	ht.Insert(5,7);
//	ht.Insert(19,4);
//	ht.Insert(34,98);
//	ht.Insert(37,64);
//	ht.Insert(98,23);
//	ht.Insert(17,43);
//
//}


int main()
{

	test3();
	
	cout << "hello..." <<endl;
	system("pause");
	return 0;
}

开散列的优化代码：加上迭代器设计

#pragma once
#include<iostream>
using namespace std;
#include <vector>
#include<cassert>
#include<string>

template<class K,class V>
struct HashTableNode
{
	HashTableNode(const K& key,const V & value)
		:_next(NULL)
		,_kv(key,value)
	{     }

	HashTableNode *_next;
	pair<K,V> _kv;
};

template<class K,class V,class _HashFunc = HashFunDef<K> >
class HashTableBucket;


//迭代器
template<class K, class V,class Ref,class Ptr, class _HashFunc=HashFunDef<K>>
struct HashBucketIterator
{
	typedef HashTableNode<K,V> Node;
	typedef HashTableBucket<K,V,_HashFunc> HashTable;//需要前置声明。
	typedef HashBucketIterator<K,V,Ref,Ptr,_HashFunc> Self; //迭代器，也是一种类型。

	HashBucketIterator():_node(NULL),_ht(NULL)
	{   }
	HashBucketIterator(Node *node,HashTable *ht):_node(node),_ht(ht)
	{   }

	Ref operator*()   //键值对的引用。
	{
		return _node->_kv;
	}

	Ptr operator->()
	{
		return  &(operator*());
	}

	Self& operator++()  //指针加加还是一个指针，前置加加返回一个引用
	{
		_node = _Next(_node);
		return *this;
	}

	Self operator++(int)
	{
		Self temp(*this);
		++(*this);
		return temp;
	}

	bool operator==(const Self & self) const
	{
		return _node == self._node;
	}
	bool operator!=(const Self & self)const
	{
		return !(*this == self);
	}

private:
	Node * _Next(Node * node)
	{
		Node * next = node->_next;  //next可能还是这个桶中的元素，当next为空，说明要返回下一个非空的桶中的元素。

		if (next)
		{
			return next;
		}
		else  //下一个节点就是下一个非空的桶中的第一个节点
		{
			size_t index = _ht->HashFun(node->_kv.first); //这个节点所在桶的编号。
			for (size_t  i = index + 1; i < _ht->_table.capacity();++i)  //找下一个非空的桶
			{
				Node *pCur = _ht->_table[i]; //这里访问了哈希表的私有成员，所以要声明这个类是哈希表的友元。
				if (pCur)
				{
					return pCur;
				}
			}
			return NULL;
		}

	}


private:
	Node *_node;  //一个指向节点的指针。
	HashTable *_ht;  //一个指向哈希表的指针（这个哈希表的实现是用了vector,每个元素类型都是一个指针，指向单链表。
};                  //为啥要用到了这个是因为重载++符号用到了。


template<class K>
class HashFunDef   //哈希默认函数处理整型
{

public:
	size_t operator()(const K &key)
	{
		return key;
	}
};

//字符串转换为整型的一个函数。
static size_t BKDRHash(const char *str)
{
	unsigned int seed = 131;
	unsigned int hash = 0;
	while (*str)
	{
		hash = hash *seed +(*str++);
	}
	return (hash & 0x7FFFFFFF);
}


template<>
class HashFunDef<string>  //函数对象的特例化,特例化为字符串
{
public:
	size_t operator()(const string &key)
	{
		return BKDRHash(key.c_str());
	}

};




template<class K,class V,class _HashFunc = HashFunDef<K> >
class HashTableBucket
{
public:
	friend struct HashBucketIterator<K,V,pair<K,V>&,pair<K,V> *,_HashFunc>;
	typedef HashBucketIterator<K,V,pair<K,V>&,pair<K,V>*,_HashFunc> Iterator;
public:
	HashTableBucket(size_t capacity = 10):_size(0)
	{
		capacity = GetNextPrim(capacity);
		_table.resize(capacity);
	}

	pair<bool,Iterator> Insert(const pair<K,V> &kv)
	{
		CheckCapacity();
		size_t idx = HashFun(kv.first);

		HashTableNode<K,V> *pcur = _table[idx];
		while (pcur)
		{
			if (pcur->_kv.first == kv.first)
			{
				return make_pair(false,Iterator());
			}
			pcur = pcur->_next;
		}

		HashTableNode<K,V> *newNode = new HashTableNode<K,V>(kv.first,kv.second);
		newNode->_next = _table[idx];
		_table[idx] = newNode;
		_size++;
		return make_pair(true,Iterator(newNode,this));
	}

	bool Find(const K & key)
	{
		size_t idx = HashFun(key);
		HashTableNode<K,V> *pcur = _table[idx];
		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				return true;
			}
			pcur = pcur->_next;
		}
		return false;
	}

	bool Remove(const K & key)
	{
		size_t idx = HashFun(key);

		HashTableNode<K,V> *pCur = _table[idx];
		HashTableNode<K,V> *pPre = NULL;

		while (pCur)
		{
			if (pCur->_kv.first == key)
			{
				HashTableNode<K,V> *pDel = pCur;
				if (pCur == _table[idx])
				{
					_table[idx] = pCur->_next;
				}
				else
				{
					pPre->_next = pCur->_next;
				}

				delete pDel;
				_size--;
				return true;

			}

			pPre = pCur;
			pCur = pCur->_next;

		}

		return false;
	}

	void Clear()
	{
		for (size_t i = 0; i < _table.capacity(); ++i)
		{
			HashTableNode<K,V> *pCur = _table[i];
			HashTableNode<K,V> *pDel = NULL;
			while (pCur)
			{
				pDel = pCur;
				pCur = pCur->_next;
				delete pDel;
			}
			_table[i] = NULL;
		}
		_size = 0;
	}

	~HashTableBucket()
	{
		Clear();
	}

	V& operator[](const K &key)
	{
		size_t index = HashFun(key);
		HashTableNode<K,V> *pcur = _table[index];

		while (pcur)
		{
			if (pcur->_kv.first == key)
			{
				return pcur->_kv.second;
			}
			pcur = pcur->_next;
		}
		assert (false);
	}

Iterator Begin()  //返回哈希表中第一个元素的位置。
{
	for (size_t index = 0; index < _table.capacity();++index)
	{
		HashTableNode<K,V> *pCur = _table[index];
		if (pCur)
		{
			return Iterator(pCur,this);
		}
	}
}

Iterator End()
{
	return Iterator(NULL,NULL);
}


private:

	size_t HashFun(const K & key)
	{
		//return key % _table.capacity();
		return ( _HashFunc()(key) % _table.capacity());
	}

	void CheckCapacity()
	{
		if (_size == _table.capacity())
		{
			HashTableBucket<K,V> temp(GetNextPrim(_size));  //扩容到下一个素数。

			for (size_t i = 0; i < _table.capacity(); ++i)
			{
				HashTableNode<K,V> *pcur = _table[i];
				HashTableNode<K,V> *pPre = NULL;
				while (pcur)
				{
					size_t index = temp.HashFun(pcur->_kv.first); //放入新的桶中

					pPre = pcur;
					pcur = pcur->_next;

					pPre->_next = temp._table[index];
					temp._table[index] = pPre;
					temp._size++;
				}
				_table[i] = NULL;
			}

			swap(_table,temp._table);
			swap(_size,temp._size);
		}
	}


	size_t GetNextPrim(size_t prev) 
	{
		const int _PrimeSize = 28;
		static const unsigned long _PrimeList [_PrimeSize] =
		{
			53ul,         97ul,         193ul,       389ul,       769ul,
			1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
			49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
			1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
			50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
			1610612741ul, 3221225473ul, 4294967291ul
		}; //素数表

		for (size_t idx = 0; idx < _PrimeSize; ++idx)
		{
			if (prev < _PrimeList[idx])
			{
				return _PrimeList[idx];
			}
		}
		return -1;
	}

private:
	vector<HashTableNode<K,V>* > _table;
	size_t _size;
};

#include "hashtable的开散列的优化.hpp"


void test4()
{
		HashTableBucket<int,int> ht;
		ht.Insert(pair<int,int>(1,1));
		ht.Insert(pair<int,int>(21,2));
		ht.Insert(make_pair(31,3)); //make_pair是一个模板函数，自动能识别出来。
		ht.Insert(make_pair(41,4));
		ht.Insert(pair<int,int>(3,5));
		ht.Insert(make_pair(5,3));
	//	ht.Remove(21);
		//ht.Remove(41);
		//ht.Remove(3);
	
		/*cout << ht.Find(21)<<endl;
		cout << ht.Find(44)<<endl;*/

		cout << ht[1] <<endl;
		cout << ht[3] <<endl;
}

void Teststring()
{
	HashTableBucket<string, string,HashFunDef<string> > ht1;
	ht1.Insert(make_pair("yangbin", "www"));
	ht1.Insert(pair<string,string>("笔记本", "笔"));
	ht1.Insert(make_pair("我就是我", "不一样的烟火"));
	ht1.Insert(make_pair("哈哈", "呵呵"));

//	cout << ht1.Find("笔记本") <<endl;
	//cout << ht1["笔记本"] <<endl;
	//ht1.Remove("笔记本");
	//cout << ht1.Find("笔记本") <<endl;

	HashTableBucket<string, string,HashFunDef<string> >::Iterator it = ht1.Begin();

	while(it != ht1.End())//node==NULL ht==NULL
	{
		cout<<(*it).second<<endl;
		++it;
	}

}

int main()
{

	test4();

	Teststring();

	cout << "hello ..." <<endl;
	system("pause");
	return 0;
}