30.模拟实现hashtable

最新推荐文章于 2020-09-09 20:34:21 发布

云疏不知数

最新推荐文章于 2020-09-09 20:34:21 发布

阅读量150

点赞数

分类专栏： C++

本文链接：https://blog.csdn.net/qq_43808700/article/details/105722955

版权

C++ 专栏收录该内容

35 篇文章 0 订阅

订阅专栏

模拟实现hashtable

闭散列法:线性探测法

#include <iostream>
#include <vector>
using namespace std;


/*
newHt????
负载（载荷）因子：
_size * 10 / _ht.capacity() >= 7
容量以素数增容的原因？
*/

/*
采用闭散列处理哈希冲突时，不能随便物理删除哈希表中已有的元素，若直接删除元素会影响其他元素的搜索。
比如删除元素4，如果直接删除掉，44查找起来可能会受影响。因此线性探测采用标记的伪删除法来删除一个元素。
//哈希表每个空间给个标记
//EMPTY此位置空，EXIST此位置已经有元素，DELETE元素已经删除
enum State { EMPTY, EXIST, DELETE };
*/


// A good hash layout keeps collisions rare. A composite capacity shares
// divisors with many keys, which makes `key % capacity` collide far more
// often, so capacities are always taken from this ascending list of primes.
static const int num_primes = 32;
static const unsigned long prime_list[num_primes] =
{
	3,          7,            13,          19,
	53,         97,           193,         389,       769,
	1543,       3079,         6151,        12289,     24593,
	49157,      98317,        196613,      393241,    786433,
	1572869,    3145739,      6291469,     12582917,  25165843,
	50331653,   100663319,    201326611,   402653189, 805306457,
	1610612741, 3221225473ul, 4294967291ul
};

// Returns the first entry of prime_list that is strictly greater than num.
// When no entry is greater than num, the largest entry is returned instead.
unsigned long GetNextPrime(size_t num) {
	const unsigned long* const largest = prime_list + (num_primes - 1);
	const unsigned long* candidate = prime_list;
	// Walk forward until an entry exceeds num or the final entry is reached.
	while (candidate != largest && *candidate <= num)
		++candidate;
	return *candidate;
}

// State of one slot in the closed-hashing table.
//   EMPTY  : the slot has never held an element, so a probe sequence ends here.
//   EXIST  : the slot holds a live element.
//   DELETE : the slot held an element that was erased (a tombstone). Probing
//            continues past it so elements stored further along stay reachable.
enum State { EMPTY, EXIST, DELETE };

// Hash table with unique keys, using closed hashing and linear probing.
//
// key   : key type; it must support `key % size_t` (see hashFunc) and `==`.
// value : mapped type stored next to the key.
//
// Erasing only marks a slot as DELETE. Tombstones are reused by later inserts
// and are dropped whenever the table is rebuilt.
template<class key, class value>  // key-value
class hashTable
{
	// One slot: the stored pair plus its state marker.
	struct elemt {
		elemt() : _val(), _state(EMPTY) {}
		std::pair<key, value> _val;
		State _state;
	};
public:
	// Creates a table with `capacity` slots, all EMPTY.
	// A capacity of 0 is allowed; the first insert then allocates slots.
	hashTable(size_t capacity = 3) : _ht(capacity), _size(0), _used(0) {}
public:
	// Inserts val unless an element with the same key is already stored.
	// Returns true when the element was stored, false when the key already
	// exists (or, if the table could not grow any further, when it is full).
	bool insert(const std::pair<key, value>& val)
	{
		// Grow or rebuild first so that at least one slot is EMPTY.
		checkCapacity();
		const size_t cap = _ht.size();
		if (cap == 0)
			return false;

		size_t addr = hashFunc(val.first);
		size_t target = cap;   // cap means "no reusable slot seen yet"
		// At most cap probes: every slot is visited once, so the loop always ends.
		for (size_t probes = 0; probes < cap; ++probes)
		{
			const elemt& slot = _ht[addr];
			if (slot._state == EMPTY) {
				// End of the probe sequence: the key is not in the table.
				if (target == cap)
					target = addr;
				break;
			}
			if (slot._state == EXIST) {
				if (slot._val.first == val.first)
					return false;   // duplicate keys are rejected
			}
			else if (target == cap) {
				// Remember the first tombstone, but keep probing: the key
				// may still be stored further along the sequence.
				target = addr;
			}
			if (++addr == cap)
				addr = 0;
		}
		if (target == cap)
			return false;

		// Only a previously EMPTY slot increases the number of occupied slots.
		if (_ht[target]._state == EMPTY)
			++_used;
		_ht[target]._val = val;
		_ht[target]._state = EXIST;
		++_size;
		return true;
	}
	// Number of slots in the table.
	size_t capacity() const {
		return _ht.size();
	}
	// Number of live elements.
	size_t size() const {
		return _size;
	}
	// Looks up key t.
	// Returns the index of the slot that holds it, or -1 when it is not stored.
	// The index is only meaningful until the next insert, which may rebuild
	// the table.
	int find(const key& t) const {
		const size_t cap = _ht.size();
		if (cap == 0)
			return -1;
		size_t addr = hashFunc(t);
		// Bounded to cap probes so a table without any EMPTY slot cannot loop
		// forever; the index wraps around instead of running off the end.
		for (size_t probes = 0; probes < cap; ++probes) {
			const elemt& slot = _ht[addr];
			if (slot._state == EMPTY)
				break;
			if (slot._state == EXIST && slot._val.first == t)
				return static_cast<int>(addr);
			if (++addr == cap)
				addr = 0;
		}
		return -1;
	}
	// Erases the element with key k by turning its slot into a tombstone.
	// Returns true when an element was erased, false when k was not found.
	// (The parameter must not be called `key`: a template parameter name
	// cannot be redeclared inside the template.)
	bool erase(const key& k) {
		const int index = find(k);
		if (-1 == index)
			return false;
		_ht[index]._state = DELETE;
		--_size;   // _used is unchanged: the slot still takes part in probing
		return true;
	}
	// Exchanges the complete contents of the two tables.
	void Swap(hashTable<key, value> &ht) {
		_ht.swap(ht._ht);
		std::swap(_size, ht._size);
		std::swap(_used, ht._used);
	}
private:
	// Rebuilds the table once occupied slots (live elements plus tombstones)
	// reach a load factor of 0.7, or when the table has no slots at all.
	void checkCapacity()
	{
		const size_t cap = _ht.size();
		if (cap != 0 && _used * 10 / cap < 7)
			return;

		// Grow only when the live elements alone justify it; otherwise the
		// load comes from tombstones and a rebuild at the same size is enough.
		size_t newCap = cap;
		if (cap == 0 || _size * 10 / cap >= 7) {
			newCap = GetNextPrime(_ht.size());
			if (newCap <= cap) {
				// No larger prime is available.
				if (_used == _size)
					return;    // no tombstones either: rebuilding gains nothing
				newCap = cap;  // still worth dropping the tombstones
			}
		}

		// Re-insert the live elements into a fresh table, then take it over.
		hashTable<key, value> newHt(newCap);
		for (size_t i = 0; i < cap; ++i) {
			if (_ht[i]._state == EXIST)
				newHt.insert(_ht[i]._val);
		}
		Swap(newHt);
	}
	// Hash function (division method): home slot of a key.
	// Requires a non-empty table. The slot count is _ht.size(), not
	// _ht.capacity(): only indices below size() refer to existing elements.
	size_t hashFunc(const key& data) const {
		return data % _ht.size();
	}
private:
	std::vector<elemt> _ht;    // the slots
	size_t             _size;  // number of EXIST slots
	size_t             _used;  // number of EXIST + DELETE slots
};
int main() {
	hashTable<int, int> ht;
	int ar[] = { 4, 6, 8, 3, 6, 13, 1, 2, 9 };
	int n = sizeof(ar) / sizeof(int);
	for (int i = 0; i < n; ++i) {
		ht.insert(pair<int, int>(ar[i], ar[i]));
	}
	cout << ht.size() << endl;  //当前元素个数
	ht.erase(8);
	cout << ht.size() << endl;  //删除之后元素个数
	cout <<"capacity : "<< ht.capacity() << endl;
	cout <<"key = 6 : index = "<< ht.find(6) << endl;
	cout << "key = 8 : index = " << ht.find(8) << endl;
	cout <<"key = 13 : index = " << ht.find(13) << endl;

	return 0;
}

/*
unordered系列关联式容器的底层实现是hashtable，
比如在unordered_set中value就是key，而在unordered_map中value代表键值对的值
哈希函数使用除留余数法计算存放地址，那么key就必须是整型或者能转换为整型才能取模
*/

闭散列法:线性探测法

开散列法(链地址法(开链法))

#include <algorithm>
#include <iostream>
#include <vector>
#include <string>
using namespace std;

// Ascending list of primes used as bucket counts.
// Note: assumes long is at least 32 bits (the last two entries need them).
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
	53ul,         97ul,         193ul,       389ul,       769ul,
	1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
	49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
	1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
	50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul,
	1610612741ul, 3221225473ul, 4294967291ul
};

// Returns the smallest entry of __stl_prime_list that is not less than n.
// When n is larger than every entry, the largest entry is returned.
inline unsigned long _stl_next_prime(unsigned long n)
{
	const unsigned long* const tableEnd = __stl_prime_list + __stl_num_primes;
	// lower_bound (from <algorithm>) performs a binary search over the sorted
	// table and yields the first position whose value is >= n.
	const unsigned long* const hit = std::lower_bound(__stl_prime_list, tableEnd, n);
	if (hit != tableEnd)
		return *hit;
	return tableEnd[-1];
}



// Node of a singly linked bucket chain: one stored value plus a link to the
// next node in the same chain.
template<class Value>
struct hashtable_node {
	hashtable_node* next;  // next node in the chain
	Value val;             // the stored value
};

#if 0
// Sketch of an iterator for the hashtable. The whole block sits inside
// `#if 0`, so it is never compiled.
// NOTE(review): it uses node, hashtable, reference, pointer and iterator
// without declaring them here; presumably typedefs for these are needed
// before the block can be enabled, and the two operator++ overloads are only
// declared, not defined.
template <class Value, class Key>
struct __hashtable_iterator {
	// Node the iterator currently refers to.
	node* cur;
	// Table handed to the constructor.
	hashtable* ht;

	// Stores the given node and table pointers.
	__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
	// Leaves both pointers uninitialized.
	__hashtable_iterator() {}
	// Access to the value stored in the current node.
	reference operator*() const { return cur->val; }
	pointer operator->() const { return &(operator*()); }
	// Pre- and post-increment (declarations only).
	iterator& operator++(); 
	iterator operator++(int);
	// Two iterators compare equal when they refer to the same node.
	bool operator==(const iterator& it) const { return cur == it.cur; }
	bool operator!=(const iterator& it) const { return cur != it.cur; }
};
#endif


template <class Value, class Key>
class hashtable {
typedef size_t size_type;
typedef struct hashtable_node<Value> node;
typedef Value value_type;
typedef Value key_type;
public:
//构造函数
	hashtable(size_t n): num_elements(n){
		initialize_buckets(n);
	}
//开辟新结点
	node* new_node(const value_type& obj)
	{
		node* n = (node*)malloc(sizeof(node));
		n->next = (node*)0;
		n->val = obj;
		return n;
	}
	//判断是否需要扩容
	void resize(size_type num_elements_hint)
	{
		const size_type old_n = buckets.size();
		if (num_elements_hint > old_n) {
			const size_type n = next_size(num_elements_hint);
			if (n > old_n) {
				vector<node*> tmp(n, (node*)0);
				for (size_type bucket = 0; bucket < old_n; ++bucket) {
					node* first = buckets[bucket];
					while (first) {
						size_type new_bucket = bkt_num_key(first->val, n);
						buckets[bucket] = first->next;
						first->next = tmp[new_bucket];
						tmp[new_bucket] = first;
						first = buckets[bucket];
					}
				}
				buckets.swap(tmp);
			}
		}
	}
//插入方法(唯一值)
	bool insert_unique(const value_type& obj)
	{
		/*当元素个数大于hashtable的容量时，一个桶下面的元素个数会非常多，容易牺牲索引效率
		在插入之前要先判断元素个数是否大于容量，在元素个数大于容量时要扩容*/
		resize(num_elements + 1);
		return insert_unique_noresize(obj);
	}
	size_type bkt_num_key(const key_type &key, size_t n) const {
		return key % n;
	}
	size_type bkt_num_key(const key_type& key) const {
		return bkt_num_key(key, buckets.size());
		//?????????????????????????????????是size还是capacity
	}
	size_type bkt_num(const value_type& obj) {
		return bkt_num_key(obj);  //sgi中参数是get_key(obj)， 因为值不一定是int类型
	}
	bool insert_unique_noresize(const value_type& obj) {
		//1.计算哈希地址
		const size_type n = bkt_num(obj);
		node* first = buckets[n];

		for (node* cur = first; cur; cur = cur->next)
			if (cur->val == obj)
				return false;
		//头插
		node* tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
		return true;
	}
//插入方法(相等值)
	bool insert_equal_noresize(const value_type& obj)
	{
		const size_type n = bkt_num(obj);
		node* first = buckets[n];

		//在桶中第一个相同的值后面尾插
		for (node* cur = first; cur; cur = cur->next)
			if (cur->val == obj) {
				node* tmp = new_node(obj);
				tmp->next = cur->next;
				cur->next = tmp;
				++num_elements;
				return true;
			}
		//桶中没有相同值就头插
		node* tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
		return true;
	}
	bool insert_equal(const value_type& obj)
	{
		resize(num_elements + 1);
		return insert_equal_noresize(obj);
	}


//求size
	size_type size()const {
		return num_elements;
	}
//求最大容量
	size_type max_size()const {
		return size_type(-1);
	}
//判空
	bool empty()const {
		return size() == 0;
	}
	void show_hashtable() {
		for (size_t i = 0; i < buckets.size(); ++i) {
			node* p = buckets[i];
			cout << "bucket["<<i<<"] :";
			while (p != NULL) {
				cout << p->val << "->";
				p = p->next;
			}
			cout << endl;
		}

	}
private:
	size_type next_size(size_type n) const {
		return _stl_next_prime(n);
	}
	void initialize_buckets(size_type n) {
		const size_type n_buckets = next_size(n);
		//预留空间
		buckets.reserve(n_buckets);
		//插入空指针
		buckets.insert(buckets.end(), n_buckets, (node*)0);
		//元素个数赋0
		num_elements = 0;	
	}
private:
	vector<node*> buckets;
	size_type num_elements;
};

int main() {
	int ar[] = {2, 55, 108, 161, 6, 8, 6, 8, 4};
	int n = sizeof(ar) / sizeof(int);
	hashtable<int, int> ht(53);
	for (int i = 0; i < n; ++i) {
		//ht.insert_unique(ar[i]);
		ht.insert_equal(ar[i]);
	}
	ht.show_hashtable();
	return 0;
}
//代码格式化:Ctrl+K+F

云疏不知数

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
30.模拟实现hashtable

模拟实现hashtable闭散列法:线性探测法 #include <iostream>#include <vector>using namespace std;/*newHt????负载（载荷）因子：_size * 10 / _ht.capacity() >= 7容量以素数增容的原因？*//*采用闭散列处理哈希冲突时，不能随便物理...
复制链接

扫一扫

专栏目录