算法学习 - Hash Table (Separate Chaining)

最新推荐文章于 2023-05-29 22:50:01 发布

StanfordZhang

最新推荐文章于 2023-05-29 22:50:01 发布

阅读量4.8k

点赞数

分类专栏：算法 文章标签：table 算法 insert vector delete struct

本文链接：https://blog.csdn.net/StanfordZhang/article/details/6621444

版权

算法专栏收录该内容

4 篇文章 0 订阅

订阅专栏

1. 本文参考SGI STL中Hash Table的实现以及《STL源码剖析》；

2. Hash Table可提供对任何有名项的存取操作和删除操作。由于操作对象是有名项，所以Hash Table也可被视为一种字典结构。这种结构的用意在于提供常数时间之基本操作；

3. 学习Hash Table的初衷来自于面试一家公司的时候问到这个问题，本来以为本科的时候已经学的不错了，就没复习，没想到被面试官一问，各种不知道……囧……还得继续埋头学习。

4. 这里有一个比较重要的策略要注意，也是我以前学习的时候忽略掉的：当元素的个数（包括新增的元素）大于bucket vector的大小时，表格需要重建，新的bucket vector大小是__stl_prime_list中当前vector大小后面那个数字，用__stl_next_prime来查找。这个质数表有个特点：除了第一个数字（没有前一项）和最后一个数字以外，其他每一个数字都大约是前一个的两倍。

以下是源代码（为了便于理解学习，程序没有使用template，以size_t为例）：

#ifndef _STAN_SEPARATE_CHAINING_H_
#define _STAN_SEPARATE_CHAINING_H_

#include <vector>
using namespace std;

// One link in a bucket's singly linked chain.
struct _Hashtable_node
{
	struct _Hashtable_node* next;  // following node in the same chain, or 0 at the end
	size_t val;                    // value stored in this node
};

// Short aliases used throughout the table code: the node type and a pointer to it.
typedef struct _Hashtable_node node;
typedef struct _Hashtable_node* pnode;

// Number of entries in __stl_prime_list; used as that array's bound.
// Note: assumes long is at least 32 bits.
enum { __stl_num_primes = 28 };

// Binary search over the range [first, last) of unsigned long values.
//
// Parameters:
//   first - start of the range
//   last  - one past the end of the range
//   value - value to search for
//
// Returns a pointer to the first element that is not less than `value`, or a
// pointer equal to `last` when every element is smaller. The result is only
// meaningful when the range is sorted in ascending order.
//
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit would produce
// multiple definitions of the same function at link time.
inline unsigned long* lower_bound(unsigned long* first, const unsigned long* last, const size_t& value)
{
	// Number of elements still under consideration, starting at `first`.
	size_t len = last - first;
	while (len > 0)
	{
		const size_t half = len >> 1;
		unsigned long* const middle = first + half;
		if (*middle < value)
		{
			// Answer lies strictly to the right of `middle`.
			first = middle + 1;
			len = len - half - 1;
		}
		else
		{
			// `middle` may itself be the answer; keep it in the left half.
			len = half;
		}
	}
	return first;
}

// Ascending table of candidate bucket counts, searched by __stl_next_prime.
// Apart from the final entry, each value is roughly double the one before it.
// The last two entries exceed 2^31, hence the unsigned long element type.
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
	53ul,         97ul,         193ul,       389ul,       769ul,
	1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
	49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
	1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
	50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul, 
	1610612741ul, 3221225473ul, 4294967291ul
};

// Returns the smallest entry of __stl_prime_list that is not less than __n.
// When __n is larger than every entry, the last (largest) entry is returned.
inline unsigned long __stl_next_prime(unsigned long __n)
{
	// lower_bound takes a non-const start pointer, so constness is cast away
	// for the call only; the table itself is never written to.
	unsigned long* const table_begin = const_cast<unsigned long*>(__stl_prime_list);
	unsigned long* const table_end = table_begin + __stl_num_primes;

	const unsigned long* const hit = lower_bound(table_begin, table_end, __n);
	if (hit == table_end)
	{
		return table_end[-1];
	}
	return *hit;
}

class hashtable
{
public:
	//初始化一个hashtable
	hashtable(const size_t n)
	{
		const size_t n_buckets = __stl_next_prime(n);
		buckets.reserve(n_buckets);
		buckets.insert(buckets.end(), n_buckets, (pnode)0);
		num_elements = 0;
	}
	//创建一个新结点
	node* new_node(const size_t& obj)
	{
		node* n = new node();
		n->next = 0;
		n->val = obj;
		return n;
	}
	//删除一个结点
	void delete_node(node* n)
	{
		n->next = 0;
		n->val = 0;
		delete n;
		n = 0;
	}
	//计算所在的bucket
	size_t bkt_num(const size_t& obj, size_t n) const
	{
		return obj % n;
	}
	//插入不重复元素
	void insert_unique(const size_t& obj)
	{
		resize(num_elements + 1);
		insert_unique_noresize(obj);
	}
	void resize(size_t num_elements_hint)
	{
		const size_t old_n = buckets.size();
		if (num_elements_hint > old_n)
		{
			const size_t n = __stl_next_prime(num_elements_hint);
			if (n > old_n)
			{
				vector<pnode> tmp(n);
				for (size_t bucket = 0; bucket < old_n; ++bucket)
				{
					pnode first = buckets[bucket];
					while (first)
					{
						size_t new_bucket = bkt_num(first->val, n);
						buckets[bucket] = first->next;
						first->next = tmp[new_bucket];
						tmp[new_bucket] = first;
						first = buckets[bucket];
					}
				}
				buckets.swap(tmp);
			}
		}
	}
	void insert_unique_noresize(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		for (pnode cur = first; cur; cur = cur->next)
		{
			if (cur->val == obj)
			{
				return;
			}
		}
		pnode tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
	}

	void insert_equal(const size_t& obj)
	{
		resize(num_elements + 1);
		insert_equal_noresize(obj);
	}
	void insert_equal_noresize(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		for (pnode cur = first; cur; cur = cur->next)
		{
			if (cur->val == obj)
			{
				node* tmp = new_node(obj);
				tmp->next = cur->next;
				cur->next = tmp;
				++num_elements;
				return;
			}
		}
		node* tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
	}
	//返回所找目标的位置
	node* find(const size_t& obj)
	{
		size_t n = bkt_num(obj, buckets.size());
		node* first;
		for (first = buckets[n];first && !(first->val == obj); first = first->next)
		{
		}
		return first;
	}
	//如果有重复的元素，都要删除
	size_t erase(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		size_t erased = 0;

		if (first)
		{
			pnode cur = first;
			pnode next = cur->next;
			while (next)
			{
				if (next->val == obj)
				{
					cur->next = next->next;
					delete_node(next);
					next = cur->next;
					++erased;
					--num_elements;
				}
				else
				{
					cur = next;
					next = cur->next;
				}
			}
			if (first->val == obj)
			{
				buckets[n] = first->next;
				delete_node(first);
				++erased;
				--num_elements;
			}
		}
		return erased;
	}
	size_t bucket_count() const
	{
		return buckets.size();
	}
	size_t size() const
	{
		return num_elements;
	}

private:
	size_t num_elements;
	vector<pnode> buckets;
};

#endif

StanfordZhang

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
2
评论
算法学习 - Hash Table (Separate Chaining)

1. 本文参考SGI STL中Hash Table的实现以及《STL源码剖析》；2. Hash Table可提供对任何有名项的存取操作和删除操作。由于操作对象是有名项，所以Hash Table也可被视为一种字典结构。这种结构的用意在于提供常数时间之基本操作；3. 学习Hash T
复制链接

扫一扫