算法学习 - Hash Table (Separate Chaining)

1. 本文参考SGI STL中Hash Table的实现以及《STL源码剖析》;

2. Hash Table可提供对任何有名项的存取操作和删除操作。由于操作对象是有名项,所以Hash Table也可被视为一种字典结构。这种结构的用意在于提供常数时间之基本操作;

3. 学习Hash Table的初衷来自于面试一家公司的时候问到这个问题,本来以为本科的时候已经学的不错了,就没复习,没想到被面试官一问,各种不知道……囧……还得继续埋头学习。

4. 这里有一个比较重要的策略要注意,也是我以前学习的时候忽略掉的:当元素的个数(包括新增的元素)大于bucket vector的大小时,表格需要重建,新的bucket vector大小取__stl_prime_list中第一个不小于该元素个数的数字(通常也就是当前vector大小后面的那个数字),用__stl_next_prime来查找。这个质数表有个特点:除了最后一个数字,每一个数字都大约是前一个的两倍。

以下是源代码(为了便于理解学习,程序没有使用template,以size_t为例):

#ifndef _STAN_SEPARATE_CHAINING_H_
#define _STAN_SEPARATE_CHAINING_H_

#include <vector>
using namespace std;

// One link of a bucket chain: the stored value plus a pointer to the next
// link in the same bucket (null at the end of the chain).
struct _Hashtable_node
{
	struct _Hashtable_node* next;
	size_t val;
};

// Short aliases used throughout the table: the node type and a pointer to it.
typedef struct _Hashtable_node node;
typedef struct _Hashtable_node* pnode;

// Number of entries in __stl_prime_list (used as that array's size).
// Note: the table values assume unsigned long is at least 32 bits wide.
enum { __stl_num_primes = 28 };

// Binary search over the ascending range [first, last).
//
// Returns a pointer to the first element that is not less than `value`, or a
// pointer equal to `last` when every element is smaller than `value`.
//
// Declared inline because the definition lives in a header: without it,
// including the header from more than one translation unit produces a
// multiple-definition error at link time.
inline unsigned long* lower_bound(unsigned long* first, const unsigned long* last, const size_t& value)
{
	// Number of elements still under consideration, starting at `first`.
	size_t len = last - first;
	while (len > 0)
	{
		const size_t half = len >> 1;
		unsigned long* const middle = first + half;
		if (*middle < value)
		{
			// Everything up to and including `middle` is too small: drop it.
			first = middle + 1;
			len = len - half - 1;
		}
		else
		{
			// `middle` may itself be the answer: keep it as the upper limit.
			len = half;
		}
	}
	return first;
}

// Ascending table of candidate bucket counts consulted by __stl_next_prime.
// The accompanying article describes these as the primes used by the SGI STL
// hash table. Starting at 53, each entry is roughly twice the previous one;
// the final entry, 4294967291 (2^32 - 5), stays just below 2^32, so every
// value fits in a 32-bit unsigned long.
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
	53ul,         97ul,         193ul,       389ul,       769ul,
	1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
	49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
	1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
	50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul, 
	1610612741ul, 3221225473ul, 4294967291ul
};

// Looks up the bucket count to use for a requested size.
//
// Returns the first entry of __stl_prime_list that is not less than __n.
// When __n is larger than every entry, the last (largest) entry is returned.
inline unsigned long __stl_next_prime(unsigned long __n)
{
	const unsigned long* const table_begin = __stl_prime_list;
	const unsigned long* const table_end = table_begin + static_cast<int>(__stl_num_primes);

	// lower_bound takes a non-const first pointer, hence the const_casts;
	// the table itself is only read.
	const unsigned long* const hit = lower_bound(const_cast<unsigned long*>(table_begin), const_cast<unsigned long*>(table_end), __n);

	if (hit == table_end)
	{
		return *(table_end - 1);
	}
	return *hit;
}

class hashtable
{
public:
	//初始化一个hashtable
	hashtable(const size_t n)
	{
		const size_t n_buckets = __stl_next_prime(n);
		buckets.reserve(n_buckets);
		buckets.insert(buckets.end(), n_buckets, (pnode)0);
		num_elements = 0;
	}
	//创建一个新结点
	node* new_node(const size_t& obj)
	{
		node* n = new node();
		n->next = 0;
		n->val = obj;
		return n;
	}
	//删除一个结点
	void delete_node(node* n)
	{
		n->next = 0;
		n->val = 0;
		delete n;
		n = 0;
	}
	//计算所在的bucket
	size_t bkt_num(const size_t& obj, size_t n) const
	{
		return obj % n;
	}
	//插入不重复元素
	void insert_unique(const size_t& obj)
	{
		resize(num_elements + 1);
		insert_unique_noresize(obj);
	}
	void resize(size_t num_elements_hint)
	{
		const size_t old_n = buckets.size();
		if (num_elements_hint > old_n)
		{
			const size_t n = __stl_next_prime(num_elements_hint);
			if (n > old_n)
			{
				vector<pnode> tmp(n);
				for (size_t bucket = 0; bucket < old_n; ++bucket)
				{
					pnode first = buckets[bucket];
					while (first)
					{
						size_t new_bucket = bkt_num(first->val, n);
						buckets[bucket] = first->next;
						first->next = tmp[new_bucket];
						tmp[new_bucket] = first;
						first = buckets[bucket];
					}
				}
				buckets.swap(tmp);
			}
		}
	}
	void insert_unique_noresize(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		for (pnode cur = first; cur; cur = cur->next)
		{
			if (cur->val == obj)
			{
				return;
			}
		}
		pnode tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
	}

	void insert_equal(const size_t& obj)
	{
		resize(num_elements + 1);
		insert_equal_noresize(obj);
	}
	void insert_equal_noresize(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		for (pnode cur = first; cur; cur = cur->next)
		{
			if (cur->val == obj)
			{
				node* tmp = new_node(obj);
				tmp->next = cur->next;
				cur->next = tmp;
				++num_elements;
				return;
			}
		}
		node* tmp = new_node(obj);
		tmp->next = first;
		buckets[n] = tmp;
		++num_elements;
	}
	//返回所找目标的位置
	node* find(const size_t& obj)
	{
		size_t n = bkt_num(obj, buckets.size());
		node* first;
		for (first = buckets[n];first && !(first->val == obj); first = first->next)
		{
		}
		return first;
	}
	//如果有重复的元素,都要删除
	size_t erase(const size_t& obj)
	{
		const size_t n = bkt_num(obj, buckets.size());
		pnode first = buckets[n];
		size_t erased = 0;

		if (first)
		{
			pnode cur = first;
			pnode next = cur->next;
			while (next)
			{
				if (next->val == obj)
				{
					cur->next = next->next;
					delete_node(next);
					next = cur->next;
					++erased;
					--num_elements;
				}
				else
				{
					cur = next;
					next = cur->next;
				}
			}
			if (first->val == obj)
			{
				buckets[n] = first->next;
				delete_node(first);
				++erased;
				--num_elements;
			}
		}
		return erased;
	}
	size_t bucket_count() const
	{
		return buckets.size();
	}
	size_t size() const
	{
		return num_elements;
	}

private:
	size_t num_elements;
	vector<pnode> buckets;
};

#endif


  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值