1. 本文参考SGI STL中Hash Table的实现以及《STL源码剖析》;
2. Hash Table可提供对任何有名项的存取操作和删除操作。由于操作对象是有名项,所以Hash Table也可被视为一种字典结构。这种结构的用意在于提供常数时间之基本操作;
3. 学习Hash Table的初衷来自于面试一家公司的时候问到这个问题,本来以为本科的时候已经学的不错了,就没复习,没想到被面试官一问,各种不知道……囧……还得继续埋头学习。
4. 这里有一个比较重要的策略要注意,也是我以前学习的时候忽略掉的:当元素的个数(包括新增的元素)大于bucket vector的大小时,表格需要重建,新的bucket vector大小取__stl_prime_list中第一个不小于新元素个数的质数(通常就是当前vector大小后面那个数字),用__stl_next_prime来查找。这个质数表有个特点,除了第一个数字,其他每一个数字都大约是前一个的两倍。
以下是源代码(为了便于理解学习,程序没有使用template,以size_t为例):
#ifndef _STAN_SEPARATE_CHAINING_H_
#define _STAN_SEPARATE_CHAINING_H_
#include <algorithm>
#include <cstddef>
#include <vector>
using namespace std;
// One link in a bucket's singly linked chain.
struct _Hashtable_node
{
    struct _Hashtable_node* next; // following node in the same chain, or 0 at the end
    size_t val;                   // the stored value
};
// Short aliases used throughout the table: the node type and a pointer to it.
typedef struct _Hashtable_node node;
typedef struct _Hashtable_node* pnode;
// Number of entries in the __stl_prime_list table of candidate bucket counts.
// Note: assumes long is at least 32 bits.
enum { __stl_num_primes = 28 };
// Binary search over the sorted range [first, last).
// Returns a pointer to the first element that is not less than `value`,
// or a pointer equal to `last` when every element is smaller.
unsigned long* lower_bound(unsigned long* first, const unsigned long* last, const size_t& value)
{
    // `remaining` is the number of candidates still to examine, starting at `first`.
    for (size_t remaining = last - first; remaining != 0; )
    {
        const size_t step = remaining / 2;
        unsigned long* const probe = first + step;
        if (*probe < value)
        {
            // Everything up to and including `probe` is too small: drop it.
            first = probe + 1;
            remaining -= step + 1;
        }
        else
        {
            // `probe` may be the answer: keep only the part before it.
            remaining = step;
        }
    }
    return first;
}
// Ascending table of primes used as bucket counts. Apart from the last
// entry, each value is roughly twice the one before it.
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
    53UL,         97UL,         193UL,        389UL,
    769UL,        1543UL,       3079UL,       6151UL,
    12289UL,      24593UL,      49157UL,      98317UL,
    196613UL,     393241UL,     786433UL,     1572869UL,
    3145739UL,    6291469UL,    12582917UL,   25165843UL,
    50331653UL,   100663319UL,  201326611UL,  402653189UL,
    805306457UL,  1610612741UL, 3221225473UL, 4294967291UL
};
// Returns the smallest entry of __stl_prime_list that is >= __n.
// When __n is larger than every entry, the largest entry is returned instead.
inline unsigned long __stl_next_prime(unsigned long __n)
{
    const unsigned long* const __first = __stl_prime_list;
    const unsigned long* const __last = __stl_prime_list + __stl_num_primes;
    // std::lower_bound works directly on pointers-to-const, so the constness
    // of the table never has to be cast away.
    const unsigned long* const pos = std::lower_bound(__first, __last, __n);
    return pos == __last ? *(__last - 1) : *pos;
}
class hashtable
{
public:
//初始化一个hashtable
hashtable(const size_t n)
{
const size_t n_buckets = __stl_next_prime(n);
buckets.reserve(n_buckets);
buckets.insert(buckets.end(), n_buckets, (pnode)0);
num_elements = 0;
}
//创建一个新结点
node* new_node(const size_t& obj)
{
node* n = new node();
n->next = 0;
n->val = obj;
return n;
}
//删除一个结点
void delete_node(node* n)
{
n->next = 0;
n->val = 0;
delete n;
n = 0;
}
//计算所在的bucket
size_t bkt_num(const size_t& obj, size_t n) const
{
return obj % n;
}
//插入不重复元素
void insert_unique(const size_t& obj)
{
resize(num_elements + 1);
insert_unique_noresize(obj);
}
void resize(size_t num_elements_hint)
{
const size_t old_n = buckets.size();
if (num_elements_hint > old_n)
{
const size_t n = __stl_next_prime(num_elements_hint);
if (n > old_n)
{
vector<pnode> tmp(n);
for (size_t bucket = 0; bucket < old_n; ++bucket)
{
pnode first = buckets[bucket];
while (first)
{
size_t new_bucket = bkt_num(first->val, n);
buckets[bucket] = first->next;
first->next = tmp[new_bucket];
tmp[new_bucket] = first;
first = buckets[bucket];
}
}
buckets.swap(tmp);
}
}
}
void insert_unique_noresize(const size_t& obj)
{
const size_t n = bkt_num(obj, buckets.size());
pnode first = buckets[n];
for (pnode cur = first; cur; cur = cur->next)
{
if (cur->val == obj)
{
return;
}
}
pnode tmp = new_node(obj);
tmp->next = first;
buckets[n] = tmp;
++num_elements;
}
void insert_equal(const size_t& obj)
{
resize(num_elements + 1);
insert_equal_noresize(obj);
}
void insert_equal_noresize(const size_t& obj)
{
const size_t n = bkt_num(obj, buckets.size());
pnode first = buckets[n];
for (pnode cur = first; cur; cur = cur->next)
{
if (cur->val == obj)
{
node* tmp = new_node(obj);
tmp->next = cur->next;
cur->next = tmp;
++num_elements;
return;
}
}
node* tmp = new_node(obj);
tmp->next = first;
buckets[n] = tmp;
++num_elements;
}
//返回所找目标的位置
node* find(const size_t& obj)
{
size_t n = bkt_num(obj, buckets.size());
node* first;
for (first = buckets[n];first && !(first->val == obj); first = first->next)
{
}
return first;
}
//如果有重复的元素,都要删除
size_t erase(const size_t& obj)
{
const size_t n = bkt_num(obj, buckets.size());
pnode first = buckets[n];
size_t erased = 0;
if (first)
{
pnode cur = first;
pnode next = cur->next;
while (next)
{
if (next->val == obj)
{
cur->next = next->next;
delete_node(next);
next = cur->next;
++erased;
--num_elements;
}
else
{
cur = next;
next = cur->next;
}
}
if (first->val == obj)
{
buckets[n] = first->next;
delete_node(first);
++erased;
--num_elements;
}
}
return erased;
}
size_t bucket_count() const
{
return buckets.size();
}
size_t size() const
{
return num_elements;
}
private:
size_t num_elements;
vector<pnode> buckets;
};
#endif