5.7 hashtable
1、碰撞问题的解决
线性探测:负载系数在0-1之间(元素个数除以表格大小)。
二次探测:如果位置已被占用,则依序尝试H+1^2, H+2^2, ... , H+i^2,而不是像线性探测那样尝试H+1, H+2 ... , H+i。
开链法:负载系数大于1。
2、哈希表的bucket和node
template <class Value>
struct __hashtable_node{
__hashtable_node* next;
Value val;
};
3、哈希表的迭代器
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator {
typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
hashtable;
typedef __hashtable_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
iterator;
typedef __hashtable_const_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
const_iterator;
typedef __hashtable_node<Value> node;
typedef forward_iterator_tag iterator_category;
typedef Value value_type;
typedef ptrdiff_t difference_type;
typedef size_t size_type;
typedef Value& reference;
typedef Value* pointer;
node* cur; //迭代器目前所指的节点
hashtable* ht; //保持对容器的连结关系(因为可能需要从bucket跳到bucket)
__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
__hashtable_iterator() {}
reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
iterator& operator++();
iterator operator++(int);
bool operator==(const iterator& it) const { return cur == it.cur; }
bool operator!=(const iterator& it) const { return cur != it.cur; }
};
template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_const_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_const_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
const node* old = cur;
cur = cur->next;
if (!cur) {
size_type bucket = ht->bkt_num(old->val); //定位出下一个bucket
while (!cur && ++bucket < ht->buckets.size())
cur = ht->buckets[bucket];
}
return *this;
}
4、hashtable的数据结构
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey,
class Alloc>
class hashtable {
public:
typedef Key key_type;
typedef Value value_type;
typedef HashFcn hasher; //hash function的函数型别
typedef EqualKey key_equal;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
hasher hash_funct() const { return hash; }
key_equal key_eq() const { return equals; }
private:
hasher hash;
key_equal equals; //判断键值相同与否的方法(函数或仿函数)
ExtractKey get_key; //从节点中取出键值的方法(函数或仿函数)
typedef __hashtable_node<Value> node;
typedef simple_alloc<node, Alloc> node_allocator;
vector<node*,Alloc> buckets;
size_type num_elements;
public:
typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
Alloc>
iterator;
typedef __hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
Alloc>
const_iterator;
friend struct
__hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>;
friend struct
__hashtable_const_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>;
...
};
5、hashtable的构造与内存管理
hashtable(size_type n,
const HashFcn& hf,
const EqualKey& eql,
const ExtractKey& ext)
: hash(hf), equals(eql), get_key(ext), num_elements(0)
{
initialize_buckets(n);
}
void initialize_buckets(size_type n)
{
const size_type n_buckets = next_size(n); //计算出n个元素应当分配多大的哈希表
buckets.reserve(n_buckets); //buckets是用vector实现的,先预留容量
buckets.insert(buckets.end(), n_buckets, (node*) 0);
num_elements = 0;
}
注意: SGI hashtable的hash仿函数(hash<...>)只能处理short、int、long、char *、const char*、char,不能处理string、float、double。欲处理这些型别,用户必须自行定义它们的hash function。
5.8 hash_set
由于RB-tree有自动排序功能而hashtable没有,所以set的元素有自动排序功能而hash_set没有。除此之外,hash_set的使用方式与set完全相同。
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Value, class HashFcn = hash<Value>,
class EqualKey = equal_to<Value>,
class Alloc = alloc>
#else
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
#endif
class hash_set
{
private:
typedef hashtable<Value, Value, HashFcn, identity<Value>,
EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::const_pointer pointer; //注意不可以直接修改元素的值
typedef typename ht::const_pointer const_pointer;
typedef typename ht::const_reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
public:
hash_set() : rep(100, hasher(), key_equal()) {} //缺省使用大小为100的表格,将被hashtable调整为最接近且较大的质数。
explicit hash_set(size_type n) : rep(n, hasher(), key_equal()) {}
hash_set(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
hash_set(size_type n, const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) {}
void swap(hash_set& hs) { rep.swap(hs.rep); }
friend bool operator== __STL_NULL_TMPL_ARGS (const hash_set&,
const hash_set&);
iterator begin() const { return rep.begin(); }
iterator end() const { return rep.end(); }
public:
pair<iterator, bool> insert(const value_type& obj)
{
pair<typename ht::iterator, bool> p = rep.insert_unique(obj);
return pair<iterator, bool>(p.first, p.second);
}
...
};
5.9 hash_map
同样无排序。
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
#else
template <class Key, class T, class HashFcn, class EqualKey,
class Alloc = alloc>
#endif
class hash_map
{
private:
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
public:
hash_map() : rep(100, hasher(), key_equal()) {}
explicit hash_map(size_type n) : rep(n, hasher(), key_equal()) {}
hash_map(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
hash_map(size_type n, const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) {}
T& operator[](const key_type& key) {
return rep.find_or_insert(value_type(key, T())).second;
}
...
};
5.10 hash_multiset
采用insert_equal()。
5.11 hash_multimap
采用insert_equal()。