哈希map比红黑树更容易实现,查找速度也更快,只是不支持高效的有序操作(例如按键的大小顺序遍历)。
哈希map的精髓是一个好用、简单、分布均匀的hash函数。《算法4》中的实现采用除留余数法,本文的哈希map也基于此种方法实现。
此种方法的关键是选择合适的除数,通常它应该是质数。如果除数不是质数,那么当被除数(键的哈希值)与除数有公约数 d 时,余数也一定是 d 的倍数,这些键只能落在下标为 d 的倍数的位置上,也就不能均匀分散了。
常用质数:
// 17, 31, 53, 97, 193, 389, 769,
// 1543, 3079, 6151, 12289, 24593,
// 49157, 98317, 196613, 393241, 786433,
// 1572869, 3145739, 6291469, 12582917, 25165843,
// 50331653, 100663319, 201326611, 402653189, 805306457,
// 1610612741, 3221225473, 4294967291
下面拉链法的哈希map代码没有给出自动扩充的实现,需要事先知道最终插入元素的数量,并自行选择一个质数作为构造参数。
#ifndef SEPARATECHAINNINGHASHST
#define SEPARATECHAINNINGHASHST
#include <algorithm>
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>
namespace ST
{
template <typename K, typename V>
struct listST;

// One node of the singly linked chain owned by listST: a key, its value and
// the link to the next node. All members are private; only listST<K, V>
// (declared a friend) reads or relinks nodes.
template <typename K, typename V>
struct listNode
{
public:
    listNode(const K &keys, const V &vals, listNode *nexts)
        : key(keys), val(vals), next(nexts) {}

private:
    K key;
    V val;
    listNode *next = nullptr;
    friend listST<K, V>;
};

// Unordered symbol table backed by a singly linked list with sequential
// search. Keys are compared with operator==; keyVec() additionally needs
// operator<. get() returns V() for a missing key, so V must be
// default-constructible. The list owns its nodes and deep-copies them.
template <typename K, typename V>
struct listST
{
public:
    listST() = default;

    // Deep copy. Nodes are duplicated in the same order as in rhs; if an
    // allocation or element copy throws, the partial copy is released.
    listST(const listST &rhs)
    {
        listNode<K, V> **tail = &first;
        try
        {
            for (const listNode<K, V> *x = rhs.first; x; x = x->next)
            {
                *tail = new listNode<K, V>(x->key, x->val, nullptr);
                tail = &(*tail)->next;
                ++sizeNum;
            }
        }
        catch (...)
        {
            clear();
            throw;
        }
    }

    // Takes over the nodes of rhs and leaves rhs empty.
    listST(listST &&rhs) noexcept
        : first(rhs.first), sizeNum(rhs.sizeNum)
    {
        rhs.first = nullptr;
        rhs.sizeNum = 0;
    }

    // Copy-and-swap: always returns *this, handles self-assignment, and
    // assigning from an empty list empties this list.
    listST &operator=(const listST &rhs)
    {
        if (this != &rhs)
        {
            listST copy(rhs);
            swapWith(copy);
        }
        return *this;
    }

    listST &operator=(listST &&rhs) noexcept
    {
        if (this != &rhs)
        {
            clear();
            first = rhs.first;
            sizeNum = rhs.sizeNum;
            rhs.first = nullptr;
            rhs.sizeNum = 0;
        }
        return *this;
    }

    ~listST() { clear(); }

    // Returns {true, value} when the key is present, {false, V()} otherwise.
    std::pair<bool, V> get(const K &keys) const;
    // True when the key is present.
    bool contains(const K &key) const;
    // Removes the key if present; does nothing otherwise.
    void del(const K &key);
    // Inserts the pair, or overwrites the value of an existing key.
    void put(const K &keys, const V &vals);
    // Number of stored keys.
    std::size_t size() const { return sizeNum; }
    std::vector<K> keyVec() const;        // all keys, sorted ascending
    std::vector<K> unOrderKeyVec() const; // all keys, in list order (not sorted)

private:
    // Returns the node holding `keys`, or nullptr when the key is absent.
    listNode<K, V> *findNode(const K &keys) const
    {
        for (listNode<K, V> *x = first; x; x = x->next)
        {
            if (x->key == keys)
            {
                return x;
            }
        }
        return nullptr;
    }

    // Deletes every node iteratively (no recursion, so long chains are safe).
    void clear() noexcept
    {
        while (first)
        {
            listNode<K, V> *temp = first;
            first = first->next;
            delete temp;
        }
        sizeNum = 0;
    }

    void swapWith(listST &other) noexcept
    {
        std::swap(first, other.first);
        std::swap(sizeNum, other.sizeNum);
    }

    listNode<K, V> *first = nullptr;
    std::size_t sizeNum = 0;
};

template <typename K, typename V>
std::pair<bool, V> listST<K, V>::get(const K &keys) const
{
    if (const listNode<K, V> *hit = findNode(keys))
    {
        return std::pair<bool, V>(true, hit->val);
    }
    return std::pair<bool, V>(false, V());
}

template <typename K, typename V>
bool listST<K, V>::contains(const K &keys) const
{
    return findNode(keys) != nullptr;
}

template <typename K, typename V>
void listST<K, V>::del(const K &keys)
{
    // `link` is the pointer that points at the current node (either `first`
    // or some node's `next`), so head and interior removal share one path.
    for (listNode<K, V> **link = &first; *link; link = &(*link)->next)
    {
        if ((*link)->key == keys)
        {
            listNode<K, V> *doomed = *link;
            *link = doomed->next;
            delete doomed;
            --sizeNum;
            return;
        }
    }
}

template <typename K, typename V>
void listST<K, V>::put(const K &keys, const V &vals)
{
    if (listNode<K, V> *hit = findNode(keys))
    {
        hit->val = vals;
        return;
    }
    // New keys are pushed at the front of the list.
    first = new listNode<K, V>(keys, vals, first);
    ++sizeNum;
}

template <typename K, typename V>
std::vector<K> listST<K, V>::keyVec() const
{
    std::vector<K> veck = unOrderKeyVec();
    std::sort(veck.begin(), veck.end());
    return veck;
}

template <typename K, typename V>
std::vector<K> listST<K, V>::unOrderKeyVec() const
{
    std::vector<K> veck;
    veck.reserve(sizeNum);
    for (const listNode<K, V> *x = first; x; x = x->next)
    {
        veck.push_back(x->key);
    }
    return veck;
}

// Separate-chaining hash map: a fixed array of listST chains indexed by
// std::hash<K>()(key) % M. The table never resizes, so the caller should pass
// a chain count (ideally a prime) suited to the expected number of keys.
template <typename K, typename V>
struct HashST
{
public:
    // `num` is the number of chains. A request for 0 chains is treated as 1
    // so that the modulo in hashNum() is always well defined.
    HashST(std::size_t num)
        : st(num == 0 ? 1 : num), N(0), M(st.size())
    {
    }

    // Returns {true, value} when the key is present, {false, V()} otherwise.
    std::pair<bool, V> get(const K &keys) const
    {
        return st[hashNum(keys)].get(keys);
    }

    bool contains(const K &keys) const
    {
        return st[hashNum(keys)].contains(keys);
    }

    // Inserts or overwrites; N grows only when the chain actually grew.
    void put(const K &keys, const V &vals)
    {
        listST<K, V> &chain = st[hashNum(keys)];
        const std::size_t before = chain.size();
        chain.put(keys, vals);
        if (chain.size() > before)
        {
            ++N;
        }
    }

    // Removes the key if present; N shrinks only when the chain shrank.
    void del(const K &keys)
    {
        listST<K, V> &chain = st[hashNum(keys)];
        const std::size_t before = chain.size();
        chain.del(keys);
        if (chain.size() < before)
        {
            --N;
        }
    }

    // Number of keys currently stored.
    std::size_t size() const { return N; }

    // All keys of the map, sorted ascending (requires operator< on K).
    std::vector<K> keyVec() const
    {
        std::vector<K> keysVec;
        keysVec.reserve(N);
        for (const listST<K, V> &chain : st)
        {
            const std::vector<K> temp = chain.unOrderKeyVec();
            keysVec.insert(keysVec.end(), temp.begin(), temp.end());
        }
        std::sort(keysVec.begin(), keysVec.end());
        return keysVec;
    }

private:
    // Modular hashing: maps a key to a chain index in [0, M).
    std::size_t hashNum(const K &keys) const
    {
        return std::hash<K>()(keys) % M;
    }

    std::vector<listST<K, V>> st;
    std::size_t N = 0; // number of stored keys
    std::size_t M = 1; // number of chains, always >= 1
};
} // namespace ST
#endif
基于线性探测法实现的哈希map给出了自动变长的实现。需要注意,自动变长是有代价的:如果一开始就知道最终的元素个数 n,可以直接把构造参数设为 2n,构造函数会自动选取不小于该值的质数作为表的大小。
#ifndef LINEARPROBINGHASHST
#define LINEARPROBINGHASHST
#include <vector>
namespace ST
{
// Open-addressing hash map using linear probing.
//
// Keys and values live in two parallel arrays of length M; slot i is in use
// when keysVec[i].first is true. The slot for a key is found by starting at
// std::hash<K>()(key) % M and stepping forward (wrapping around) until the key
// or a free slot is met. put() grows the table before inserting once it is
// half full, so there is always a free slot to terminate a probe. K must be
// default-constructible and comparable with operator==; V must be
// default-constructible.
template <typename K, typename V>
struct LPHashST
{
public:
    // Creates an empty table with the default capacity (17 slots).
    LPHashST()
        : keysVec(M, std::pair<bool, K>(false, K())), valsVec(M)
    {
    }

    // Creates an empty table whose capacity is the smallest tabulated prime
    // that is >= num (see pickCapacity for the out-of-range case).
    LPHashST(std::size_t num)
        : M(pickCapacity(num)), keysVec(M, std::pair<bool, K>(false, K())), valsVec(M)
    {
    }

    // Inserts the pair, or overwrites the value of an existing key.
    void put(const K &keys, const V &vals)
    {
        // Grow first so the load factor stays at or below 1/2.
        if (N >= M / 2)
        {
            resize(M + 1);
        }
        std::size_t i = hashNum(keys);
        for (; keysVec[i].first; i = (i + 1) % M)
        {
            if (keysVec[i].second == keys)
            {
                valsVec[i] = vals;
                return;
            }
        }
        keysVec[i] = std::pair<bool, K>(true, keys);
        valsVec[i] = vals;
        ++N;
    }

    // Returns {true, value} when the key is present, {false, V()} otherwise.
    std::pair<bool, V> get(const K &keys) const
    {
        const std::size_t i = findSlot(keys);
        if (i == M)
        {
            return std::pair<bool, V>(false, V());
        }
        return std::pair<bool, V>(true, valsVec[i]);
    }

    bool contains(const K &keys) const
    {
        return findSlot(keys) != M;
    }

    // Removes the key if present; does nothing otherwise.
    void del(const K &keys)
    {
        std::size_t i = findSlot(keys);
        if (i == M)
        {
            return;
        }
        keysVec[i].first = false;
        --N;
        // Every entry that follows in the same cluster may have been pushed
        // past the slot just freed, so take each one out and insert it again.
        // The count is at least two below the growth threshold at each put()
        // here, so these re-insertions never trigger a resize.
        for (i = (i + 1) % M; keysVec[i].first; i = (i + 1) % M)
        {
            const K keyToRedo = keysVec[i].second;
            const V valToRedo = valsVec[i];
            keysVec[i].first = false;
            --N;
            put(keyToRedo, valToRedo);
        }
        // Shrink to the previous tabulated prime once the table is at most
        // 1/8 full (tables holding fewer than 17 keys are left alone).
        if (N >= 17 && N <= M / 8)
        {
            std::size_t smaller = primeList[0];
            for (std::size_t p : primeList)
            {
                if (p >= M)
                {
                    break;
                }
                smaller = p;
            }
            resize(smaller);
        }
    }

    // Number of keys currently stored.
    std::size_t size() const { return N; }

    // All keys, in slot order (not sorted).
    std::vector<K> keyVec() const
    {
        std::vector<K> temp;
        temp.reserve(N);
        for (const std::pair<bool, K> &slot : keysVec)
        {
            if (slot.first)
            {
                temp.push_back(slot.second);
            }
        }
        return temp;
    }

private:
    // Smallest tabulated prime >= num. When num exceeds every entry the
    // default capacity (the first entry, 17) is used and the table grows on
    // demand instead.
    static std::size_t pickCapacity(std::size_t num)
    {
        for (std::size_t p : primeList)
        {
            if (p >= num)
            {
                return p;
            }
        }
        return primeList[0];
    }

    // Modular hashing: maps a key to its home slot in [0, M).
    std::size_t hashNum(const K &keys) const
    {
        return std::hash<K>()(keys) % M;
    }

    // Returns the slot holding `keys`, or M when the key is absent.
    std::size_t findSlot(const K &keys) const
    {
        for (std::size_t i = hashNum(keys); keysVec[i].first; i = (i + 1) % M)
        {
            if (keysVec[i].second == keys)
            {
                return i;
            }
        }
        return M;
    }

    // Rebuilds the table with the smallest tabulated prime capacity >= cap
    // and re-inserts every live entry. N is unchanged.
    void resize(std::size_t cap)
    {
        LPHashST t(cap);
        for (std::size_t i = 0; i != M; ++i)
        {
            if (keysVec[i].first)
            {
                t.put(keysVec[i].second, valsVec[i]);
            }
        }
        // Swap instead of copy: the temporary takes the old arrays with it.
        keysVec.swap(t.keysVec);
        valsVec.swap(t.valsVec);
        M = t.M;
    }

    std::size_t N = 0;  // number of stored keys
    std::size_t M = 17; // number of slots; always an entry of primeList
    std::vector<std::pair<bool, K>> keysVec; // {slot in use, key}
    std::vector<V> valsVec;                  // value for the same slot index
    // Candidate capacities: primes that roughly double at each step. A plain
    // array with a constant initializer, so it is usable even by tables that
    // are constructed during static initialization.
    static const std::size_t primeList[30];
};

template <typename K, typename V>
const std::size_t LPHashST<K, V>::primeList[30] =
    {17, 31, 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593,
     49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
     12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
     805306457, 1610612741, 3221225473, 4294967291};
} // namespace ST
#endif