一、哈希算法
哈希表即散列表,通过哈希算法将Key直接映射到内存中的位置(这种算法有很多种,常见的就是取余法)。它有几个特征:
1、确定性:相同的输入经过同一哈希算法一定得到相同的结果;反过来说,不相同的结果一定是由不相同的输入导致的。
2、哈希碰撞:不相同的输入可能导致相同的结果。
3、不可逆:无法从结果推出输入。
4、混淆:细节更改可能导致结果的不同。
二、STL的unordered_map unordered_multimap
在STL中,unordered_map 的存在是在于提供一个无序的关联容器(注意:它并不是线程安全的,并发读写需要外部同步),在这个容器里提供了一个线性的散列表,每个位置可以认为是一个Slot或者说Bucket,也就是说常说的碰撞使用一个链表来解决,这个链表是单向还是双向的,由实现者自己来定义实现。
从上图可以清晰的看到这种实现机制。
和Map一样它也有两个形式(一对一和一对多):
看一下相关的源码:
// Primary template declaration of std::unordered_map (as shown on cppreference):
//   Key       - key type
//   T         - mapped value type
//   Hash      - hash functor, defaults to std::hash<Key>
//   KeyEqual  - key equality predicate, defaults to std::equal_to<Key>
//   Allocator - allocator for the stored std::pair<const Key, T> elements
template<
class Key,
class T,
class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator< std::pair<const Key, T> >
> class unordered_map;
// C++17 std::pmr alias: the same container, but the allocator is fixed to
// std::pmr::polymorphic_allocator so the memory resource can be selected at
// runtime instead of being baked into the container's type.
namespace pmr {
template <class Key,
class T,
class Hash = std::hash<Key>,
class Pred = std::equal_to<Key>>
using unordered_map = std::unordered_map<Key, T, Hash, Pred,
std::pmr::polymorphic_allocator<std::pair<const Key,T>>>;
}
// Primary template declaration of std::unordered_multimap — identical template
// parameters to unordered_map, but equivalent keys may appear multiple times.
template<
class Key,
class T,
class Hash = std::hash<Key>,
class KeyEqual = std::equal_to<Key>,
class Allocator = std::allocator< std::pair<const Key, T> >
> class unordered_multimap;
// C++17 std::pmr alias for the multimap variant: allocator fixed to a
// polymorphic_allocator, everything else identical to std::unordered_multimap.
namespace pmr {
template <class Key, class T,
class Hash = std::hash<Key>,
class Pred = std::equal_to<Key>>
using unordered_multimap = std::unordered_multimap<Key, T, Hash, Pred,
std::pmr::polymorphic_allocator<std::pair<const Key,T>>>;
}
再看一下函数的定义,这里就不取全部了:
// MSVC STL implementation excerpt (truncated). unordered_map is a thin wrapper
// over the internal _Hash table base class. _Umap_traits bundles the key and
// mapped types, the combined hash+equality functor (_Uhash_compare) and the
// allocator; the trailing `false` selects unique keys (unordered_multimap
// presumably passes `true` there — the traits are defined elsewhere).
template<class _Kty,
class _Ty,
class _Hasher = hash<_Kty>,
class _Keyeq = equal_to<_Kty>,
class _Alloc = allocator<pair<const _Kty, _Ty>>>
class unordered_map
: public _Hash<_Umap_traits<_Kty, _Ty,
_Uhash_compare<_Kty, _Hasher, _Keyeq>, _Alloc, false>>
{ // hash table of {key, mapped} values, unique keys
public:
// Compile-time guard: unless the check is disabled, the allocator's
// value_type must be exactly pair<const Key, Value>.
static_assert(!_ENFORCE_MATCHING_ALLOCATORS || is_same_v<pair<const _Kty, _Ty>, typename _Alloc::value_type>,
_MISMATCHED_ALLOCATOR_MESSAGE("unordered_map<Key, Value, Hasher, Eq, Allocator>", "pair<const Key, Value>"));
// Standard-mandated member type aliases, mostly re-exported from the base.
using _Mytraits = _Uhash_compare<_Kty, _Hasher, _Keyeq>;
using _Mybase = _Hash<_Umap_traits<_Kty, _Ty, _Mytraits, _Alloc, false>>;
using hasher = _Hasher;
using key_type = _Kty;
using mapped_type = _Ty;
using key_equal = _Keyeq;
using key_compare = _Mytraits; // extra
using value_type = typename _Mybase::value_type;
using allocator_type = typename _Mybase::allocator_type;
using size_type = typename _Mybase::size_type;
using difference_type = typename _Mybase::difference_type;
using pointer = typename _Mybase::pointer;
using const_pointer = typename _Mybase::const_pointer;
using reference = value_type&;
using const_reference = const value_type&;
using iterator = typename _Mybase::iterator;
using const_iterator = typename _Mybase::const_iterator;
// Local (per-bucket) iterators share the same type as the whole-table
// iterators in this implementation.
using local_iterator = typename _Mybase::iterator;
using const_local_iterator = typename _Mybase::const_iterator;
using _Alnode = typename _Mybase::_Alnode;
using _Alnode_traits = typename _Mybase::_Alnode_traits;
using _Pairib = typename _Mybase::_Pairib;
#if _HAS_CXX17
// Return type of insert(node_type&&), added in C++17.
using insert_return_type = _Insert_return_type<iterator, typename _Mybase::node_type>;
#endif /* _HAS_CXX17 */
// Every constructor delegates to the _Hash base, packing the hasher and
// key-equality functors into a single key_compare object; the overloads
// taking a bucket count additionally call rehash() after construction.
unordered_map()
: _Mybase(key_compare(), allocator_type())
{ // construct empty map from defaults
}
explicit unordered_map(const allocator_type& _Al)
: _Mybase(key_compare(), _Al)
{ // construct empty map from defaults, allocator
}
unordered_map(const unordered_map& _Right)
: _Mybase(_Right, _Alnode_traits::select_on_container_copy_construction(_Right._List._Getal()))
{ // construct map by copying _Right
}
unordered_map(const unordered_map& _Right, const allocator_type& _Al)
: _Mybase(_Right, _Al)
{ // construct map by copying _Right, allocator
}
explicit unordered_map(size_type _Buckets)
: _Mybase(key_compare(), allocator_type())
{ // construct empty map from bucket count
_Mybase::rehash(_Buckets);
}
unordered_map(size_type _Buckets, const allocator_type& _Al)
: _Mybase(key_compare(), _Al)
{ // construct empty map from bucket count and allocator
_Mybase::rehash(_Buckets);
}
unordered_map(size_type _Buckets, const hasher& _Hasharg)
: _Mybase(key_compare(_Hasharg), allocator_type())
{ // construct empty map from bucket count and hasher
_Mybase::rehash(_Buckets);
}
unordered_map(size_type _Buckets, const hasher& _Hasharg,
const allocator_type& _Al)
: _Mybase(key_compare(_Hasharg), _Al)
{ // construct empty map from bucket count, hasher, and allocator
_Mybase::rehash(_Buckets);
}
unordered_map(size_type _Buckets, const hasher& _Hasharg,
const _Keyeq& _Keyeqarg)
: _Mybase(key_compare(_Hasharg, _Keyeqarg), allocator_type())
{ // construct empty map from bucket count, hasher, and equality comparator
_Mybase::rehash(_Buckets);
}
unordered_map(size_type _Buckets, const hasher& _Hasharg,
const _Keyeq& _Keyeqarg, const allocator_type& _Al)
: _Mybase(key_compare(_Hasharg, _Keyeqarg), _Al)
{ // construct empty map from bucket count, hasher, equality comparator, and allocator
_Mybase::rehash(_Buckets);
}
// Range constructors: construct the (possibly pre-bucketed) empty table,
// then insert [_First, _Last).
template<class _Iter>
unordered_map(_Iter _First, _Iter _Last)
: _Mybase(key_compare(), allocator_type())
{ // construct map from sequence, defaults
insert(_First, _Last);
}
template<class _Iter>
unordered_map(_Iter _First, _Iter _Last, const allocator_type& _Al)
: _Mybase(key_compare(), _Al)
{ // construct map from sequence and allocator
insert(_First, _Last);
}
template<class _Iter>
unordered_map(_Iter _First, _Iter _Last,
size_type _Buckets)
: _Mybase(key_compare(), allocator_type())
{ // construct map from sequence and bucket count
_Mybase::rehash(_Buckets);
insert(_First, _Last);
}
template<class _Iter>
unordered_map(_Iter _First, _Iter _Last,
size_type _Buckets, const allocator_type& _Al)
: _Mybase(key_compare(), _Al)
{ // construct map from sequence, bucket count, and allocator
_Mybase::rehash(_Buckets);
insert(_First, _Last);
}
template<class _Iter>
unordered_map(_Iter _First, _Iter _Last,
size_type _Buckets, const hasher& _Hasharg)
: _Mybase(key_compare(_Hasharg), allocator_type())
{ // construct map from sequence, bucket count, and hasher
_Mybase::rehash(_Buckets);
insert(_First, _Last);
}
// ... remaining members (move constructors, assignment, lookup, etc.)
// omitted in the article's excerpt.
......
};
这里看一下rehash的算法:
// MSVC STL excerpt: grow the bucket array to at least _Buckets buckets, then
// redistribute every element. The bucket count is kept a power of two: start
// from the implementation minimum (_Min_buckets) and repeatedly double until
// (a) the caller's request and (b) the current load-factor constraint
// (size() / max_load_factor() < bucket count) are both satisfied.
void rehash(size_type _Buckets)
{ // rebuild table with at least _Buckets buckets
// Upper bound on bucket count, derived from the internal bucket vector's
// capacity. NOTE(review): the /4 presumably accounts for _Vec storing
// multiple entries per bucket — confirm against the _Hash internals.
size_type _Maxsize = _Vec.max_size() / 4;
size_type _Newsize = _Min_buckets;
while (_Newsize < _Buckets && _Newsize < _Maxsize)
{
_Newsize *= 2; // double until big enough
}
// Could not reach the requested count within the size cap.
if (_Newsize < _Buckets)
{
_Xout_of_range("invalid hash bucket count");
}
// Written as !(a < b) rather than (a >= b) so NaN max_load_factor values
// do not loop forever here.
while (!(size() / max_load_factor() < _Newsize) && _Newsize < _Maxsize)
{ // double until load factor okay
_Newsize *= 2;
}
// Rebuild the bucket array at the new size and re-insert all elements.
_Init(_Newsize);
_Reinsert();
}
这块儿的代码相对来说比较简单,有兴趣可以看看,不同的库略有不同。
三、例程
应用例程就很简单了(cppreference.com例程):
#include <iostream>
#include <utility>
#include <string>
#include <unordered_map>
// Demonstrates the emplace family on unordered_map and find() on an
// unordered_multimap (adapted from the cppreference examples).
int main()
{
    std::unordered_map<std::string, std::string> table;

    // uses pair's move constructor
    table.emplace(std::make_pair(std::string("a"), std::string("a")));

    // uses pair's converting move constructor
    table.emplace(std::make_pair("b", "abcd"));

    // uses pair's template constructor
    table.emplace("d", "ddd");

    // uses pair's piecewise constructor: the mapped value is built in place
    // as std::string(10, 'c')
    table.emplace(std::piecewise_construct,
                  std::forward_as_tuple("c"),
                  std::forward_as_tuple(10, 'c'));
    // as of C++17, table.try_emplace("c", 10, 'c'); can be used

    for (const auto& [key, value] : table)
        std::cout << key << " => " << value << '\n';

    std::unordered_multimap<int, char> sample = {{1, 'a'}, {2, 'b'}};
    const auto hit = sample.find(2);
    if (hit == sample.end()) {
        std::cout << "Not found\n";
    } else {
        std::cout << "Found " << hit->first << " " << hit->second << '\n';
    }
}
运行结果如下:
a => a
b => abcd
c => cccccccccc
d => ddd
Found 2 b
库存在的目的就是减少使用的麻烦,不过减少使用的麻烦不代表没坑,如果插入的数据中有重复的Key,结果会是如何呢?同学们可以自己试试(提示:unordered_map 对重复Key的插入会被忽略,而 unordered_multimap 会全部保留),再结合网上的说明资料就明白原因了。
四、总结
unordered系列容器的目的就是为了提高访问的效率(unordered_map的平均查找效率是常数级的,但并不稳定:在哈希冲突严重的最坏情况下会退化为线性,此时反而不如map稳定的对数级复杂度),这里的关键就是需要有一个良好的HASH函数,STL中自带的HASH应该说还是不错的。在网上很多人的测试中,unordered_map在大多数情况下都处于上风,所以在新标准下一般更推荐使用它。但是在一些具体的场合,比如需要有序遍历或者对最坏情况的性能有要求时,还是需要map的;另外map的内存占用一般也要少于unordered_map。