hash_map基于hash table(哈希表)。哈希表最大的优点,就是把数据的存储和查找消耗的时间大大降低,几乎可以看成是常数时间;而代价仅仅是消耗比较多的内存。然而在当前可利用内存越来越多的情况下,用空间换时间的做法是值得的。另外,编码比较容易也是它的特点之一。
其基本原理是:使用一个下标范围比较大的数组来存储元素。可以设计一个函数(哈希函数,也叫做散列函数),使得每个元素的关键字都与一个函数值(即数组下标,hash值)相对应,于是用这个数组单元来存储这个元素;也可以简单地理解为,按照关键字为每一个元素“分类”,然后将这个元素存储在相应“类”所对应的地方,称为桶。
但是,不能够保证每个元素的关键字与函数值是一一对应的,因此极有可能出现对于不同的元素,却计算出了相同的函数值,这样就产生了“冲突”,换句话说,就是把不同的元素分在了相同的“类”之中。 总的来说,“直接定址”与“解决冲突”是哈希表的两大特点。
hash_map首先分配一大片内存,形成许多桶,然后利用hash函数把key映射到不同的区域(桶)中进行保存。其插入过程是:
- 得到key
- 通过hash函数得到hash值
- 得到桶号(一般都为hash值对桶数求模)
- 存放key和value在桶内。
其取值过程是:
- 得到key
- 通过hash函数得到hash值
- 得到桶号(一般都为hash值对桶数求模)
- 比较桶的内部元素是否与key相等,若都不相等,则没有找到。
- 取出相等的记录的value。
hash_map中直接定址由hash函数完成,解决冲突则用比较函数完成。这里可以看出,如果每个桶内部只有一个元素,那么查找的时候只有一次比较。当许多桶内没有值时,许多查询就会更快了(指查不到的时候)。要实现哈希表,和用户相关的是:hash函数和比较函数。hash_map的查找速度通常会比map快,而且查找速度基本和数据量大小无关,属于常数级别;而map的查找速度是log(n)级别。不过常数并不一定就比log(n)小,因为hash还有hash函数本身的耗时。如果你考虑效率,特别是在元素达到一定数量级时,可以考虑hash_map。但若你对内存使用特别严格,希望程序尽可能少消耗内存,那么一定要小心,hash_map可能会让你陷入尴尬,特别是当你的hash_map对象特别多时,内存占用就更难控制了,而且hash_map的构造速度较慢。
stl_hash_map.h源码:
// Filename: stl_hash_map.h
// Comment By: 凝霜
// E-mail: mdl2009@vip.qq.com
// Blog: http://blog.csdn.net/mdl13412
// hash_map和hash_multimap是对hashtable的简单包装, 很容易理解
/*
* Copyright (c) 1996
* Silicon Graphics Computer Systems, Inc.
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Silicon Graphics makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
*
* Copyright (c) 1994
* Hewlett-Packard Company
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Hewlett-Packard Company makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
*/
/* NOTE: This is an internal header file, included by other STL headers.
* You should not attempt to use it directly.
*/
#ifndef __SGI_STL_INTERNAL_HASH_MAP_H
#define __SGI_STL_INTERNAL_HASH_MAP_H
__STL_BEGIN_NAMESPACE
#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma set woff 1174
#endif
// hash_map: an associative container with unique keys, implemented as a thin
// wrapper around a hashtable (see <stl_hashtable.h>).
//
// If the compiler cannot deduce later default template arguments from the
// earlier template parameters, they have to be spelled out by hand.
// Hash functions for the basic data types are provided in <stl_hash_fun.h>.
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
#else
template <class Key, class T, class HashFcn, class EqualKey,
class Alloc = alloc>
#endif
class hash_map
{
private:
// Underlying table: elements are pair<const Key, T>, and select1st is the
// functor handed to hashtable for obtaining the key from an element.
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
// The one and only data member; every operation below forwards to it.
ht rep;
public:
// Member types, forwarded from the underlying hashtable.
// NOTE(review): an earlier comment here claimed that reference, pointer and
// iterator are all const. That does not match these typedefs, which forward
// both the mutable and the const variants. The element type given to
// hashtable is pair<const Key, T>, so presumably only the key part is
// immutable -- confirm against <stl_hashtable.h>.
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
// Accessors for the hash function and the key-equality predicate in use.
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
public:
// Constructors. Whenever the caller gives no size, 100 is passed as the
// hashtable's first constructor argument (presumably an initial bucket-count
// hint -- see <stl_hashtable.h>). Omitted hasher / key_equal arguments are
// default-constructed.
hash_map() : rep(100, hasher(), key_equal()) {}
explicit hash_map(size_type n) : rep(n, hasher(), key_equal()) {}
hash_map(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
hash_map(size_type n, const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) {}
// Range constructors: build the table, then insert [f, l) via insert_unique.
// With member-template support any input iterator type is accepted;
// otherwise only const value_type* and const_iterator ranges are provided.
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
hash_map(InputIterator f, InputIterator l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_map(InputIterator f, InputIterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_map(InputIterator f, InputIterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_map(InputIterator f, InputIterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
#else
hash_map(const value_type* f, const value_type* l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_map(const value_type* f, const value_type* l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_map(const value_type* f, const value_type* l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
hash_map(const value_type* f, const value_type* l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
hash_map(const_iterator f, const_iterator l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_map(const_iterator f, const_iterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_map(const_iterator f, const_iterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
hash_map(const_iterator f, const_iterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
public:
// Everything below is a simple forwarding wrapper around the hashtable;
// see <stl_hashtable.h> for the actual behaviour.
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
void swap(hash_map& hs) { rep.swap(hs.rep); }
// operator== is a friend because it compares the private rep members.
friend bool
operator== __STL_NULL_TMPL_ARGS (const hash_map&, const hash_map&);
iterator begin() { return rep.begin(); }
iterator end() { return rep.end(); }
const_iterator begin() const { return rep.begin(); }
const_iterator end() const { return rep.end(); }
public:
// Insertion goes through insert_unique: elements with a duplicate key are
// not allowed.
pair<iterator, bool> insert(const value_type& obj)
{ return rep.insert_unique(obj); }
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
void insert(InputIterator f, InputIterator l) { rep.insert_unique(f,l); }
#else
void insert(const value_type* f, const value_type* l) {
rep.insert_unique(f,l);
}
void insert(const_iterator f, const_iterator l) { rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
// Forwards to hashtable::insert_unique_noresize (presumably an insert that
// skips the resize step -- see <stl_hashtable.h>).
pair<iterator, bool> insert_noresize(const value_type& obj)
{ return rep.insert_unique_noresize(obj); }
iterator find(const key_type& key) { return rep.find(key); }
const_iterator find(const key_type& key) const { return rep.find(key); }
// Returns a reference to the value mapped to key. If the key is already
// present its element is returned; otherwise a new element holding a
// default-constructed T is created for that key.
T& operator[](const key_type& key)
{
return rep.find_or_insert(value_type(key, T())).second;
}
// Further forwarding wrappers; see <stl_hashtable.h>.
size_type count(const key_type& key) const { return rep.count(key); }
pair<iterator, iterator> equal_range(const key_type& key)
{ return rep.equal_range(key); }
pair<const_iterator, const_iterator> equal_range(const key_type& key) const
{ return rep.equal_range(key); }
size_type erase(const key_type& key) {return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
void clear() { rep.clear(); }
public:
// Bucket-level interface, forwarded unchanged to the hashtable.
void resize(size_type hint) { rep.resize(hint); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
size_type elems_in_bucket(size_type n) const
{ return rep.elems_in_bucket(n); }
};
// Equality for hash_map: the result is whatever comparing the two underlying
// hashtables (the private rep members) yields.
template <class Key, class T, class HF, class EqKey, class Alloc>
inline bool operator==(const hash_map<Key, T, HF, EqKey, Alloc>& lhs,
                       const hash_map<Key, T, HF, EqKey, Alloc>& rhs)
{
  const bool tables_equal = (lhs.rep == rhs.rep);
  return tables_equal;
}
// When the compiler supports partial ordering of function templates,
// provide a namespace-level swap that forwards to hash_map's own member
// swap, for efficiency.
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER
template <class Key, class T, class HF, class EqKey, class Alloc>
inline void swap(hash_map<Key, T, HF, EqKey, Alloc>& lhs,
                 hash_map<Key, T, HF, EqKey, Alloc>& rhs)
{
  lhs.swap(rhs);
}
#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */
// hash_multimap: same interface and properties as hash_map, except that
// duplicate keys are allowed (insertion goes through the hashtable's
// insert_equal family instead of insert_unique, and there is no operator[]).
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
#else
template <class Key, class T, class HashFcn, class EqualKey,
class Alloc = alloc>
#endif
class hash_multimap
{
private:
// Underlying table: elements are pair<const Key, T>, and select1st is the
// functor handed to hashtable for obtaining the key from an element.
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
// The one and only data member; every operation below forwards to it.
ht rep;
public:
// Member types, forwarded from the underlying hashtable.
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
// Accessors for the hash function and the key-equality predicate in use.
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
public:
// Constructors. Whenever the caller gives no size, 100 is passed as the
// hashtable's first constructor argument (presumably an initial bucket-count
// hint -- see <stl_hashtable.h>). Omitted hasher / key_equal arguments are
// default-constructed.
hash_multimap() : rep(100, hasher(), key_equal()) {}
explicit hash_multimap(size_type n) : rep(n, hasher(), key_equal()) {}
hash_multimap(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
hash_multimap(size_type n, const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) {}
// Range constructors: build the table, then insert [f, l) via insert_equal.
// With member-template support any input iterator type is accepted;
// otherwise only const value_type* and const_iterator ranges are provided.
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
hash_multimap(InputIterator f, InputIterator l)
: rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
template <class InputIterator>
hash_multimap(InputIterator f, InputIterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
template <class InputIterator>
hash_multimap(InputIterator f, InputIterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
template <class InputIterator>
hash_multimap(InputIterator f, InputIterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_equal(f, l); }
#else
hash_multimap(const value_type* f, const value_type* l)
: rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const value_type* f, const value_type* l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const value_type* f, const value_type* l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const value_type* f, const value_type* l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_equal(f, l); }
hash_multimap(const_iterator f, const_iterator l)
: rep(100, hasher(), key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const_iterator f, const_iterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const_iterator f, const_iterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_equal(f, l); }
hash_multimap(const_iterator f, const_iterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_equal(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
public:
// Size queries, swap and iteration: plain forwarding to the hashtable.
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
void swap(hash_multimap& hs) { rep.swap(hs.rep); }
// operator== is a friend because it compares the private rep members.
friend bool
operator== __STL_NULL_TMPL_ARGS (const hash_multimap&, const hash_multimap&);
iterator begin() { return rep.begin(); }
iterator end() { return rep.end(); }
const_iterator begin() const { return rep.begin(); }
const_iterator end() const { return rep.end(); }
public:
// Insertion goes through insert_equal, so equal keys may coexist; the
// single-element form returns an iterator rather than a pair<iterator, bool>.
iterator insert(const value_type& obj) { return rep.insert_equal(obj); }
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
void insert(InputIterator f, InputIterator l) { rep.insert_equal(f,l); }
#else
void insert(const value_type* f, const value_type* l) {
rep.insert_equal(f,l);
}
void insert(const_iterator f, const_iterator l) { rep.insert_equal(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
// Forwards to hashtable::insert_equal_noresize (presumably an insert that
// skips the resize step -- see <stl_hashtable.h>).
iterator insert_noresize(const value_type& obj)
{ return rep.insert_equal_noresize(obj); }
// Lookup and erasure: plain forwarding to the hashtable.
iterator find(const key_type& key) { return rep.find(key); }
const_iterator find(const key_type& key) const { return rep.find(key); }
size_type count(const key_type& key) const { return rep.count(key); }
pair<iterator, iterator> equal_range(const key_type& key)
{ return rep.equal_range(key); }
pair<const_iterator, const_iterator> equal_range(const key_type& key) const
{ return rep.equal_range(key); }
size_type erase(const key_type& key) {return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
void clear() { rep.clear(); }
public:
// Bucket-level interface, forwarded unchanged to the hashtable.
void resize(size_type hint) { rep.resize(hint); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
size_type elems_in_bucket(size_type n) const
{ return rep.elems_in_bucket(n); }
};
// Equality for hash_multimap: the result is whatever comparing the two
// underlying hashtables (the private rep members) yields.
template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
inline bool operator==(const hash_multimap<Key, T, HashFcn, EqualKey, Alloc>& lhs,
                       const hash_multimap<Key, T, HashFcn, EqualKey, Alloc>& rhs)
{
  const bool tables_equal = (lhs.rep == rhs.rep);
  return tables_equal;
}
// Namespace-level swap for hash_multimap, available only when the compiler
// supports partial ordering of function templates; it simply forwards to the
// member swap.
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER
template <class Key, class T, class HF, class EqKey, class Alloc>
inline void swap(hash_multimap<Key, T, HF, EqKey, Alloc>& lhs,
                 hash_multimap<Key, T, HF, EqKey, Alloc>& rhs)
{
  lhs.swap(rhs);
}
#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */
#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma reset woff 1174
#endif
__STL_END_NAMESPACE
#endif /* __SGI_STL_INTERNAL_HASH_MAP_H */
// Local Variables:
// mode:C++
// End:
示例:
/*
*用来测试STL hash_map
*简单例子
*/
#include <cstdlib>
#include <iostream>
#include <string>
#include <hash_map.h>/*因为hash_map暂不为CPP标准所以没办法写为<hash_map>*/
/*-------------------------------------------*/
using std::cout;
using std::endl;
using std::string;
/*-------------------------------------------*/
/* Function object
 * used as the hash function of the hash_map
 * (string has no default hash function).
 *
 * Folds the characters left to right: each step multiplies the running
 * value by 5 and adds the next character. An empty string hashes to 0.
 */
class str_hash{
public:
    size_t operator()(const string& str) const
    {
        unsigned long acc = 0;
        for (string::const_iterator ch = str.begin(); ch != str.end(); ++ch)
            acc = acc * 5 + *ch;
        return static_cast<size_t>(acc);
    }
};
/*-------------------------------------------*/
/* Function object
 * used as the key-comparison function of the hash_map
 * (during lookup, different keys may well map to the same hash value,
 * so the keys themselves still have to be compared).
 *
 * Returns true exactly when the two strings are equal.
 */
class str_compare
{
public:
    bool operator()(const string& lhs, const string& rhs) const
    {
        return lhs.compare(rhs) == 0;
    }
};
/*-------------------------------------------*/
int
main(int argc, char *argv[])
{
hash_map<string,string,str_hash,str_compare> myhash;
myhash["google"]="newplan";
myhash["baidu"]="zhaoziming";
if(myhash.find("google")!=myhash.end())
cout<<myhash["google"]<<endl;
system("PAUSE");
return EXIT_SUCCESS;
}
/*-------------------------------------------*/