按照先前的设想,写了一个一般性的hash表。为了支持各种类型,这个hash表写为模板。
Hashtable模板类
#pragma once
#include "util.h"
//By default MyComparer and MyHasher work for ordinary types, in particular the
//built-in types.
//Consistency rule: two keys that compare equal must also have the same hash code.

//MyComparer: default key-equality policy, implemented with operator==.
template<class T>
class MyComparer
{
public:
    //Returns true when key1 == key2.
    bool equals(const T& key1, const T& key2)
    {
        const bool same = (key1 == key2);
        return same;
    }
};
//MyHasher: default hashing policy; the hash code is simply the key converted to int.
template <class T>
class MyHasher
{
public:
    //Returns the key cast to int (so T must be convertible by a C-style cast).
    int hashCode(const T& t)
    {
        const int code = (int)t;
        return code;
    }
};
//HashtableEntry: one node of a bucket's singly linked chain.
template<class K, class V>
class HashtableEntry
{
public:
    K key;                  //the stored key
    V value;                //the value associated with key
    int hash;               //hash code recorded for key when the entry is filled in
    HashtableEntry *next;   //next entry in the same chain, NULL at the end
};
//Hashtable: a general-purpose hash table with chained buckets.
//The implementation borrows from java.util.HashMap and a few other
//implementations (such as MFC's CMap).
//It is meant to be generic: the key may be of any type, provided a matching
//hash function (Hasher) and equality test (Comparer) are supplied.
//Not considered for now: a custom Allocator and optimisations such as allocating
//entries in batches or re-using the entries of removed items.
template <class K, class V,
          class Comparer = MyComparer<K>,
          class Hasher = MyHasher<K> >
class Hashtable
{
public:
    //Default number of buckets.
    static const int DEFAULT_TABLE_SIZE = 16;
    //Default load factor (loadfactor = n/m, where n is the number of (K,V) pairs
    //and m is the table size).
    //static const float DEFAULT_LOAD_FACTOR = 0.75f; //VC can not compile!

    Hashtable();
    Hashtable(int tableSize);
    virtual ~Hashtable();

    //Returns true if the given key is present.
    bool find(K key);
    //Looks up the value stored for key; returns false if the key is not found.
    bool get(K key, V& value);
    //Stores the key/value pair in the table.
    void put(K key, V value);
    //NOTE(review): no definition of this overload is visible in this file;
    //confirm it is defined elsewhere before calling it.
    bool put(K key, V value, V& oldValue);
    //Removes the entry stored for key.
    bool remove(K key);
    //Removes all key/value pairs (the table size is unchanged).
    void clear();
    //Re-hashes all entries into a table of the given size.
    void rehash(int newTableSize);
    //Whether the table may grow automatically; the default is true. This call
    //does not itself trigger a rehash.
    //When true, the table is enlarged (its size doubled) once the load exceeds
    //the given threshold.
    void setAutogrow(bool autogrow, float loadFactor=0.75f);

    int size() const { return _size; }
    int getTableSize() const { return _tableSize; }
    void print();

protected:
    //Since Comparer and Hasher were introduced, virtual functions are no longer used:
    //virtual int getHashCode(const K& key) = 0;
    //virtual bool keyEquals(const K& key1, const K& key2) = 0;

private:
    int _tableSize;     //number of buckets
    float _loadFactor;
    bool _autogrow;
    int _size;          //number of key/value pairs
    int _threshold;
    HashtableEntry<K, V> **_table;
    Comparer _comparer;
    Hasher _hasher;

    void initHashtable(int tableSize);
    int hashIndex(int hash, int tableSize);

    //Copying is disabled. The table owns _table and every entry chained from it,
    //and the destructor frees them; a compiler-generated copy would share those
    //pointers and free them twice. Declared private and intentionally left
    //undefined.
    Hashtable(const Hashtable&);
    Hashtable& operator = (const Hashtable&);
};
//Default constructor: creates an empty table with DEFAULT_TABLE_SIZE buckets.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable()
{
    //The other constructor cannot be invoked directly from here: writing
    //Hashtable(16); would create a temporary object and construct that instead.
    this->initHashtable(DEFAULT_TABLE_SIZE);
}
//Creates an empty table with tableSize buckets; all set-up is done by
//initHashtable().
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable(int tableSize)
{
    this->initHashtable(tableSize);
}
//Shared constructor helper: sets the default policy (load factor 0.75, autogrow
//on), and allocates tableSize empty buckets.
//assert_exception is not defined in this file; presumably it reports a failed
//condition with the given message.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::initHashtable(int tableSize)
{
    assert_exception(tableSize > 0, "bad table size");

    _size = 0;
    _autogrow = true;
    _loadFactor = 0.75f; //DEFAULT_LOAD_FACTOR
    _tableSize = tableSize;
    _threshold = (int)(_tableSize * _loadFactor);

    _table = new HashtableEntry<K, V> * [_tableSize];
    assert_exception(_table != NULL, "out of memory");

    //memset would be faster, but an explicit loop reads better.
    int slot = 0;
    while (slot < _tableSize) {
        _table[slot] = NULL;
        ++slot;
    }
}
//Enables or disables automatic growth and records the load factor.
//  autogrow   - when true, put() doubles the table once size reaches the threshold
//  loadFactor - ratio of entries to buckets used to compute that threshold;
//               must be greater than zero when autogrow is true
//This call never rehashes by itself; it only updates the policy.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::setAutogrow(bool autogrow, float loadFactor)
{
    //With a zero, negative or NaN load factor the threshold would never be
    //positive, so every put() would double the table without bound.
    assert_exception(!autogrow || loadFactor > 0.0f, "bad load factor");

    _autogrow = autogrow;
    _loadFactor = loadFactor;
    if (_autogrow) {
        _threshold = (int)(_tableSize * _loadFactor);
    }
}
//Destructor: frees every entry through clear(), then the bucket array itself.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::~Hashtable()
{
    this->clear();
    delete [] _table;
}
//Maps a hash code to a bucket index for a table of tableSize buckets.
template<class K, class V, class Comparer, class Hasher>
int Hashtable<K,V,Comparer,Hasher>::hashIndex(int hash, int tableSize)
{
    //Clear the sign bit first so that the index can never be negative.
    const int nonNegative = hash & 0x7fffffff;
    return nonNegative % tableSize;
}
//Returns true if an entry with the given key exists, false otherwise.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::find(K key)
{
    const int code = _hasher.hashCode(key);
    HashtableEntry<K,V> *node = _table[hashIndex(code, _tableSize)];
    while (node != NULL)
    {
        //Compare the stored hash first; the comparer only runs when it matches.
        if (node->hash == code && _comparer.equals(key, node->key))
        {
            return true;
        }
        node = node->next;
    }
    return false;
}
//Looks up key. On success copies the stored value into `value` and returns true;
//otherwise returns false and leaves `value` untouched.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::get(K key, V& value)
{
    const int code = _hasher.hashCode(key);
    const int slot = hashIndex(code, _tableSize);
    HashtableEntry<K,V> *node = _table[slot];
    while (node != NULL)
    {
        if (node->hash == code && _comparer.equals(key, node->key))
        {
            value = node->value;
            return true;
        }
        node = node->next;
    }
    return false;
}
//Stores the pair (key, value). If the key is already present its value is
//overwritten; otherwise a new entry is linked at the head of its bucket and,
//when autogrow is on and the size has reached the threshold, the table is
//rehashed to twice its size.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::put(K key, V value)
{
    const int code = _hasher.hashCode(key);
    const int slot = hashIndex(code, _tableSize);

    //Existing key: replace the value in place.
    HashtableEntry<K,V> *node = _table[slot];
    while (node != NULL)
    {
        if (node->hash == code && _comparer.equals(key, node->key))
        {
            node->value = value;
            return;
        }
        node = node->next;
    }

    //New key: create an entry and push it onto the front of the chain.
    HashtableEntry<K,V> *fresh = new HashtableEntry<K,V> ();
    assert_exception(fresh != NULL, "new failed(out of memory?)");
    fresh->key = key;
    fresh->value = value;
    fresh->hash = code;
    fresh->next = _table[slot];
    _table[slot] = fresh;
    ++_size;

    if (_autogrow && _size >= _threshold) {
        rehash(_tableSize * 2);
    }
}
//Moves every entry into a freshly allocated table of newTableSize buckets and
//recomputes the growth threshold. Entries are relinked, not copied, and their
//stored hash codes are reused.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::rehash(int newTableSize)
{
    assert_exception(newTableSize > 0, "bad rehash size");

    HashtableEntry<K,V> ** fresh = new HashtableEntry<K,V> * [newTableSize];
    assert_exception(fresh != NULL, "out of memory");
    for (int slot = 0; slot != newTableSize; ++slot) {
        fresh[slot] = NULL;
    }

    //Transfer from the old table to the new one, bucket by bucket.
    for (int slot = 0; slot < _tableSize; ++slot)
    {
        HashtableEntry<K, V> * node = _table[slot];
        while (node != NULL)
        {
            //Remember the successor before relinking overwrites node->next.
            HashtableEntry<K, V> * const following = node->next;
            const int target = hashIndex(node->hash, newTableSize);
            node->next = fresh[target];
            fresh[target] = node;
            node = following;
        }
    }

    delete [] _table;
    _table = fresh;
    _tableSize = newTableSize;
    _threshold = (int)(_tableSize * _loadFactor);
}
//Removes the entry stored for key. Returns true if an entry was found and
//deleted, false if the key was not present.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::remove(K key)
{
    const int code = _hasher.hashCode(key);
    const int slot = hashIndex(code, _tableSize);

    //`link` always addresses the pointer that refers to the current node (the
    //bucket head or the previous node's next), so unlinking is a single store.
    HashtableEntry<K,V> ** link = &(_table[slot]);
    while (*link != NULL)
    {
        HashtableEntry<K,V> * node = *link;
        if (node->hash == code && _comparer.equals(key, node->key))
        {
            *link = node->next;
            delete node;
            --_size;
            return true;
        }
        link = &(node->next);
    }
    return false;
}
//Deletes every entry and empties every bucket; the bucket array itself keeps its
//current size.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::clear()
{
    for (int slot = 0; slot < _tableSize; ++slot)
    {
        HashtableEntry<K, V> * node = _table[slot];
        while (node != NULL)
        {
            //Read the successor before the node is destroyed.
            HashtableEntry<K, V> * const following = node->next;
            delete node;
            node = following;
        }
        _table[slot] = NULL;
    }
    _size = 0;
}
//Prints a one-line summary (table size, entry count, load factor) with printf.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::print()
{
    const int buckets = _tableSize;
    const int entries = _size;
    const float factor = _loadFactor;
    printf("Hashtable tableSize=%d, size=%d, loadFactor=%f\n",
           buckets, entries, factor);
}
可以写一些代码来测试它。比如:
//Demo/test for Hashtable with built-in key types.
//Part 1 inserts a few int->float pairs (key 17 twice, so the second put
//overwrites the first) and prints the table summary.
//Part 2 inserts 100000 int->int pairs with a load factor of 60, reads every one
//back and reports any key whose value is missing or wrong.
void testHashtable()
{
    Hashtable<int,float> mapInt2Float;
    int k1=1,k2=2;
    float f1=0.5f,f2=0.6f;
    mapInt2Float.put(k1,f1);
    mapInt2Float.put(k2,f2);
    mapInt2Float.put(17,f1);
    mapInt2Float.put(17,0.8f);
    mapInt2Float.print();

    Hashtable<int,int> mapInt2Int;
    mapInt2Int.setAutogrow(true, 60);
    for (int i = 0; i< 100000; i++)
    {
        mapInt2Int.put(i,i);
    }
    for (int i = 0; i < 100000;i++)
    {
        int x;
        if (!mapInt2Int.get(i,x) || x != i) {
            //A string literal must be bound to a pointer-to-const, and the
            //failure is reported instead of being dropped into an unused local.
            const char *msg = "error";
            printf("%s: key %d\n", msg, i);
        }
    }
}
关于符号表:可以实现为Key为String类型的hash表。我也写了一个String类,如下。
String.h
#pragma once
//Once exceptions (Exception) are supported, the exception type needs String.
//String: a character string whose buffer may be shared between String objects
//through a reference count.
class String
{
public:
    String();
    String(const char *str);
    String(const char *str, int len);
    String(const String& strObj);
    String& operator = (const String& strObj);
    String& operator = (const char *str);
    ~String();

    //Note: do not use the const char * conversion on a temporary object.
    //Once the temporary has been destroyed, the String data that the
    //const char * points to has already been released.
    const char *cstr() const;
    //Conversion operator to const char *.
    operator const char *() const;

    bool operator == (const String& strObj) const;
    int length() const;

private:
    //The internal string data, may be shared between String objects.
    //The internal data layout is as follows:
    //struct {
    //    int refCount; //reference count
    //    char data[];
    //};
    char *_refData;
    int _length;

    static char _empty;

    void initCopyString(const char *str, int len);
};
String.cpp
#include "String.h"
#include <stdio.h>
#include <string.h>
//A single NUL character; cstr() returns its address when no data is held.
char String::_empty = '\0';
//Default constructor: an empty string that holds no buffer.
String::String()
    : _refData(NULL), _length(0)
{
}
//Constructs a String holding a private copy of the NUL-terminated string str.
//A NULL str yields an empty String (previously strlen(NULL) was called).
String::String(const char *str)
{
    if (str == NULL) {
        _refData = NULL;
        _length = 0;
        return;
    }
    int len = (int)strlen(str);
    initCopyString(str, len);
}
//Constructs a String from the first len characters of str; the copying is done
//by initCopyString().
String::String(const char *str, int len)
{
    this->initCopyString(str, len);
}
//Makes this object refer to a new, unshared buffer holding a copy of the first
//len characters of str. Any buffer previously referenced is NOT released here;
//that is the caller's job.
//  str - source characters; NULL is treated as an empty string
//  len - number of characters to copy; a negative value is treated as empty
void String::initCopyString(const char *str, int len)
{
    //Invalid input: fall back to the same empty state the default constructor
    //uses, rather than passing NULL or a negative size to new[]/memcpy.
    if (str == NULL || len < 0) {
        _refData = NULL;
        _length = 0;
        return;
    }

    //Allocate room for the reference count, the characters and the terminating
    //null character.
    _refData = new char [sizeof(int) + len + 1];
    if (_refData == NULL) {
        //Only reachable with a non-throwing new; keep _length well defined.
        _length = 0;
        return;
    }

    int *pRefCount = (int *)_refData;
    *pRefCount = 1;
    memcpy(_refData + sizeof(int), str, len);
    _refData[sizeof(int) + len]='\0';
    _length = len;
}
//Copy constructor: shares strObj's buffer and bumps its reference count.
String::String(const String& strObj)
    : _refData(strObj._refData), _length(strObj._length)
{
    if (_refData == NULL) {
        return; //nothing is shared for an empty string
    }
    int *counter = (int *)_refData;
    ++(*counter);
}
//Copy assignment: drops this object's reference to its current buffer (freeing
//the buffer when the count reaches zero), then shares strObj's buffer.
String& String::operator = (const String& strObj)
{
    if (this != &strObj)
    {
        //Release the buffer currently referenced.
        if (_refData != NULL) {
            int *oldCounter = (int *)_refData;
            --(*oldCounter);
            if (*oldCounter == 0) {
                delete [] _refData;
            }
        }

        //Adopt the other object's buffer.
        _refData = strObj._refData;
        _length = strObj._length;
        if (_refData != NULL) {
            int *newCounter = (int *)_refData;
            ++(*newCounter);
        }
    }
    return *this;
}
//Assigns a copy of the NUL-terminated string str to this object.
//  str - source string; NULL makes this String empty.
//Returns *this.
//The new buffer is built before the old one is released, so str may point into
//this object's own buffer (for example s = s.cstr() + 1).
String& String::operator = (const char *str)
{
    char *oldData = _refData;

    //Assigning this object's own character data: nothing to do. The check must
    //come before the reference count is touched, otherwise the count is left
    //one too low.
    if (oldData != NULL && str == oldData + sizeof(int)) {
        return *this;
    }

    if (str == NULL) {
        _refData = NULL;
        _length = 0;
    } else {
        int len = (int)strlen(str);
        initCopyString(str, len); //points _refData at a fresh buffer
    }

    //Now drop the reference to the previous buffer.
    if (oldData != NULL) {
        int *pRefCount = (int *)oldData;
        (*pRefCount)--;
        if (*pRefCount == 0) {
            delete [] oldData;
        }
    }
    return (*this);
}
//Destructor: drops this object's reference and frees the buffer when it was the
//last one.
String::~String()
{
    if (_refData == NULL) {
        return;
    }
    int *counter = (int *)_refData;
    --(*counter);
    if (*counter == 0) {
        delete [] _refData;
    }
}
//Returns the characters as a NUL-terminated C string. When no buffer is held the
//address of the static empty character is returned instead.
const char * String::cstr() const
{
    //The characters start right after the leading reference count.
    return (_refData != NULL) ? (_refData + sizeof(int)) : &_empty;
}
int String::length() const {
return _length;
}
//Implicit conversion to const char *; equivalent to calling cstr().
String::operator const char * () const
{
    const char *text = this->cstr();
    return text;
}
//Returns true when both strings have the same length and the same characters.
bool String::operator == (const String& strObj) const
{
    //Same object: trivially equal.
    if (this == &strObj) {
        return true;
    }
    //Different lengths can never match; two empty strings always do.
    if (_length != strObj._length) {
        return false;
    }
    if (_length == 0) {
        return true;
    }

    const char *lhs = cstr();
    const char *rhs = strObj.cstr();
    //Objects sharing one buffer are equal without looking at the characters;
    //otherwise compare exactly _length characters.
    return lhs == rhs || memcmp(lhs, rhs, _length) == 0;
}
有了String对象之后,我们可以把符号表实现为Hashtable<String,X>,其中X为其它类型。对于String类型,需要实现Comparer和Hasher。例如:
//StringComparer: key-equality policy for String keys, based on String::operator==.
class StringComparer
{
public:
    //Returns true when the two strings compare equal.
    bool equals(const String& key1, const String& key2)
    {
        const bool same = (key1 == key2);
        return same;
    }
};
//StringHasher: hashing policy for String keys.
class StringHasher
{
public:
    //Computes hash = 31*hash + c over the characters of t, following the method
    //used by java.lang.String.
    //The arithmetic is done in unsigned int, which wraps around on overflow;
    //doing it in a signed int would be undefined behaviour for longer strings.
    int hashCode(const String& t)
    {
        unsigned int hash = 0;
        const char * str = t.cstr();
        int length = t.length();
        for (int i = 0; i< length; i++)
        {
            hash = 31u*hash + (unsigned int)str[i];
        }
        return (int)hash;
    }
};
//Demo for a String-keyed table: stores three entries, then looks up two keys
//that exist ("abc", "def") and one that does not ("kk").
void testHashtable()
{
    Hashtable<String,int,StringComparer,StringHasher> mapStr2Int;

    mapStr2Int.put("abc",1);
    mapStr2Int.put("def",2);
    mapStr2Int.put("kkk",3);

    int x;
    int y;
    int z;
    mapStr2Int.get("abc",x);
    mapStr2Int.get("def",y);
    //"kk" was never inserted, so this lookup leaves z unassigned.
    mapStr2Int.get("kk",z);
}
(未尽之处)符号表与hash表:
1. 符号是否存储在一个永久区内?
假如符号不会被删除,就可以考虑存储于永久区内。
虚拟机的永久区可以用一个大数组或者数组的链表来实现。
2. Allocator,Hash元素空间的批量申请,以及元素空间的重复利用
Allocator,就是使用特定的内存分配器,通常是为了性能自定义的内存分配器,而不用缺省的new/delete。
当往Hash表插入一个元素的时候,可以考虑一次批量申请元素空间,这样不必每次插入元素的时候都申请空间。
元素空间的重复利用,是指被删除的元素,其空间不要立即释放,而是放回一个freelist中,下次插入元素的时候,可以从freelist中重新拿来使用,这样避免了新申请内存。
这些做法通常都是为了提高性能。