一个hash表的实现

最新推荐文章于 2022-06-08 17:23:48 发布

simpleman7210

最新推荐文章于 2022-06-08 17:23:48 发布

阅读量733

点赞数

本文链接：https://blog.csdn.net/simpleman7210/article/details/9570455

版权

按照先前的设想，写了一个一般性的hash表。为了支持各种类型，这个hash表写为模板。

Hashtable模板类

#pragma once

#include "util.h"

// Default key policies: MyComparer and MyHasher work out of the box for
// ordinary types, in particular the built-in ones.
// Consistency rule: two keys that compare equal must also yield the same
// hash code.

// Default equality policy: compares two keys with T's operator==.
template<class T>
class MyComparer
{
public:
    // Returns true when key1 and key2 are equal according to operator==.
    bool equals(const T& key1, const T& key2)
    {
        const bool same = (key1 == key2);
        return same;
    }
};

// Default hash policy: the hash code is simply the key converted to int,
// so it only compiles for key types that can be cast to int.
template <class T>
class MyHasher
{
public:
    // Returns the key cast to int.
    int hashCode(const T& t)
    {
        const int code = (int)t;
        return code;
    }
};

// One node of a bucket chain: a key/value pair, the key's cached hash code,
// and a link to the next node in the same chain.
template<class K, class V>
class HashtableEntry
{
public:
    K key;                  // the stored key
    V value;                // the value associated with key
    int hash;               // hash code of key, stored alongside the pair
    HashtableEntry *next;   // next entry in the chain, or a null pointer
};

// Hashtable is modelled on java.util.HashMap and a few other implementations
// (for example MFC's CMap).
// It is meant to be a general-purpose container: the key may be of any type
// as long as a matching hash function is supplied.
// Not addressed for now: custom allocators and optimisations such as
// allocating entries in batches or recycling the entries of removed keys.
//
// Each bucket of the table is a singly linked list of HashtableEntry nodes.
//
// Template parameters:
//   K        - key type
//   V        - value type
//   Comparer - policy object providing equals(key1, key2)
//   Hasher   - policy object providing hashCode(key), returning an int
template <class K, class V,
        class Comparer = MyComparer<K>, 
        class Hasher = MyHasher<K> >
class Hashtable
{
public:
    // Default number of buckets.
    static const int DEFAULT_TABLE_SIZE = 16;
    // Default load factor (load factor = n/m, where n is the number of
    // key/value pairs and m is the table size). Kept as a comment because an
    // in-class static const float does not compile with VC:
    //static const float DEFAULT_LOAD_FACTOR = 0.75f;

    // Creates a table with DEFAULT_TABLE_SIZE buckets.
    Hashtable();
    // Creates a table with tableSize buckets; tableSize must be positive.
    Hashtable(int tableSize);
    // Deletes every entry and the bucket array.
    virtual ~Hashtable();

    // Returns true if the given key is present.
    bool find(K key);
    // Looks up key and copies its value into `value`; returns false (leaving
    // `value` untouched) when the key is not found.
    bool get(K key, V& value);
    // Stores the key/value pair, overwriting the value of an existing key.
    void put(K key, V value);
    // NOTE(review): no definition of this overload appears next to the other
    // member definitions - confirm it exists before calling it.
    bool put(K key, V value, V& oldValue);
    // Removes the entry for key; returns true if an entry was removed.
    bool remove(K key);
    // Removes all key/value pairs (the table size is unchanged).
    void clear();
    // Redistributes all entries over a table of the given size.
    void rehash(int newTableSize);
    // Sets whether the table may grow automatically (on by default). This
    // call never triggers a rehash by itself. While enabled, the table size
    // is doubled once the number of entries reaches the load threshold.
    void setAutogrow(bool autogrow, float loadFactor=0.75f);
    // Number of key/value pairs currently stored.
    int size() const { return _size; }
    // Current number of buckets.
    int getTableSize() const { return _tableSize; }
    // Prints the table size, entry count and load factor.
    void print();

protected:
    // With the Comparer and Hasher policies in place, the virtual-function
    // approach below is no longer used.
    //virtual int getHashCode(const K& key) = 0;
    //virtual bool keyEquals(const K& key1, const K& key2) = 0;

private:
    // Copying is disabled. The destructor deletes _table and every entry, so
    // a member-wise copy would leave two tables freeing the same memory.
    // Both members are declared private and intentionally left undefined.
    Hashtable(const Hashtable&);
    Hashtable& operator = (const Hashtable&);

    int _tableSize;     // number of buckets
    float _loadFactor;  // load factor used to derive _threshold
    bool _autogrow;     // whether put() may grow the table
    int _size;          // number of key/value pairs
    int _threshold;     // entry count at which an auto-growing table doubles
    HashtableEntry<K, V> **_table;  // bucket array; each slot heads a chain
    Comparer _comparer;
    Hasher _hasher;

    // Shared constructor body: allocates and clears the bucket array.
    void initHashtable(int tableSize);
    // Maps a hash code to a bucket index in [0, tableSize).
    int hashIndex(int hash, int tableSize);
};

// Default constructor: builds a table with DEFAULT_TABLE_SIZE buckets.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable()
{
    // Writing Hashtable(16); here would only construct and discard a
    // temporary object instead of initialising *this, so the setup is
    // delegated to initHashtable().
    const int initialBuckets = DEFAULT_TABLE_SIZE;
    initHashtable(initialBuckets);
}

// Constructs a table with the requested number of buckets; the argument is
// checked inside initHashtable().
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::Hashtable(int tableSize)
{
    this->initHashtable(tableSize);
}

// Common constructor body. Resets the bookkeeping fields to their defaults
// (load factor 0.75, auto-grow enabled, no entries) and allocates a bucket
// array of tableSize null slots.
// assert_exception is declared outside this snippet (presumably in util.h);
// it is assumed to signal an error when its condition is false - confirm.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::initHashtable(int tableSize)
{
    assert_exception(tableSize > 0, "bad table size");

    _size = 0;
    _autogrow = true;
    _loadFactor = 0.75f;    // the default load factor
    _tableSize = tableSize;
    _threshold = (int)(_tableSize * _loadFactor);

    _table = new HashtableEntry<K, V> * [_tableSize];
    assert_exception(_table != NULL, "out of memory");

    // memset would be faster, but an explicit pass over the slots reads better.
    HashtableEntry<K, V> **slot = _table;
    HashtableEntry<K, V> **const end = _table + _tableSize;
    while (slot != end) {
        *slot = NULL;
        ++slot;
    }
}

// Records the auto-grow flag and the load factor. The growth threshold is
// recomputed from the current table size only when auto-grow is enabled;
// no rehash happens here.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::setAutogrow(bool autogrow, float loadFactor)
{
    _loadFactor = loadFactor;
    _autogrow = autogrow;
    if (!_autogrow) {
        return;
    }
    _threshold = (int)(_tableSize * _loadFactor);
}

// Destructor: releases all entries first, then the bucket array itself.
template<class K, class V, class Comparer, class Hasher>
Hashtable<K,V,Comparer,Hasher>::~Hashtable()
{
    this->clear();      // deletes every entry node
    delete [] _table;   // the array of chain heads
}

// Maps a hash code to a bucket index for a table of tableSize buckets.
template<class K, class V, class Comparer, class Hasher>
int Hashtable<K,V,Comparer,Hasher>::hashIndex(int hash, int tableSize)
{
    // Mask off the sign bit so the index can never be negative.
    const int nonNegative = hash & 0x7fffffff;
    return nonNegative % tableSize;
}

// Returns true when an entry with the given key exists. The key's bucket
// chain is walked; the stored hash is compared before the Comparer is asked.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::find(K key)
{
    const int hash = _hasher.hashCode(key);
    HashtableEntry<K,V> *node = _table[hashIndex(hash, _tableSize)];
    while (node != NULL)
    {
        if (node->hash == hash && _comparer.equals(key, node->key))
        {
            return true;
        }
        node = node->next;
    }
    return false;
}

// Looks up key. On a hit the stored value is copied into `value` and true is
// returned; on a miss `value` is left untouched and false is returned.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::get(K key, V& value)
{
    const int hash = _hasher.hashCode(key);
    HashtableEntry<K,V> *node = _table[hashIndex(hash, _tableSize)];
    while (node != NULL)
    {
        if (node->hash == hash && _comparer.equals(key, node->key))
        {
            value = node->value;
            return true;
        }
        node = node->next;
    }
    return false;
}

// Stores a key/value pair. If the key is already present its value is
// overwritten; otherwise a new entry is linked in at the head of the bucket
// chain. With auto-grow enabled, the table size is doubled once the entry
// count reaches the threshold.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::put(K key, V value)
{
    const int hash = _hasher.hashCode(key);
    const int index = hashIndex(hash, _tableSize);

    // Existing key: replace the value in place and stop.
    HashtableEntry<K,V> *node = _table[index];
    while (node != NULL)
    {
        if (node->hash == hash && _comparer.equals(key, node->key))
        {
            node->value = value;
            return;
        }
        node = node->next;
    }

    // New key: create an entry and push it onto the front of the chain.
    HashtableEntry<K,V> *fresh = new HashtableEntry<K,V> ();
    assert_exception(fresh != NULL, "new failed(out of memory?)");
    fresh->hash = hash;
    fresh->key = key;
    fresh->value = value;
    fresh->next = _table[index];
    _table[index] = fresh;
    ++_size;

    if (!_autogrow || _size < _threshold) {
        return;
    }
    rehash(_tableSize * 2);
}

// Rebuilds the table with newTableSize buckets. The existing entry nodes are
// relinked into the new bucket array using their stored hash codes; no entry
// is copied or freed, only the old bucket array is released.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::rehash(int newTableSize)
{
    assert_exception(newTableSize > 0, "bad rehash size");
    HashtableEntry<K,V> ** fresh = new HashtableEntry<K,V> * [newTableSize];
    assert_exception(fresh != NULL, "out of memory");

    for (int slot = 0; slot < newTableSize; ++slot) {
        fresh[slot] = NULL;
    }

    // Move every node from the old table to the head of its new chain.
    for (int bucket = 0; bucket < _tableSize; ++bucket)
    {
        HashtableEntry<K, V> * node = _table[bucket];
        while (node != NULL)
        {
            // Remember the successor before the link is overwritten.
            HashtableEntry<K, V> * const following = node->next;
            const int target = hashIndex(node->hash, newTableSize);
            node->next = fresh[target];
            fresh[target] = node;
            node = following;
        }
    }

    delete [] _table;
    _table = fresh;
    _tableSize = newTableSize;
    _threshold = (int)(_tableSize * _loadFactor);
}

// Removes the entry for key. Returns true if an entry was unlinked and
// deleted, false if the key was not present.
template<class K, class V, class Comparer, class Hasher>
bool Hashtable<K,V,Comparer,Hasher>::remove(K key)
{
    const int hash = _hasher.hashCode(key);
    // `link` always addresses the pointer that leads to the current node:
    // first the bucket slot, afterwards the `next` field of the previous node.
    HashtableEntry<K,V> ** link = &_table[hashIndex(hash, _tableSize)];
    while (*link != NULL)
    {
        HashtableEntry<K,V> * const node = *link;
        if (node->hash == hash && _comparer.equals(key, node->key))
        {
            *link = node->next;     // bypass the node in its chain
            delete node;
            --_size;
            return true;
        }
        link = &node->next;
    }
    return false;
}

// Deletes every entry and resets the entry count to zero. The bucket array
// keeps its size; all of its slots become null.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::clear()
{
    for (int bucket = 0; bucket < _tableSize; ++bucket)
    {
        HashtableEntry<K, V> * node = _table[bucket];
        while (node != NULL)
        {
            // Step to the successor before the current node is destroyed.
            HashtableEntry<K, V> * const doomed = node;
            node = node->next;
            delete doomed;
        }
        _table[bucket] = NULL;
    }
    _size = 0;
}

// Writes a one-line summary (bucket count, entry count, load factor) via
// printf.
template<class K, class V, class Comparer, class Hasher>
void Hashtable<K,V,Comparer,Hasher>::print()
{
    printf(
        "Hashtable tableSize=%d, size=%d, loadFactor=%f\n",
        _tableSize,
        _size,
        _loadFactor);
}

可以写一些代码来测试它。比如：

// Smoke test for Hashtable with built-in key types.
// Part 1 inserts a few int->float pairs, including a repeated key, and
// prints the table summary.
// Part 2 inserts 100000 int->int pairs and reads each one back, reporting
// every key whose lookup fails or returns the wrong value.
void testHashtable()
{
    Hashtable<int,float> mapInt2Float;
    int k1=1,k2=2;
    float f1=0.5f,f2=0.6f;
    mapInt2Float.put(k1,f1);
    mapInt2Float.put(k2,f2);
    mapInt2Float.put(17,f1);
    mapInt2Float.put(17,0.8f);      // same key again: overwrites the value
    mapInt2Float.print();

    Hashtable<int,int> mapInt2Int;
    // NOTE(review): a load factor of 60 postpones growth until there are 60
    // entries per bucket on average - presumably intended to exercise long
    // chains; confirm.
    mapInt2Int.setAutogrow(true, 60);
    for (int i = 0; i< 100000; i++)
    {
        mapInt2Int.put(i,i);
    }
    for (int i = 0; i < 100000;i++)
    {
        int x;
        if (!mapInt2Int.get(i,x) || x != i) {
            // A string literal must bind to const char *, and the failure is
            // reported so that it does not pass silently.
            const char *msg = "error";
            printf("%s: key %d\n", msg, i);
        }
    }
}

关于符号表：可以实现为Key为String类型的hash表。我也写了一个String类，如下。

String.h

#pragma once

// Exception support was added to the project, and the exceptions need a
// String type.

// A string whose character buffer can be shared between String objects
// through a reference count stored in front of the characters.
class String
{
public:
    // Construction, assignment, destruction.
    String();
    String(const char *str);
    String(const char *str, int len);
    String(const String& strObj);
    String& operator = (const String& strObj);
    String& operator = (const char *str);
    ~String();

    // Caution: do not take the const char * of a temporary String. Once the
    // temporary is destroyed, the data the pointer refers to has already
    // been released.
    const char *cstr() const;
    // Conversion to const char *.
    operator const char *() const;

    bool operator == (const String& strObj) const;
    int length() const;

private:
    // The internal string data, may be shared between String objects.
    // The internal data layout is as follows:
    // struct {
    //     int refCount;   // reference count
    //     char data[];
    // };
    char *_refData;
    int _length;
    static char _empty;

    void initCopyString(const char *str, int len);
};

String.cpp

#include "String.h"
#include <stdio.h>
#include <string.h>

// Storage for the shared empty string: a single NUL character that cstr()
// points at when a String holds no data.
char String::_empty = 0;

// Default constructor: an empty string that owns no buffer.
String::String(void)
    : _refData(NULL), _length(0)
{
}

// Constructs a String holding a private copy of the NUL-terminated text at
// str. A null str produces an empty String instead of being passed to
// strlen().
String::String(const char *str)
{
    // Start from the empty state so both members are defined even when
    // nothing ends up being copied.
    _refData = NULL;
    _length = 0;
    if (str != NULL) {
        int len = (int)strlen(str);
        initCopyString(str, len);
    }
}

// Constructs a String from the first len characters at str. A null str or a
// negative len produces an empty String rather than reaching the buffer
// allocation and memcpy with invalid arguments.
String::String(const char *str, int len)
{
    // Start from the empty state so both members are defined even when
    // nothing ends up being copied.
    _refData = NULL;
    _length = 0;
    if (str != NULL && len >= 0) {
        initCopyString(str, len);
    }
}

void String::initCopyString(const char *str, int len)
{
    //allocates memory to hold the string, include terminal null character.
    _refData = new char [sizeof(int) + len + 1];
    if (_refData != NULL)
    {
        int *pRefCount = (int *)_refData;
        *pRefCount = 1;
        memcpy(_refData + sizeof(int), str, len);
        _refData[sizeof(int) + len]='\0';
        _length = len;
    }
}

// Copy constructor: shares the source's buffer and bumps its reference
// count; no characters are copied.
String::String(const String& strObj)
    : _refData(strObj._refData), _length(strObj._length)
{
    if (_refData == NULL) {
        return;
    }
    int *sharedCount = (int *)_refData;
    ++(*sharedCount);
}

// Copy assignment: drops this String's reference to its current buffer
// (freeing the buffer when that was the last reference), then shares the
// buffer of strObj. Assigning an object to itself changes nothing.
String& String::operator = (const String& strObj)
{
    if (this != &strObj)
    {
        // Give up the buffer currently held.
        if (_refData != NULL) {
            int *ownCount = (int *)_refData;
            --(*ownCount);
            if (*ownCount == 0) {
                delete [] _refData;
            }
        }

        // Adopt the other String's buffer and register as a new holder.
        _length = strObj._length;
        _refData = strObj._refData;
        if (_refData != NULL) {
            int *sharedCount = (int *)_refData;
            ++(*sharedCount);
        }
    }
    return *this;
}

// Assigns a private copy of the NUL-terminated text at str; a null str makes
// this String empty.
//
// The new buffer is built BEFORE the old one is released. str may point at
// (or into) this String's own buffer, e.g. s = s.cstr(), and releasing first
// would read freed memory. It also keeps the reference count balanced in the
// self-assignment case, which previously returned with the count already
// decremented.
String& String::operator = (const char *str)
{
    char *oldData = _refData;

    if (str == NULL) {
        _refData = NULL;
        _length = 0;
    } else {
        int len = (int)strlen(str);
        initCopyString(str, len);
        if (_refData == NULL) {
            // No buffer was obtained: make sure the length agrees.
            _length = 0;
        }
    }

    // Drop the reference to the previous buffer now that it is no longer read.
    if (oldData != NULL) {
        int *pRefCount = (int *)oldData;
        (*pRefCount)--;
        if (*pRefCount == 0) {
            delete [] oldData;
        }
    }
    return (*this);
}

// Destructor: gives up this String's reference and frees the buffer when it
// was the last one.
String::~String(void)
{
    if (_refData == NULL) {
        return;     // empty string, nothing is held
    }
    int *sharedCount = (int *)_refData;
    --(*sharedCount);
    if (*sharedCount == 0) {
        delete [] _refData;
    }
}

// Returns the characters as a NUL-terminated C string. A String without a
// buffer yields a pointer to the shared empty string; otherwise the result
// points just past the reference count at the start of the buffer.
const char * String::cstr() const
{
    return (_refData != NULL) ? _refData + sizeof(int) : &_empty;
}

int String::length() const {
    return _length;
}

// Implicit conversion to a C string; yields the same pointer as cstr().
String::operator const char * () const
{
    return this->cstr();
}

// Equality: two Strings are equal when they have the same length and the
// same characters. The cheap checks (same object, different lengths, both
// empty, shared buffer) run before any characters are compared.
bool String::operator == (const String& strObj) const
{
    if (this == &strObj) {
        return true;
    }
    if (_length != strObj._length) {
        return false;
    }
    if (_length == 0) {
        return true;
    }

    const char *lhs = cstr();
    const char *rhs = strObj.cstr();
    // Strings sharing one buffer are equal without looking at the bytes.
    return lhs == rhs || memcmp(lhs, rhs, _length) == 0;
}

有了String对象之后，我们可以把符号表实现为Hashtable<String,X>，其中X为其它类型。对于String类型，需要实现Comparer和Hasher。例如：

// Comparer policy for String keys: delegates to String::operator==.
class StringComparer
{
public:
    // Returns true when both keys are equal according to String's operator==.
    bool equals(const String& key1, const String& key2)
    {
        const bool same = (key1 == key2);
        return same;
    }
};

// Hasher policy for String keys.
class StringHasher
{
public:
    // Polynomial hash modelled on java.lang.String:
    //     hash = 31*hash + c   for every character c, starting from 0.
    // The sum is accumulated in unsigned arithmetic, where wrap-around is
    // well defined; overflowing a signed int is undefined behaviour. Each
    // character is converted exactly as it would be in an int expression, so
    // the result has the same bit pattern the wrapped signed computation
    // would give.
    int hashCode(const String& t)
    {
        unsigned int hash = 0;
        const char * str = t.cstr();
        int length = t.length();
        for (int i = 0; i< length; i++)
        {
            hash = 31u * hash + (unsigned int)(int)str[i];
        }
        return (int)hash;
    }
};

// Demonstrates a symbol-table style Hashtable keyed by String, using the
// StringComparer and StringHasher policies.
void testHashtable()
{
    Hashtable<String,int,StringComparer,StringHasher> mapStr2Int;

    // Insert three entries; the string literals are converted to String keys.
    mapStr2Int.put("abc",1);
    mapStr2Int.put("def",2);
    mapStr2Int.put("kkk",3);

    int x;
    int y;
    int z;
    mapStr2Int.get("abc",x);    // key present: x receives its value
    mapStr2Int.get("def",y);    // key present: y receives its value
    mapStr2Int.get("kk",z);     // key was never inserted: z is left unset
}

（未尽之处）符号表与hash表：

1. 符号是否存储在一个永久区内？
假如符号不会被删除，就可以考虑存储于永久区内。
虚拟机的永久区可以用一个大数组或者数组的链表来实现。

2. Allocator、Hash元素空间的批量申请，以及元素空间的重复利用
Allocator，就是使用特定的内存分配器，通常是为了性能自定义的内存分配器，而不用缺省的new/delete。
当往Hash表插入一个元素的时候，可以考虑一次批量申请元素空间，这样不必每次插入元素的时候都申请空间。
元素空间的重复利用，是指被删除的元素，其空间不要立即释放，而是放回一个freelist中，下次插入元素的时候，可以从freelist中重新拿来使用，这样避免了新申请内存。
这些做法通常都是为了提高性能。