【数据结构】：哈希表（hashtable）

最新推荐文章于 2024-08-12 16:54:46 发布

ChaseRaod

最新推荐文章于 2024-08-12 16:54:46 发布

阅读量688

点赞数 1

分类专栏：数据结构文章标签：数据结构

本文链接：https://blog.csdn.net/ChaseRaod/article/details/78541327

版权

数据结构专栏收录该内容

26 篇文章 1 订阅

订阅专栏

hashtable—-哈希表，也称散列表，是根据关键字直接访问在内存存储位置的数据结构。它通过一个关键值得函数将所需的数据映射到所需的位置来访问数据，这个映射函数叫散列函数，存放记录的数组叫做散列表。

构造哈希表的几种方法。
1，直接定址法—-取关键字的某个线性函数为散列地址，Hash(key)=key或Hash（key）=key%p.
2，除留余数法—-取关键值被某个不大于散列表m的数p除后的所得的余数为散列地址。Hash（key）=key%p.
3，平方取中法
4，折叠法
5，随机数法
6，数学分析法
我们今天主要介绍直接定制法（也称开放定址法、闭散列方法）和除留余数法（也称开链法、拉链法、哈希桶）

不同的key值经过哈希函数Hash（key）后可能产生相同的值，我们将这称为哈希冲突或哈希碰撞。
哈希冲突与在载荷因子有关 a = 填入表中的元素个数/散列表的长度
负载因子越小效率越高但浪费空间越多
负载因子越大产生的冲突可能性越大
载荷因子应严格控制在0.7-0.8以下，超过限制的值就要进行resize

一、闭散列方法-开放定址法
1，线性探测
（1）直接定址：当hash函数就是Hash(key)=key，直接将元素定位在数组中所对应的位置上，这种方法准确但实在太浪费空间，当元素少但范围大时浪费的空间太多。
（2）线性探测：当给出的空间最大是p时，hash函数时Hash（key） = key%p,当产生冲突时，将数据放在其后第一个没有存数据的位置。这种方法解决了空间问题，但容易造成冲突,使hash拥堵。
（3）二次探测：hash函数是Hash(key)+0,Hash(key)+1^2,Hash(key)+2^2,……Hash(key)+i^2.二次探测是对线性表探测的改进。

这里写图片描述

#pragma once

#include<iostream>
using namespace std;
#include<vector>
#include<string>
#include<time.h>

//线性探测
enum State
{
    EMPTY,//NULL
    EXIST,//存在
    DELETE,//删除
};

template<class K>
struct __HashFunc
{
    size_t operator()(const K& key)
    {
        return (size_t)key;
    }
};

//字符串哈希算法
struct _HashFuncString
{
    static size_t BKDRHash(const char* str)
    {
        unsigned int seed = 131;// 31 131 1313 13131 131313
        unsigned int hash = 0;
        while (*str)
        {
            hash = hash * seed + (*str++);
        }
        return hash;
    }

    size_t operator()(const string& s)
    {
        return BKDRHash(s.c_str());
    }
};

template<class K,class V>
struct HashNode
{
    State _state;
    K _key;
    V _value;
};


template<class K,class V,class _HashFunc = __HashFunc<K>>
class HashTable
{
    typedef HashNode<K, V> Node;
public:
    HashTable()
        :_size(0)
    {}


    bool InSert(const K& key, const V& value)
    {
        CheckCapacity();
        size_t index = HashFunc(key);
        size_t first = index;
        size_t i = 1;

        while (_tables[index]._state == EXIST)
        {
            if (_tables[index]._key == key)
            {
                if (_tables[index]._state == EXIST)
                {
                    return  false;
                }
            }
            //index += (first + i * i)%_tables.size();
            //++i;
            ++index;
            if (index == _tables.size())
            {
                index = 0;
            }
        }
        _tables[index]._key = key;
        _tables[index]._value = value;
        _tables[index]._state = EXIST;
        ++_size;
        return true;
    }

    Node* Find(const K& key)
    {
        size_t index = HashFunc(key);
        while (_tables[index]._state != EMPTY)
        {
            if (_tables[index]._key == key)
            {
                return &_tables[index];
            }
            ++index;
            if (index == _tables.size())
            {
                index = 0;
            }
        }
        return NULL;
    }

    bool Erase(const K& key)
    {
        HashNode* ret = Find(key);
        if (ret)
        {
            //伪删除法
            --_size;
            ret->_state = DELETE;
        }
        else
            return false;
    }

//素数表
    size_t GetNextPrime(size_t value)
    {
        const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul,786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul,25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul,805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };
        for (size_t i = 0; i < _PrimeList; ++i)
        {
            if (_PrimeList[i] > value)
                return _PrimeList[i];
        }
        return _PrimeList[_PrimeSize - 1];
    }

    void CheckCapacity()
    {
        if (_tables.size() == 0 || _size*10 / _tables.size() >= 7)
        {
            /*vector<Node> newTables;
            newTables.resize(newSize);*/
            HashTable<K, V,_HashFunc> newTable;
            newTable._tables.resize(GetNextPrime(_tables.size());
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                    newTable.InSert(_tables[i]._key, _tables[i]._value);
            }
            //vector中的swap只交换指针
            _tables.swap(newTable._tables);
        }
    }

    size_t HashFunc(const K& key)
    {
        _HashFunc hashfunc;
        return hashfunc(key) % _tables.size();
    }
protected:
    vector<Node> _tables;//size是表的大小
    size_t _size;//表有效数据的个数
};



void TestHashTable()
{
    HashTable<int, int> ht1;
    ht1.InSert(89,0);
    ht1.InSert(18, 0);
    ht1.InSert(49, 0);
    ht1.InSert(58, 0);
    ht1.InSert(9, 0);

    //返回当前时间的时间戳
    srand(time(0));
    for (size_t i = 0; i < 53; ++i)
    {
        ht1.InSert(rand(), 0);
    }

    HashTable<string, string,_HashFuncString> dict;
    dict.InSert("left", "左边");
    dict.InSert("right", "右边");
}

代码中，用到了素数表，是为了减少哈希冲突。字符串哈希算法是将字符串转化成整型从而进行HashFunc的算法。
字符串哈希算法

二，开链法/拉链法（哈希桶）
哈希桶是利用一个数组来存放节点的指针，里面并不存放任何数据，而这个数组下面所挂的单链表用来存放经过HashFunc（key）后映射位置相同的元素。
这里写图片描述

我们将开链法的hash表实现为加了迭代器版的，因为它只是底层的实现，C++库中的unordered_map和unordered_set是对它的一层封装。

#pragma once

#include<iostream>
using namespace std;
#include<vector>
#include<string>
#include<time.h>

template<class ValueType>
struct HashNode
{
    /*K _key;
    V _value;*/

    /*HashNode(const K& key, const V& value)
        :_key(key)
        , _value(value)
        , _next(NULL)
    {}*/

    ValueType _valueField;
    HashNode<ValueType>* _next;
    HashNode(const ValueType& v)
        :_valueField(v)
        ,_next(NULL)
    {}
};

//默认是整型
template<class K>
struct __HashFunc
{
    size_t operator()(const K& key)
    {
        return (size_t)key;
    }
};

struct _HashFuncString
{
    static size_t BKDRHash(const char* str)
    {
        unsigned int seed = 131;// 31 131 1313 13131 131313
        unsigned int hash = 0;
        while (*str)
        {
            hash = hash * seed + (*str++);
        }
        return hash;
    }

    size_t operator()(const string& s)
    {
        return BKDRHash(s.c_str());
    }
};

//迭代器
//前置声明
template<class K, class ValueType, class KeyOfValue, class _HashFunc = __HashFunc<K>>
class HashTable;

template <class K, class ValueType, class KeyOfValue, class _HashFunc = __HashFunc<K>>
struct __HashTableIterator
{
    typedef HashNode<ValueType> Node;
    typedef __HashTableIterator<K, ValueType, KeyOfValue, _HashFunc> Self;
    Node* _node;
    HashTable<K, ValueType, KeyOfValue, _HashFunc>* _hashtable;
    __HashTableIterator(Node* node, HashTable<K, ValueType, KeyOfValue, _HashFunc>* hashtable)
        :_node(node)
        ,_hashtable(hashtable)
    {}

    ValueType& operator*()
    {
        return _node->_valueField;
    }

    ValueType* operator->()
    {
        return &(operator*());
    }

    bool operator == (const Self& s)const 
    {
        return _node == s._node;
    }

    bool operator != (const Self& s)const 
    {
        return _node != s._node;
    }

    Self& operator++()
    {
        if (_node->_next != NULL)
        {
            _node = _node->_next;
        }
        else
        {
            KeyOfValue keyofvalue;
            size_t index = _hashtable->HashFunc(keyofvalue(_node->_valueField),
                _hashtable->_tables.size());
            Node* cur = NULL;
            //为什么是index+1?因为考虑到可能是hashtable最后一个节点,则不需要再++
            for (size_t i = index+1; i < _hashtable->_tables.size(); ++i)
            {//如果不为NULL
                if (_hashtable->_tables[i])
                {
                    cur = _hashtable->_tables[i];
                    break;
                }
            }
            _node = cur;
        }
        return *this;
    }

    Self operator++(int)
    {
        Self tmp(*this);
        ++tmp;
        return *this;
    }
};

//set
template<class K>
struct __KeyOfK
{
    const K& operator()(const K& key)
    {
        return key;
    }
};
//map
template<class K,class V>
struct __KeyOfKV
{
    const K& operator()(const pair<K, V>& kv)
    {
        return kv.first;
    }
};
//UnOrderedMap<K,V> -> HashTable<K,pair<K,V>,__KeyOfKV<K,V>>
//UnOrderedSet<K>   -> HashTable<K,K,__KeyOfK<K>>

template<class K, class ValueType, class KeyOfValue, class _HashFunc = __HashFunc<K>>
class HashTable
{
    typedef  HashNode<ValueType> Node;
    friend class __HashTableIterator<K, ValueType, KeyOfValue, _HashFunc>;
public:
    typedef __HashTableIterator<K, ValueType, KeyOfValue, _HashFunc> Iterator;
    HashTable()
        :_size(0)
    {}

    ~HashTable()
    {
        for (size_t i = 0; i < _tables.size(); ++i)
        {
            Node* cur = _tables[i];
            while (cur)
            {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _tables[i] = NULL;
        }
    }

    //bool InSert(const K& key, const V& value)
    //{
    //  CheckCapacity();
    //  size_t index = HashFunc(key,_tables.size());
    //  Node* cur = _tables[index];
    //  while (cur)
    //  {
    //      if (cur->_key == key)
    //      {
    //          return false;
    //      }
    //      cur = cur->_next;
    //  }
    //  //头插
    //  Node* tmp = new Node(key, value);
    //  tmp->_next = _tables[index];
    //  _tables[index] = tmp;
    //  ++_size;
    //}

    pair<Iterator,bool> InSert(const ValueType& kv)
    {
        CheckCapacity();
        KeyOfValue keyofvalue;
        size_t index = HashFunc(keyofvalue(kv), _tables.size());
        Node* cur = _tables[index];
        while (cur)
        {
            if (keyofvalue(cur->_valueField) == keyofvalue(kv))
            {
                return make_pair(Iterator(cur,this),false);
            }
            cur = cur->_next;
        }
        //头插
        Node* tmp = new Node(kv);
        tmp->_next = _tables[index];
        _tables[index] = tmp;
        ++_size;
        return make_pair(Iterator(tmp,this),true);
    }

    //素数表
    size_t GetNextPrime(size_t value)
    {
        const int _PrimeSize = 28;
        static const unsigned long _PrimeList[_PrimeSize] =
        {
            53ul, 97ul, 193ul, 389ul, 769ul,
            1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
            49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
            1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
            50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
            1610612741ul, 3221225473ul, 4294967291ul
        };
        for (size_t i = 0; i < _PrimeSize; ++i)
        {
            if (_PrimeList[i] > value)
                return _PrimeList[i];
        }
        return _PrimeList[_PrimeSize - 1];
    }


    void CheckCapacity()
    {
        if (_size == _tables.size())
        {
            KeyOfValue keyofvalue;
            vector<Node*> newTables;
            newTables.resize(GetNextPrime(_tables.size()));
            for (size_t i = 0; i < _tables.size(); ++i)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    //头插
                    size_t index = HashFunc(keyofvalue(cur->_valueField), newTables.size());
                    cur->_next = newTables[index];
                    newTables[index] = cur;

                    cur = next;
                }
                _tables[i] = NULL;
            }
            _tables.swap(newTables);
        }
    }

    Iterator Find(const K& key)
    {
        KeyOfValue keyofvalue;
        size_t index = HashFunc(key, _tables.size());
        Node* cur = _tables[index];
        while (cur)
        {
            if (keyofvalue(cur->_valueField)== key)
                return Iterator(cur,this);
            cur = cur->_next;
        }
        return End();
    }

    void Erase(Iterator pos)
    {
        KeyOfValue keyofvalue;
        size_t index = HashFunc(keyofvalue(*pos), _tables.size());
        Node* prev = NULL;
        Node* cur = _tables[index];
        while (cur)
        {
            if (keyofvalue(cur->_valueField) == key)
            {
                if (prev == NULL)//cur是头要特殊处理
                {
                    _tables[index] = cur->_next;
                }
                else
                {
                    prev->_next = cur->_next;
                }
                delete cur;
                --_size;
            }
            prev = cur;
            cur = cur->_next;
        }
    }

    size_t Erase(const K& key)
    {
        Iterator pos = Find(key);
        if (pos != End())
        {
            Erase(pos);
            return 1;
        }
        else
            return 0;
    }

    size_t HashFunc(const K& key,size_t size)
    {
        _HashFunc hushfuc;
        return hushfuc(key)%size;
    }

    Iterator Begin()
    {//返回第一个不为NULL的节点
        Node* cur = NULL;
        for (size_t i = 0; i < _tables.size(); ++i)
        {
            if (_tables[i])
            {
                cur = _tables[i];
                break;
            }
        }
        return Iterator(cur, this);//不是单参数的构造函数不能进行隐式类型转换
    }

    Iterator End()
    {
        return Iterator(NULL, this);
    }
private:
    vector<Node*> _tables;//vector里面存的是节点
    size_t _size;
};

void TestHashTable()
{
    HashTable<int, int,__KeyOfK<int>> ht1;//set
    ht1.InSert(1);
    ht1.InSert(5);
    ht1.InSert(1);
    ht1.InSert(8);
    HashTable<int, int, __KeyOfK<int>>::Iterator it1 = ht1.Begin();
    while (it1 != ht1.End())
    {
        cout << *it1 << " ";
        ++it1;
    }
    cout << endl;
    //返回当前时间的时间戳
    /*srand(time(0));
    for (size_t i = 0; i < 53; ++i)
    {
        ht1.InSert(rand(), 0);
    }*/

    HashTable<string, pair<string, string>, __KeyOfKV<string, string>, _HashFuncString> dict;//map
    dict.InSert(make_pair("left", "左边"));
    dict.InSert(make_pair("right", "右边"));
}

unordered_map类似于map的特性，是KV模型，不允许键值冗余。

#include"HashLink.h"

template<class K, class V, class _HashFunc = __HashFunc<K>>
class UnOrderedMap
{
    typedef HashNode<pair<K,V>> Node;
public:
    typedef typename HashTable<K, pair<K, V>, __KeyOfKV<K, V>, __HashFunc<K>>::Iterator Iterator;
    pair<Iterator,bool> InSert(const pair<K,V>& kv)
    {
        return ht.InSert(kv);
    }

    Iterator Find(const K& key)
    {
        return ht.Find(key);
    }

    void Erase(Iterator pos)
    {
        return ht.Erase(pos);
    }
private:
    HashTable<K, pair<K,V>, __KeyOfKV<K,V>, __HashFunc<K>> ht;
};

unordered_set类似于set的特性，是K模型，同样不允许键值冗余。

#include"HashLink.h"

template<class K,class _HashFunc = __HashFunc<K>>
class UnOrderedSet
{
    typedef HashNode<K> Node;
public:
    typedef typename HashTable<K, K, __KeyOfK<K>, __HashFunc<K>>::Iterator Iterator;
    pair<Iterator,bool> InSert(const K& key)
    {
        return ht.InSert(key);
    }

    size_t Erase(const K& key)
    {
        return ht.Erase(key);
    }

    Iterator Find(const K& key)
    {
        return ht.Find(key);
    }

    void Erase(Iterator pos)
    {
        ht.Erase(pos);
    }

private:
    HashTable<K, K, __KeyOfK<K>,__HashFunc<K>> ht;
};