数据结构简介:哈希表

最新推荐文章于 2024-08-04 21:48:35 发布

A鱼翔浅底A

最新推荐文章于 2024-08-04 21:48:35 发布

阅读量379

点赞数

分类专栏：数据结构

本文链接：https://blog.csdn.net/wangdamingll/article/details/104198484

版权

数据结构专栏收录该内容

12 篇文章 0 订阅

订阅专栏

数据结构简介

哈希表(散列表)

参考网址

https://blog.csdn.net/stpeace/article/details/81274233
https://blog.csdn.net/qq_36221862/article/details/73488162
https://blog.csdn.net/xyzbaihaiping/article/details/51607770

1. 定义

散列表的实现常常叫作散列(hashing),散列是一种以常数平均时间执行插入、删除和查找的技术.但是,散列表不能有效支持那些依赖元素间顺序信息的操作(例如查找最小值、查找最大值以及按序遍历),这类操作更适合用树结构来实现.
注意:表的大小(tablesize)一般是素数,这样产生冲突的时候会有更多的备选空间,更好的散列分布

2. 散列表解决冲突的方法

拉链法

hash(x)=x mod tablesize
装填因子a约等于1(tablesize应该为素数)
产生冲突时,元素应插入对应链表的前端,因为不仅方便,而且这个元素最有可能不久又被访问

开放地址法(线性探测法、平方探测法)

线性探测法
(1)hi(x) = ( hash(x) + f(i) ) mod tablesize,且f(0)=0;f(i)=i;
(2)装填因子a建议不超过0.5(只要表未满,线性探测总能找到空位;但a过大时会出现严重的一次聚集,探测次数急剧增加)
(3)惰性删除
平方探测法
(1)hi(x) = ( hash(x) + f(i) ) mod tablesize,且f(0)=0;f(i)=i^2;
(2)装填因子a必须小于等于0.5,并且tablesize必须为素数.在这两个条件下(即表至少有一半为空),平方探测法总能为新元素找到空位,插入一定成功
(3)惰性删除

双散列
f(i)=i * hash2(x)
hash2(x) = R -(x mod R),R为小于tablesize的素数
hi(x) = (hash(x) + f(i)) mod tablesize = (hash(x) + i * hash2(x)) mod tablesize

3. 以最坏情形O(1)访问的散列表

完美散列
杜鹃散列(布谷鸟散列)

通常使用较小的装填因子(严格小于0.5),并配合两个(或更多)相互独立的散列函数来实现
示例代码

跳房子散列

尝试改进线性探测算法
定义:通过预先确定的、在计算机结构体系的基础上优化的常数,来为探测序列的最大长度定界.

示例代码

基于哈希表简单实现HashMap(拉链法)

代码仅供参考:github地址

#ifndef __HASH_MAP1_HPP__
#define __HASH_MAP1_HPP__

#include <iostream>
#include <string>
#include <memory>
#include <functional>
using namespace std;

namespace hashmap1{

/// One entry of a bucket chain in the separate-chaining hash table.
///
/// Every node owns its successor through `pNode`, so releasing the first node
/// of a chain releases the whole chain.
template <typename T,typename U>
class Node{
public:
    /// Stores copies of `key` and `value`.
    /// The value parameter is typed `U` so that maps whose value type differs
    /// from the key type (e.g. string -> int) can be instantiated.
    Node(const T& key,const U& value):m_key(key),m_value(value){}

    /// Prints a trace line, then releases the remainder of the chain in a loop
    /// so that destroying a long chain does not recurse once per node.
    ~Node(){
        std::cout<<"~Node()"<<std::endl;
        std::unique_ptr<Node<T,U>> rest = std::move(pNode);
        while(rest != nullptr){
            // Detach the successor first: the node being released must not be
            // touched after it has been deleted.
            std::unique_ptr<Node<T,U>> next = std::move(rest->pNode);
            rest = std::move(next);
        }
    }
public:
    T m_key;
    U m_value;
    std::unique_ptr<Node<T,U>> pNode = nullptr; // next node in the same bucket
};

/// HashMap built on a fixed-size bucket array; collisions are resolved by
/// chaining (each bucket is a singly linked list of Node objects).
///
/// T is the key type (hashed with std::hash<T>, compared with ==),
/// U is the mapped type (must be default constructible for Find).
template <typename T,typename U>
class HashMap{
public:
    /// Creates a map with `size` buckets.
    /// A request for zero buckets is treated as one bucket, because the bucket
    /// index is computed with a modulo by the bucket count.
    explicit HashMap(const unsigned int size)
        : m_ppNode(std::make_unique<std::unique_ptr<Node<T,U>>[]>(size == 0 ? 1u : size)),
          m_size(size == 0 ? 1u : size){
        // make_unique<T[]> value-initializes the array, so every bucket starts empty.
    }
public:
    /// Inserts `key` -> `value`; if `key` is already present its value is overwritten.
    /// New keys are appended at the tail of their bucket chain.
    void Insert(const T& key,const U& value){
        // Walk the chain through the owning links so the tail can be extended in place.
        std::unique_ptr<Node<T,U>>* link = &m_ppNode[BucketOf(key)];
        while(*link != nullptr){
            if((*link)->m_key == key){
                (*link)->m_value = value;
                return;
            }
            link = &(*link)->pNode;
        }
        // Allocate only when the key is really new.
        *link = std::make_unique<Node<T,U>>(key,value);
    }

    /// Looks up `key`.
    /// Returns {copy of the value, true} when found, {U(), false} otherwise.
    std::pair<U,bool> Find(const T& key) const{
        for(const Node<T,U>* cur = m_ppNode[BucketOf(key)].get(); cur != nullptr; cur = cur->pNode.get()){
            if(cur->m_key == key){
                return std::make_pair(cur->m_value, true);
            }
        }
        return std::make_pair(U(), false);
    }
private:
    /// Maps a key to its bucket index in [0, m_size).
    unsigned int BucketOf(const T& key) const{
        return static_cast<unsigned int>(std::hash<T>{}(key) % m_size);
    }
private:
    std::unique_ptr<std::unique_ptr<Node<T,U>>[]> m_ppNode = nullptr; // array of bucket heads
    unsigned int m_size=0; // number of buckets (always >= 1 after construction)
};

/// Small demo: inserts two entries and prints the lookup result for three keys.
/// Declared inline because it is defined in a header.
inline void TestHashMap1(){
    HashMap<std::string,std::string> map(100);
    map.Insert("a","aaa");
    map.Insert("b","bbb");

    const char* const keys[] = {"a","b","c"};
    for(const char* key : keys){
        const std::pair<std::string,bool> ret = map.Find(key);
        if(ret.second){
            std::cout<<"find:value="<<ret.first<<std::endl;
        } else{
            std::cout<<"not find"<<std::endl;
        }
    }
}

}
#endif

基于哈希表简单实现HashMap(线性探查法、二次探查法)

代码仅供参考:github地址

#ifndef __HASH_MAP2_HPP__
#define __HASH_MAP2_HPP__

#include <iostream>
#include <string>
#include <vector>
#include <cmath>

using namespace std;

namespace hashmap2{

/// State of one slot of the open-addressing table.
enum State{
    EMPTY,  // never used; terminates a probe sequence
    DELETE, // tombstone left by Remove; probing continues past it
    EXIST   // holds a live key/value pair
};

/// One slot of the open-addressing table: a key/value pair plus its state.
template <typename K,typename V>
class HashNode{
public:
    HashNode():m_state(EMPTY){}
public:
    std::pair<K,V> m_kv;
    State m_state;
};

/// HashMap built on an open-addressing hash table.
///
/// K is hashed with std::hash<K> and compared with ==; K and V must be default
/// constructible. `isLine` is a non-type template parameter selecting the probe
/// strategy: true = linear probing, false = quadratic probing.
///
/// Invariant kept by CheckSize: live entries plus tombstones never exceed half
/// of the (prime) table size, so every probe sequence reaches an EMPTY slot.
template <typename K,typename V,bool isLine = true>
class HashMap{
public:
    /// Creates a table whose slot count is the first listed prime greater than `size`.
    explicit HashMap(const unsigned int size = 10):m_table(GetNextPrime(size)){}
public:
    /// Inserts `key` -> `value`.
    /// Returns false (leaving the stored value untouched) when `key` is already
    /// present or no slot could be found, true when the pair was stored.
    bool Insert(const K& key,const V& value){
        CheckSize();
        const unsigned int tableSize = static_cast<unsigned int>(m_table.size());
        unsigned int index = HomeSlot(key);
        unsigned int offset = 1;
        HashNode<K,V>* target = nullptr; // first reusable slot seen on the probe path
        for(unsigned int probes = 0; probes < tableSize; ++probes){
            HashNode<K,V>& slot = m_table[index];
            if(slot.m_state == EMPTY){
                if(target == nullptr){
                    target = &slot;
                }
                break; // the key cannot live beyond an EMPTY slot
            }
            if(slot.m_state == EXIST){
                if(slot.m_kv.first == key){
                    return false;
                }
            }else if(target == nullptr){
                // Remember the tombstone but keep probing: the key may still
                // exist further along the sequence.
                target = &slot;
            }
            index = NextSlot(index,offset);
        }
        if(target == nullptr){
            return false;
        }
        if(target->m_state == DELETE){
            --m_tombstones;
        }
        target->m_kv = std::make_pair(key,value);
        target->m_state = EXIST;
        ++m_size;
        return true;
    }

    /// Looks up `key`.
    /// Returns {pointer to the slot, true} when found; otherwise
    /// {pointer to the key's home slot, false}.
    std::pair<HashNode<K,V>*,bool> Find(const K&key){
        const unsigned int tableSize = static_cast<unsigned int>(m_table.size());
        const unsigned int home = HomeSlot(key);
        unsigned int index = home;
        unsigned int offset = 1;
        for(unsigned int probes = 0; probes < tableSize; ++probes){
            HashNode<K,V>& slot = m_table[index];
            if(slot.m_state == EMPTY){
                break;
            }
            // Tombstones keep their old key, so the state must be checked too.
            if(slot.m_state == EXIST && slot.m_kv.first == key){
                return std::make_pair(&slot, true);
            }
            index = NextSlot(index,offset);
        }
        return std::make_pair(&m_table[home], false);
    }

    /// Removes `key` by turning its slot into a tombstone.
    /// Returns true when the key was present.
    bool Remove(const K& key){
        const std::pair<HashNode<K,V>*,bool> ret = Find(key);
        if(!ret.second){
            return false;
        }
        ret.first->m_state = DELETE;
        --m_size;
        ++m_tombstones;
        return true;
    }

    /// Number of live entries.
    inline unsigned int Size() const{
        return m_size;
    }

private:
    /// Slot where the probe sequence of `key` starts.
    unsigned int HomeSlot(const K& key) const{
        return static_cast<unsigned int>(std::hash<K>{}(key) % m_table.size());
    }

    /// Advances one probe step using the strategy selected by `isLine`.
    unsigned int NextSlot(unsigned int index,unsigned int& offset) const{
        if(isLine){
            return DetectedLine(index);
        }
        return DetectedSquare(index,offset);
    }

    /// Linear probing: next slot, wrapping around at the end of the table.
    unsigned int DetectedLine(unsigned int hashAddr) const{
        hashAddr++;
        if(hashAddr == m_table.size()){
            hashAddr = 0;
        }
        return hashAddr;
    }

    /// Quadratic probing, computed incrementally.
    ///   H(i)   = H0 + i^2
    ///   H(i-1) = H0 + (i-1)^2
    ///   H(i)   = H(i-1) + (2*i - 1)
    /// `offset` holds the current odd increment (1, 3, 5, ...) and is advanced
    /// by 2 on every call.
    unsigned int DetectedSquare(unsigned int hashAddr,unsigned int& offset) const{
        // Add in 64 bits so that index + offset cannot wrap before the modulo.
        const unsigned long long next = static_cast<unsigned long long>(hashAddr) + offset;
        offset += 2;
        return static_cast<unsigned int>(next % m_table.size());
    }

    /// Rebuilds the table before an insertion that would push the number of
    /// used slots (live entries + tombstones) above half of the table size.
    /// The rebuild drops all tombstones and picks a size that keeps the load
    /// factor of the live entries below 0.5.
    void CheckSize(){
        const unsigned long long used = static_cast<unsigned long long>(m_size) + m_tombstones + 1ULL;
        if(used * 2ULL <= m_table.size()){
            return;
        }
        const unsigned long long wanted = (static_cast<unsigned long long>(m_size) + 1ULL) * 2ULL;
        const unsigned long long limit = static_cast<unsigned int>(-1);
        const unsigned int hint = static_cast<unsigned int>(wanted < limit ? wanted : limit);
        if(GetNextPrime(hint) <= m_table.size() && m_tombstones == 0){
            return; // already at the largest listed size and nothing to clean up
        }

        HashMap<K,V,isLine> rebuilt(hint);
        for(const HashNode<K,V>& node : m_table){
            if(node.m_state == EXIST){
                rebuilt.Insert(node.m_kv.first,node.m_kv.second);
            }
        }
        this->Swap(rebuilt);
    }

    /// Exchanges the complete contents of two maps.
    void Swap(HashMap<K,V,isLine>& hm){
        std::swap(m_size,hm.m_size);
        std::swap(m_tombstones,hm.m_tombstones);
        m_table.swap(hm.m_table);
    }

    /// Returns the first prime of the built-in list that is greater than `size`
    /// (or the largest listed prime if none is). Prime table sizes reduce
    /// collisions and are required by quadratic probing.
    static unsigned int GetNextPrime(const unsigned int size){
        static const unsigned long kPrimes[] ={
                53ul, 97ul, 193ul, 389ul, 769ul,
                1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
                49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
                1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
                50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
                1610612741ul, 3221225473ul, 4294967291ul
        };
        unsigned long last = 0;
        for(const unsigned long prime : kPrimes){
            if(prime > size){
                return static_cast<unsigned int>(prime);
            }
            last = prime;
        }
        return static_cast<unsigned int>(last);
    }

private:
    std::vector<HashNode<K,V>> m_table; // hash table slots
    unsigned int m_size=0;       // number of EXIST slots
    unsigned int m_tombstones=0; // number of DELETE slots
};

/// Small demo of the quadratic-probing map: inserts, looks up and removes a few keys.
/// Declared inline because it is defined in a header.
inline void TestHashMap2(){
    HashMap<int,int,false> map;
    map.Insert(25, 1);
    map.Insert(25, 2);
    map.Insert(14, 2);
    map.Insert(36, 3);
    map.Insert(49, 4);
    map.Insert(68, 5);
    map.Insert(57, 6);
    map.Insert(11, 7);
    map.Insert(37, 8);

    std::cout<<map.Size()<<std::endl;

    // Prints the entry stored under `key`, or `missText` when it is absent.
    const auto report = [&map](const int key,const char* missText){
        const auto ret = map.Find(key);
        if(ret.second){
            std::cout<<"Find:key="<<ret.first->m_kv.first<<" value="<<ret.first->m_kv.second<<std::endl;
        }else{
            std::cout<<missText<<std::endl;
        }
    };

    report(25,"not find");
    report(11,"not find");
    report(12,"not find:12");

    map.Remove(25);
    report(25,"not find:25");

    map.Remove(14);
    std::cout<<map.Size()<<std::endl;
}

}

#endif