内散列，外散列，以及带溢出表的散列的实现及性能分析

最新推荐文章于 2022-07-21 17:04:07 发布

小文件

最新推荐文章于 2022-07-21 17:04:07 发布

阅读量1.8k

点赞数

分类专栏：数据结构软设

本文链接：https://blog.csdn.net/readonlyfile/article/details/8929178

版权

数据结构同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

软设

2 篇文章 0 订阅

订阅专栏

糊里糊涂的做了一个hash类的继承，使这三种散列实现方式具有相同的外部接口；

在 main 里面调用同一个函数完成了三种散列的性能测试，感觉很牛；当然，只是自我感觉。

main.cpp

#include <stdlib.h>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include "hashTable.h"
/**
    程序名称：散列结构性能测试程序
    实现功能：内散列，外散列，带溢出表的内散列的基准（填充因子0.5，数据规模小，插入数据随机，
              查找数据一半是插入数据，一半是随机数据，散列函数为除余法）测试，满填充因子测试
             （填充因子为 1），改变散列函数测试（相乘取整法），增大数据规模（big）测试，改变
              查找数据分布（全部为插入的数据）测试。
    结论 ： 采用内散列法，当填充因子很大时（测试程序为1），插入查找次数巨大，效率低
            采用外散列法，当数据规模较大时，耗时较多（可能是由于频繁取址操作导致）
            采用带溢出表的内散列法，平均性能最好，（应该较外散列法在表不满时更耗空间）
*/
// One row of the benchmark summary. Rows form a singly linked list that
// test() appends to and printTotalInfo() prints and frees.
struct Info
{
    string type ;        // label of the test case, as passed to test()
    double times ;       // elapsed time test() measured with clock() around its insert and search loops
    double insertRate ;  // getInsTimes() of the tested table divided by the number of items
    double searchRate ;  // getSeaTimes() of the tested table divided by the number of items
    Info * next ;        // following row, NULL for the last row
} ;
// Prints the detailed figures of a single test case to cout.
void printResult(string type,int totalData,int sucInsert,int sucSearch,int insertTimes,int searchTimes,double times) ;
// Inserts insArray[0..totalData) into x, searches seaArray[0..totalData), appends a summary row after rear.
void test(Hash & x,int totalData,int insArray[] ,int seaArray[],string type,Info * & rear) ;
// Prints the summary table and frees the whole list, including head.
void printTotalInfo(Info * head) ;
/**
 * Benchmark driver.
 * Builds the random insert/search data sets, runs five scenarios (baseline,
 * full load factor, alternative hash function, larger data set, all-hit
 * search data) against each of the three hash table implementations through
 * test(), then prints the summary table.
 */
int main()
{
    const int small = 10000 ;
    const int big = 100000 ;
    // Static storage keeps these arrays (220000 ints in total) off the call
    // stack, where they could exceed the default stack size on some platforms.
    static int randomNum_small[small] ;   // small random insert data
    static int randomNum_big[big] ;       // large random insert data
    static int searchArraySmall[small] ;  // search data for the small set: first half copied from the
                                          // insert data (guaranteed hits), second half fresh random numbers
    static int searchArrayBig[big] ;      // same layout for the large set
    Info * head = new Info ;              // dummy list head; released by printTotalInfo()
    head->next = NULL ;
    Info * rear = head ;
    // Pseudo-random keys in the range 0..RAND_MAX
    srand(static_cast<unsigned int>(time(0))) ;
    for(int i = 0 ; i < small ; i++)
    {
        randomNum_small[i] = rand() ;
    }
    for(int i = 0 ; i < big ; i++)
    {
        randomNum_big[i] = rand() ;
    }
    // Build the search data sets
    for(int i = 0 ; i < small/2 ; i++)
    {
        searchArraySmall[i] = randomNum_small[i] ;
    }
    for(int i = small/2 ; i< small ; i++)
    {
        searchArraySmall[i] = rand() ;
    }
    for(int i = 0 ; i < big/2 ; i++)
    {
        searchArrayBig[i] = randomNum_big[i] ;
    }
    for(int i = big/2 ; i < big ; i++)
    {
        searchArrayBig[i] = rand() ;
    }
    // ---- Closed hashing (inner table) ----
    // Baseline: load factor 0.5, modulo hash
    HashTable_inner x(small*2) ;
    test(x,small,randomNum_small,searchArraySmall,"内散列法—— 基准测试",rear) ;
    // Load factor 1
    HashTable_inner xFactorFull(small) ;
    test(xFactorFull,small,randomNum_small,searchArraySmall,"内散列法——改变填充因子",rear) ;
    // Multiplicative hash function
    HashTable_inner xGETINTHASH(small*2,GETINTHASH) ;
    test(xGETINTHASH,small,randomNum_small,searchArraySmall,"内散列法——改变散列函数",rear) ;
    // Larger data set
    HashTable_inner xBig(big*2) ;
    test(xBig,big,randomNum_big,searchArrayBig,"内散列法——改变数据规模",rear) ;
    // Search data distribution: search exactly the inserted keys
    HashTable_inner xDistribution(small*2) ;
    test(xDistribution,small,randomNum_small,randomNum_small,"内散列法——改变数据分布",rear) ;
    // ---- Open hashing (separate chaining) ----
    // Baseline
    HashTable_open x2(small*2) ;
    test(x2,small,randomNum_small,searchArraySmall,"外散列法——基准测试",rear) ;
    // Load factor 1
    HashTable_open x2FactorFull(small) ;
    test(x2FactorFull,small,randomNum_small,searchArraySmall,"外散列法——改变填充因子",rear) ;
    // Hash function case.
    // NOTE(review): the table is constructed with the default (modulo) hash, so
    // this case does not vary the hash function yet. Before passing GETINTHASH
    // here, confirm that HashTable_open(int, int) initializes its probe counters.
    HashTable_open x2GETINTHASH(small*2) ;
    test(x2GETINTHASH,small,randomNum_small,searchArraySmall,"外散列法——改变散列函数",rear) ;
    // Larger data set
    HashTable_open x2Big(big*2) ;
    test(x2Big,big,randomNum_big,searchArrayBig,"外散列法——改变数据规模",rear) ;
    // Search data distribution: search exactly the inserted keys
    // (the search array used to be searchArraySmall, which repeated the baseline)
    HashTable_open x2Distribution(small*2) ;
    test(x2Distribution,small,randomNum_small,randomNum_small,"外散列法—— 改变数据分布     ",rear) ;
    // ---- Closed hashing with overflow chains ----
    // Baseline
    HashTable_overflow x3(small*2) ;
    test(x3,small,randomNum_small,searchArraySmall,"带溢出表的内散列法——基准测试",rear) ;
    // Load factor 1
    HashTable_overflow x3FactorFull(small) ;
    test(x3FactorFull,small,randomNum_small,searchArraySmall,"带溢出表的内散列法——改变填充因子",rear) ;
    // Multiplicative hash function (GETINTHASH used to be omitted, repeating the baseline)
    HashTable_overflow x3GETINTHASH(small*2,GETINTHASH) ;
    test(x3GETINTHASH,small,randomNum_small,searchArraySmall,"带溢出表的内散列法——改变散列函数",rear) ;
    // Larger data set
    HashTable_overflow x3Big(big*2) ;
    test(x3Big,big,randomNum_big,searchArrayBig,"带溢出表的内散列法——改变数据规模",rear) ;
    // Search data distribution: search exactly the inserted keys
    HashTable_overflow x3Distribution(small*2) ;
    test(x3Distribution,small,randomNum_small,randomNum_small,"带溢出表的内散列法——改变数据分布",rear) ;
    // Print the summary table (also frees the list)
    printTotalInfo(head) ;
    return 0;
}
/**
 * Runs one benchmark case against a hash table.
 *
 * Inserts insArray[0..totalData) into x, then searches for
 * seaArray[0..totalData), timing both loops together with clock().
 * A summary row is appended after rear (rear is moved to the new row) and
 * the detailed figures are printed through printResult().
 *
 * @param x         table under test
 * @param totalData number of keys to insert and to search
 * @param insArray  keys to insert
 * @param seaArray  keys to search for
 * @param type      label of the test case
 * @param rear      last row of the summary list; updated to the appended row
 */
void test(Hash & x,int totalData,int insArray[] ,int seaArray[],string type,Info * & rear)
{
    int sucInsTimes = 0 ;
    int sucSeaTimes = 0 ;
    std::ostringstream nameBuilder ;   // reused for every record to avoid re-creating the stream
    const clock_t startTime = clock() ;
    for(int i = 0 ; i <totalData ; i++)
    {
        StaffInfo tmp ;
        tmp.id = insArray[i] ;
        // Format the id as text. The former `tmp.id + "'s name"` was pointer
        // arithmetic on the string literal and read far outside of it.
        nameBuilder.str(std::string()) ;
        nameBuilder << tmp.id << "'s name" ;
        tmp.name = nameBuilder.str() ;
        if(x.Insert(tmp))
        {
            sucInsTimes++ ;
        }
    }
    for(int i = 0 ; i < totalData ; i++)
    {
        if(x.Search(seaArray[i]) != INVALID)
        {
            sucSeaTimes++ ;
        }
    }
    const clock_t finishTime = clock() ;
    Info * tmpInfo = new Info ;
    tmpInfo->type = type ;
    // Stored in milliseconds: both report functions label this value "ms".
    tmpInfo->times = 1000.0 * static_cast<double>(finishTime-startTime) / CLOCKS_PER_SEC ;
    // Average probes per item; 0 for an empty run instead of dividing by zero.
    tmpInfo->searchRate = totalData > 0 ? static_cast<double>(x.getSeaTimes())/totalData : 0.0 ;
    tmpInfo->insertRate = totalData > 0 ? static_cast<double>(x.getInsTimes())/totalData : 0.0 ;
    tmpInfo->next = NULL ;
    rear->next = tmpInfo ;
    rear = tmpInfo ;
    printResult(type,totalData,sucInsTimes,sucSeaTimes,x.getInsTimes(),x.getSeaTimes(),rear->times) ;
}
/**
 * Writes the detailed figures of one test case to cout: item counts,
 * successful inserts/searches, probe totals with their per-item ratios,
 * and the elapsed time passed in by the caller.
 */
void printResult(string type,int totalData,int sucInsert,int sucSearch,int insertTimes,int searchTimes,double time)
{
    const int duplicates = totalData - sucInsert ;
    const int guaranteedHits = totalData / 2 ;
    const double insertRatio = static_cast<double>(insertTimes) / totalData ;
    const double searchRatio = static_cast<double>(searchTimes) / totalData ;

    cout << "\n\t\t" << type << "信息" << endl ;
    cout << "插入总数据量为 " << totalData
         << "\n成功插入的数据量为" << sucInsert
         << ",其余 " << duplicates << " 个数据为重复数据\n" ;
    cout << "查找总数据量为 " << totalData
         << "\n成功查找的数据量为 " << sucSearch
         << "，总数据中至少有 " << guaranteedHits << " 个数据肯定存在\n" ;
    cout << "总的插入次数为 " << insertTimes << " ,与总插入数据量比为 " << insertRatio << endl ;
    cout << "总的查找次数为 " << searchTimes << " ,与总查找数据量比为 " << searchRatio << endl ;
    cout << "插入与查找耗时为 " << time << "ms" << endl ;
}
/**
 * Prints the summary table, one line per row that follows the dummy head,
 * and releases every node of the list (the head included).
 */
void printTotalInfo(Info * head)
{
    Info * row = head->next ;
    delete head ;   // the dummy head carries no data
    cout <<"\n\t\t\t\t测试信息汇总\n\n\n\t类型\t\t\t\t耗时(ms)\t插入次数/输入量\t查找次数/输入量\t\n" ;
    for( ; row != NULL ; )
    {
        Info * const following = row->next ;   // read the link before the row is freed
        cout << left << setw(35) << row->type
             << "\t" << row->times
             << "\t\t" << row->insertRate
             << "\t\t" << row->searchRate << endl ;
        delete row ;
        row = following ;
    }
}

hashTable.h

#ifndef HASHTABLE_H_INCLUDED
#define HASHTABLE_H_INCLUDED
#include <string>
#include <cmath>
#define EMPTY -1
#define DELETED -2
#define INVALID -3
#define MODHASH 1
#define GETINTHASH 2
#define A 0.618
//两种散列函数
/*
    一种是除余法，MODHASH
    另外一种是相乘取整法，GETINTHASH
    相乘取整法。
    取A = 0.618（根号5减1 除以2，黄金比例）
    然后用KEY乘以A，结果取其小数部分，
    然后用桶的数量MAX乘以该小数，强制转换为整数后返回
*/
/**
    **数据结构：Hash表
    **实现功能：插入，删除，查找
    **实现方式：内散列，外散列，带溢出表的内散列
*/
using namespace std ;
/**
 * Tells whether n is a prime number.
 * Values below 2 are not prime; 2 is the only even prime.
 * Declared inline because the definition lives in a header.
 */
inline bool isPrimeNum(int n)
{
    if(n < 2)
        return false ;
    if(n % 2 == 0)
        return n == 2 ;
    // i <= n / i is i*i <= n without overflow or floating point rounding
    for(int i = 3 ; i <= n / i ; i += 2)
    {
        if(n % i == 0)
            return false ;
    }
    return true ;
}
/**
 * Returns the largest prime that is <= n, used as the modulus of the
 * division hash.
 * For n == 1 no prime exists; 1 is returned so that a one-slot table keeps
 * a usable modulus. For n <= 0 the result is INVALID.
 */
inline int getMaxPrimeNum(int n )
{
    for(int i = n ; i > 1 ; i--)
    {
        if(isPrimeNum(i))
            return i ;
    }
    return n == 1 ? 1 : INVALID ;
}


// Record stored in the hash tables.
struct StaffInfo
{
    int id ; // key of the record; the tables also store the EMPTY / DELETED markers here for vacant slots
    string name ;
} ;
/**
 * Common interface of the hash table implementations, so that callers can
 * work with any of them through a base-class reference or pointer.
 */
class Hash
{
public :
    // Virtual so that a table deleted through a Hash pointer is destroyed as its real type.
    virtual ~Hash() {}
    // Looks key up; implementations return INVALID when it is not stored.
    virtual int Search(int key) = 0  ;
    // Stores a record; returns false when it was not inserted.
    virtual bool Insert(StaffInfo records)=0 ;
    // Removes the record with the given key; returns false when nothing was removed.
    virtual bool Delete(int key) = 0 ;
    // Probe counter accumulated by Insert calls.
    virtual int getInsTimes()=0 ;
    // Probe counter accumulated by Search calls.
    virtual int getSeaTimes()=0;

} ;
class HashTable_inner :  public Hash//内散列表
{
public :
    HashTable_inner(int n) ;
    HashTable_inner(int n,int type) ;
    int Search(int key ) ;
    bool Insert(StaffInfo records) ;
    bool Delete(int key) ;
    StaffInfo Retrieve(int key) ;
    int getSeaTimes() ;
    int getInsTimes() ;
private :
    int hash(int key) ;
    int max ;
    StaffInfo * hashTable ;
    int primeNum ;
    int InsertTimes ;
    int searchTimes ;
    int HASHTYPE ;
} ;
// Singly linked list node holding one record; used for the chains of
// HashTable_open and as the slot/overflow node type of HashTable_overflow.
struct Node
{
    StaffInfo data ;
    Node * next ; // following node of the chain, or NULL
} ;
//----------------------------------
class HashTable_open : public Hash//外散列
{
public :
    HashTable_open(int n) ;
    HashTable_open(int n , int HASHTYPE) ;
    bool Insert(StaffInfo val) ;
    int Search(int key) ;
    bool Delete(int key) ;
    StaffInfo Retrieve(int key) ;
    int getInsTimes() ;
    int getSeaTimes() ;
private :
    int hash(int key) ;
    int max ;
    Node ** hashTable ;//散列表里只存指针
    int insTimes ;
    int seaTimes ;
    int primeNum ;
    int HASHTYPE ;
} ;
//----------------------------------------
class HashTable_overflow : public Hash //带溢出表的内散列
{
public :
    HashTable_overflow(int n) ;
    HashTable_overflow(int n , int HASHTYPE) ;
    int Search(int k) ;
    bool Insert(StaffInfo records) ;
    bool Delete(int key) ;
    int getInsTimes() ;
    int getSeaTimes() ;
private :
    int hash(int key) ;
    Node * hashTable ;
    int primeNum ;
    int insTimes ;
    int seaTimes ;
    int max ;
    int HASHTYPE ;
} ;
// Creates a table of n vacant slots that uses the division hash (MODHASH).
HashTable_inner::HashTable_inner(int n)
    : max(n),
      hashTable(new StaffInfo[n]),
      primeNum(getMaxPrimeNum(n)),
      InsertTimes(0),
      searchTimes(0),
      HASHTYPE(MODHASH)
{
    // Mark every slot as vacant.
    for(StaffInfo * slot = hashTable ; slot != hashTable + max ; ++slot)
    {
        slot->id = EMPTY ;
    }
}
// Creates a table of n vacant slots. type == MODHASH selects the division
// hash; any other value selects the multiplicative hash (GETINTHASH).
HashTable_inner::HashTable_inner(int n, int type)
    : max(n),
      hashTable(new StaffInfo[n]),
      primeNum(getMaxPrimeNum(n)),
      InsertTimes(0),
      searchTimes(0),
      HASHTYPE(type == MODHASH ? MODHASH : GETINTHASH)
{
    int index = 0 ;
    while(index < max)
    {
        hashTable[index].id = EMPTY ;   // vacant slot marker
        ++index ;
    }
}
/**
 * Maps a key to its home slot.
 * MODHASH: key modulo primeNum. Otherwise: multiplicative method, the
 * fractional part of key * A scaled to the table size.
 * Negative keys are folded into the valid range as well; the result is
 * identical to the previous formula for non-negative keys.
 * Assumes primeNum > 0, i.e. a table with at least one slot.
 */
int HashTable_inner::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        // % keeps the sign of the dividend, so shift negative remainders up
        const int remainder = key % primeNum ;
        return remainder < 0 ? remainder + primeNum : remainder ;
    }
    const double scaled = key * A ;
    // floor() instead of truncation keeps the fraction in [0, 1) for negative keys too
    const double fraction = scaled - std::floor(scaled) ;
    return static_cast<int>(max * fraction) ;
}
/**
 * Inserts a record with quadratic probing.
 *
 * The probe sequence starts at the home slot and moves on by 1, 4, 9, ...
 * slots (modulo the table size), the same sequence Search follows. At most
 * max - 1 slots are examined and each examined slot adds one to the
 * InsertTimes counter; a duplicate sitting in the home slot is detected
 * without being counted, as before.
 *
 * The scan continues past DELETED slots until an EMPTY slot, so a key that
 * lives behind a tombstone is still recognised as a duplicate; the first
 * reusable slot met on the way receives the record.
 *
 * @return true when the record was stored; false for a duplicate key, for
 *         the reserved ids EMPTY / DELETED, or when no free slot was found.
 */
bool HashTable_inner::Insert(StaffInfo records)
{
    // The reserved ids would read back as vacant slots, so they cannot be stored.
    if(max <= 0 || records.id == EMPTY || records.id == DELETED)
    {
        return false ;
    }
    int slot = hash(records.id) ;
    if(hashTable[slot].id == records.id)
    {
        return false ;
    }
    int target = -1 ;   // first reusable slot seen, -1 while none
    for(int step = 1 ; step < max ; step++)
    {
        InsertTimes++ ;
        const int occupant = hashTable[slot].id ;
        if(occupant == records.id)
        {
            return false ;
        }
        if(occupant == EMPTY)
        {
            // end of the probe chain: the key is not stored anywhere
            if(target < 0)
            {
                target = slot ;
            }
            break ;
        }
        if(occupant == DELETED && target < 0)
        {
            target = slot ;
        }
        // Advance by step^2. The square is formed in 64 bits and the position
        // reduced every time, so long probe runs can no longer overflow int.
        slot = static_cast<int>((slot + static_cast<long long>(step) * step) % max) ;
    }
    if(target < 0)
    {
        return false ;
    }
    hashTable[target].id = records.id ;
    hashTable[target].name = records.name ;
    return true ;
}
int HashTable_inner::Search(int key)
{
    int first = hash(key) ;
    int counter = 0 ;
    int reHash = 0 ;
    int hashCode = first ;
    while(counter < max && hashTable[hashCode].id != EMPTY)
    {
        searchTimes++ ;
        if(hashTable[hashCode].id == key)
        {
            return hashCode ;
        }
        else
        {
            counter++ ;
            reHash = counter*counter ;
            hashCode = (hashCode + reHash)% max ;
        }
    }
    return INVALID ;
}
// Removes the record with the given key by turning its slot into a DELETED
// tombstone. The lookup goes through Search(), so it also advances the
// search probe counter. Returns false when the key is not stored.
bool HashTable_inner::Delete(int key)
{
    const int slot = Search(key) ;
    if(slot == INVALID)
    {
        return false ;
    }
    hashTable[slot].id = DELETED ;
    return true ;
}
int HashTable_inner::getSeaTimes()
{
    return searchTimes ;
}
int HashTable_inner::getInsTimes()
{
    return InsertTimes ;
}
//------------------------------------------
// Creates a table of n empty buckets that uses the division hash (MODHASH).
HashTable_open::HashTable_open(int n)
    : max(n),
      hashTable(new Node*[n]),
      insTimes(0),
      seaTimes(0),
      primeNum(getMaxPrimeNum(n)),
      HASHTYPE(MODHASH)
{
    // Every bucket starts without a chain.
    for(Node ** bucket = hashTable ; bucket != hashTable + max ; ++bucket)
    {
        *bucket = NULL ;
    }
}
// Creates a table of n empty buckets. HASHTYPE == MODHASH selects the
// division hash; hash() treats any other value as the multiplicative hash.
HashTable_open::HashTable_open(int n , int HASHTYPE)
{
    max = n ;
    hashTable = new Node * [max] ;
    for(int i = 0 ; i < max ; i++)
    {
        hashTable[i] = NULL ;
    }
    // The probe counters were left uninitialized here, which made
    // getInsTimes() / getSeaTimes() return garbage for tables built this way.
    insTimes = 0 ;
    seaTimes = 0 ;
    this->HASHTYPE = HASHTYPE ;
    primeNum = getMaxPrimeNum(max) ;
}
/**
 * Maps a key to its bucket index.
 * MODHASH: key modulo primeNum. Otherwise: multiplicative method, the
 * fractional part of key * A scaled to the table size.
 * Negative keys are folded into the valid range as well; the result is
 * identical to the previous formula for non-negative keys.
 * Assumes primeNum > 0, i.e. a table with at least one bucket.
 */
int HashTable_open::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        // % keeps the sign of the dividend, so shift negative remainders up
        const int remainder = key % primeNum ;
        return remainder < 0 ? remainder + primeNum : remainder ;
    }
    const double scaled = key * A ;
    // floor() instead of truncation keeps the fraction in [0, 1) for negative keys too
    const double fraction = scaled - std::floor(scaled) ;
    return static_cast<int>(fraction * max) ;
}
/**
 * Inserts a record at the front of its bucket's chain.
 *
 * The chain is scanned first; insTimes grows by one for every node passed
 * and by one more for the insertion itself. The node is allocated only
 * after the scan, so rejecting a duplicate no longer leaks a node.
 *
 * @return true when the record was stored, false when the key already exists.
 */
bool HashTable_open::Insert(StaffInfo val)
{
    const int bucket = hash(val.id) ;
    for(Node * cur = hashTable[bucket] ; cur != NULL ; cur = cur->next)
    {
        if(cur->data.id == val.id)
        {
            return false ; // duplicate key, nothing is inserted
        }
        insTimes++ ;
    }

    Node * fresh = new Node ;
    fresh->data.id = val.id ;
    fresh->data.name = val.name ;
    fresh->next = hashTable[bucket] ;
    hashTable[bucket] = fresh ; // new node becomes the head of the chain
    insTimes++ ;
    return true ;
}
int HashTable_open::Search(int key)
{
    seaTimes++ ;
    int hashCode = hash(key) ;
    Node * pos = hashTable[hashCode] ;
    while(pos != NULL)
    {
        if(pos->data.id == key)
        {
            return hashCode ;
        }
        seaTimes++ ;
        pos = pos->next ;
    }
    return INVALID ;
}
bool HashTable_open::Delete(int key)
{
    int hashCode  = hash(key) ;
    Node * pos = hashTable[hashCode] ;
    if(pos != NULL)
    {
        if(pos->data.id == key)
        {
            delete pos ;
            pos = NULL ;
            return true ;
        }
        else
        {
            Node * posNext = pos->next ;
            while(posNext != NULL)
            {
                if(posNext->data.id == key)
                {
                    pos->next = posNext->next ;
                    delete posNext ;
                    return true ;
                }
            }
            return false ;
        }
    }
    return false ;
}
int HashTable_open::getInsTimes()
{
    return insTimes ;
}
int HashTable_open::getSeaTimes()
{
    return seaTimes ;
}
//---------------------------------
// Creates a main table of n vacant slots without overflow chains; uses the
// division hash (MODHASH).
HashTable_overflow::HashTable_overflow(int n)
    : hashTable(new Node[n]),
      primeNum(getMaxPrimeNum(n)),
      insTimes(0),
      seaTimes(0),
      max(n),
      HASHTYPE(MODHASH)
{
    for(Node * slot = hashTable ; slot != hashTable + max ; ++slot)
    {
        slot->data.id = EMPTY ;   // vacant slot marker
        slot->next = NULL ;       // no overflow chain yet
    }
}
// Creates a main table of n vacant slots without overflow chains and stores
// the requested hash selector as given.
HashTable_overflow::HashTable_overflow(int n,int HASHTYPE)
    : hashTable(new Node[n]),
      primeNum(getMaxPrimeNum(n)),
      insTimes(0),
      seaTimes(0),
      max(n),
      HASHTYPE(HASHTYPE)   // member initialized from the parameter of the same name
{
    int index = 0 ;
    while(index < max)
    {
        Node & slot = hashTable[index] ;
        slot.data.id = EMPTY ;
        slot.next = NULL ;
        ++index ;
    }
}
/**
 * Maps a key to its slot in the main table.
 * MODHASH: key modulo primeNum. Otherwise: multiplicative method, the
 * fractional part of key * A scaled to the table size.
 * Negative keys are folded into the valid range as well; the result is
 * identical to the previous formula for non-negative keys.
 * Assumes primeNum > 0, i.e. a table with at least one slot.
 */
int HashTable_overflow::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        // % keeps the sign of the dividend, so shift negative remainders up
        const int remainder = key % primeNum ;
        return remainder < 0 ? remainder + primeNum : remainder ;
    }
    const double scaled = key * A ;
    // floor() instead of truncation keeps the fraction in [0, 1) for negative keys too
    const double fraction = scaled - std::floor(scaled) ;
    return static_cast<int>(fraction * max) ;
}
// Inserts a record. A vacant home slot takes the record directly; otherwise
// the overflow chain is scanned for a duplicate and the record is linked in
// right behind the home slot. insTimes grows by one for the home-slot cases,
// by one per chain node visited and by one for the chain insertion.
// Returns false when the key is already stored.
bool HashTable_overflow::Insert(StaffInfo records)
{
    Node & home = hashTable[hash(records.id)] ;

    if(home.data.id == EMPTY)
    {
        ++insTimes ;
        home.data.id = records.id ;
        home.data.name = records.name ;
        return true ;
    }
    if(home.data.id == records.id)
    {
        ++insTimes ;
        return false ;
    }

    for(Node * cur = home.next ; cur != NULL ; cur = cur->next)
    {
        ++insTimes ;
        if(cur->data.id == records.id)
        {
            return false ;
        }
    }

    Node * fresh = new Node ;
    fresh->data.id = records.id ;
    fresh->data.name = records.name ;
    fresh->next = home.next ;
    home.next = fresh ;   // first node of the overflow chain
    ++insTimes ;
    return true ;
}
int HashTable_overflow::Search(int key)
{
    int hashCode = hash(key) ;
    seaTimes++ ;
    if(hashTable[hashCode].data.id == EMPTY)
    {
        return INVALID ;
    }
    else if(hashTable[hashCode].data.id == key)
    {
        return hashCode ;
    }
    else
    {
        Node * pos = hashTable[hashCode].next ;
        while(pos != NULL)
        {
            if(pos->data.id == key)
            {
                seaTimes++ ;
                return hashCode ;
            }
            seaTimes++ ;
            pos = pos->next ;
        }
        return INVALID ;
    }
}
bool HashTable_overflow::Delete(int key)
{
    int hashCode = hash(key) ;
    if(hashTable[hashCode].data.id == EMPTY)
    {
        return false ;
    }
    else if(hashTable[hashCode].data.id == key)
    {
        if(hashTable[hashCode].next == NULL)
        {
            hashTable[hashCode].data.id = EMPTY ;
            return true ;
        }
        else
        {
            Node * pos = hashTable[hashCode].next ;
            hashTable[hashCode].data.id = pos->data.id ;
            hashTable[hashCode].data.name = pos->data.name ;
            hashTable[hashCode].next = pos->next ;
            delete pos ;
            pos = NULL ;
            return true ;
        }
    }
    else
    {
        Node * pos = &hashTable[hashCode] ;
        while(pos->next != NULL)
        {
            if(pos->next->data.id == key)
            {
                Node * tmp = pos->next ;
                pos->next = tmp->next ;
                delete pos->next ;
                return true ;
            }
            pos = pos->next ;
        }
        return false ;
    }
}
int HashTable_overflow::getInsTimes()
{
    return insTimes ;
}
int HashTable_overflow::getSeaTimes()
{
    return seaTimes ;
}
#endif // HASHTABLE_H_INCLUDED

小文件

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
内散列，外散列，以及带溢出表的散列的实现及性能分析

糊里糊涂的做了一个hash类的继承，使这三种散列实现方式具有相同的外部接口；在main里面调用同一个函数实现了三种散列的性能测试，感觉很牛逼的感觉；当然，是自我感觉。main.cpp#include #include #include #include #include #include "hashTable.h"/** 程序名称：散列结构性能测试程序实现
复制链接

扫一扫