糊里糊涂地做了一个hash类的继承,使这三种散列实现方式具有相同的外部接口;
在main里面调用同一个函数就实现了三种散列的性能测试,感觉很牛逼;当然,只是自我感觉。
main.cpp
#include <stdlib.h>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include "hashTable.h"
/**
程序名称:散列结构性能测试程序
实现功能:对内散列、外散列、带溢出表的内散列分别进行基准测试(填充因子0.5,数据规模小,插入数据随机,
查找数据一半是已插入的数据,一半是重新产生的随机数据,散列函数为除余法)、满填充因子测试
(填充因子为 1)、改变散列函数测试(相乘取整法)、增大数据规模(big)测试、改变
查找数据分布(全部为已插入的数据)测试。
结论 : 采用内散列法,当填充因子很大时(测试程序为1),插入查找次数巨大,效率低
采用外散列法,当数据规模较大时,耗时较多(可能是由于频繁取址操作导致)
采用带溢出表的内散列法,平均性能最好,(应该较外散列法在表不满时更耗空间)
*/
// One row of the final summary table. Rows form a singly linked list that
// test() appends to and printTotalInfo() prints and frees.
struct Info
{
string type ; // label of the test case, as passed to test()
double times ; // elapsed time recorded by test() for the insert and search phases
double insertRate ; // table's insert probe counter divided by the number of input items
double searchRate ; // table's search probe counter divided by the number of input items
Info * next ; // next row, or NULL for the last row
} ;
// Prints the detailed statistics of one test run to standard output.
void printResult(string type,int totalData,int sucInsert,int sucSearch,int insertTimes,int searchTimes,double times) ;
// Inserts insArray into x, looks up seaArray, appends a summary row after rear and prints the run.
void test(Hash & x,int totalData,int insArray[] ,int seaArray[],string type,Info * & rear) ;
// Prints the summary table for the list that starts at head and frees every node of it.
void printTotalInfo(Info * head) ;
/**
 * Benchmark driver.
 *
 * Builds one small and one large set of random keys plus matching search sets
 * (first half copied from the inserted keys, second half fresh random values),
 * then runs five scenarios against each of the three table implementations:
 * baseline, full load factor, alternative hash function, larger data set and
 * "search only inserted keys". Every run is printed immediately by test() and
 * a summary table is printed at the end.
 *
 * @return 0 on completion.
 */
int main()
{
    const int small = 10000 ;
    const int big = 100000 ;
    // static storage: these four buffers hold 220,000 ints in total, which is
    // large enough to exceed a small default stack limit if kept as locals.
    static int randomNum_small[small] ;   // small random insert set
    static int randomNum_big[big] ;       // large random insert set
    static int searchArraySmall[small] ;  // search set for the small runs: half inserted keys, half fresh random
    static int searchArrayBig[big] ;      // search set for the large runs, built the same way

    Info * head = new Info ;              // sentinel node; released by printTotalInfo()
    head->next = NULL ;
    Info * rear = head ;

    // Pseudo-random keys in the range 0..RAND_MAX.
    srand(static_cast<unsigned int>(time(0))) ;
    for(int i = 0 ; i < small ; i++)
    {
        randomNum_small[i] = rand() ;
    }
    for(int i = 0 ; i < big ; i++)
    {
        randomNum_big[i] = rand() ;
    }

    // Search sets: first half is guaranteed to be present, second half is random.
    for(int i = 0 ; i < small/2 ; i++)
    {
        searchArraySmall[i] = randomNum_small[i] ;
    }
    for(int i = small/2 ; i < small ; i++)
    {
        searchArraySmall[i] = rand() ;
    }
    for(int i = 0 ; i < big/2 ; i++)
    {
        searchArrayBig[i] = randomNum_big[i] ;
    }
    for(int i = big/2 ; i < big ; i++)
    {
        searchArrayBig[i] = rand() ;
    }

    // ---- closed hashing (probing inside the table) ----
    // baseline: load factor 0.5, modulo hash
    HashTable_inner x(small*2) ;
    test(x,small,randomNum_small,searchArraySmall,"内散列法—— 基准测试",rear) ;
    // load factor 1
    HashTable_inner xFactorFull(small) ;
    test(xFactorFull,small,randomNum_small,searchArraySmall,"内散列法——改变填充因子",rear) ;
    // multiplicative hash instead of modulo hash
    HashTable_inner xGETINTHASH(small*2,GETINTHASH) ;
    test(xGETINTHASH,small,randomNum_small,searchArraySmall,"内散列法——改变散列函数",rear) ;
    // larger data set
    HashTable_inner xBig(big*2) ;
    test(xBig,big,randomNum_big,searchArrayBig,"内散列法——改变数据规模",rear) ;
    // search exactly the keys that were inserted
    HashTable_inner xDistribution(small*2) ;
    test(xDistribution,small,randomNum_small,randomNum_small,"内散列法——改变数据分布",rear) ;

    // ---- open hashing (separate chaining) ----
    HashTable_open x2(small*2) ;
    test(x2,small,randomNum_small,searchArraySmall,"外散列法——基准测试",rear) ;
    HashTable_open x2FactorFull(small) ;
    test(x2FactorFull,small,randomNum_small,searchArraySmall,"外散列法——改变填充因子",rear) ;
    // NOTE(review): this case still runs with the default modulo hash. Switch to
    // HashTable_open(small*2, GETINTHASH) only after confirming that the
    // two-argument HashTable_open constructor zero-initialises its probe counters.
    HashTable_open x2GETINTHASH(small*2) ;
    test(x2GETINTHASH,small,randomNum_small,searchArraySmall,"外散列法——改变散列函数",rear) ;
    HashTable_open x2Big(big*2) ;
    test(x2Big,big,randomNum_big,searchArrayBig,"外散列法——改变数据规模",rear) ;
    // search exactly the inserted keys, like the other two "distribution" runs
    HashTable_open x2Distribution(small*2) ;
    test(x2Distribution,small,randomNum_small,randomNum_small,"外散列法—— 改变数据分布 ",rear) ;

    // ---- closed hashing with overflow chains ----
    HashTable_overflow x3(small*2) ;
    test(x3,small,randomNum_small,searchArraySmall,"带溢出表的内散列法——基准测试",rear) ;
    HashTable_overflow x3FactorFull(small) ;
    test(x3FactorFull,small,randomNum_small,searchArraySmall,"带溢出表的内散列法——改变填充因子",rear) ;
    // the label promises a different hash function, so actually select it
    HashTable_overflow x3GETINTHASH(small*2,GETINTHASH) ;
    test(x3GETINTHASH,small,randomNum_small,searchArraySmall,"带溢出表达的内散列法——改变散列函数",rear) ;
    HashTable_overflow x3Big(big*2) ;
    test(x3Big,big,randomNum_big,searchArrayBig,"带溢出表的内散列法——改变数据规模",rear) ;
    HashTable_overflow x3Distribution(small*2) ;
    test(x3Distribution,small,randomNum_small,randomNum_small,"带溢出表的内散列法——改变数据分布",rear) ;

    // summary of all runs (also frees the result list)
    printTotalInfo(head) ;
    return 0;
}
/**
 * Runs one benchmark: inserts totalData keys into x, searches totalData keys,
 * records the outcome as a new node appended to the result list and prints it.
 *
 * @param x          table under test (any Hash implementation)
 * @param totalData  number of elements read from insArray and from seaArray
 * @param insArray   keys to insert
 * @param seaArray   keys to look up afterwards
 * @param type       label of the test case, used in the printed output
 * @param rear       last node of the result list; updated to the newly appended node
 */
void test(Hash & x,int totalData,int insArray[] ,int seaArray[],string type,Info * & rear)
{
    int sucInsTimes = 0 ;
    int sucSeaTimes = 0 ;
    std::ostringstream nameBuilder ;   // reused for every record to keep the timed loop cheap

    const clock_t startTime = clock() ;
    for(int i = 0 ; i < totalData ; i++)
    {
        StaffInfo tmp ;
        tmp.id = insArray[i] ;
        // Format the id as text. The former `tmp.id + "'s name"` was pointer
        // arithmetic on the string literal, not a concatenation.
        nameBuilder.str("") ;
        nameBuilder << tmp.id << "'s name" ;
        tmp.name = nameBuilder.str() ;
        if(x.Insert(tmp))
        {
            sucInsTimes++ ;
        }
    }
    for(int i = 0 ; i < totalData ; i++)
    {
        if(x.Search(seaArray[i]) != INVALID)
        {
            sucSeaTimes++ ;
        }
    }
    const clock_t finishTime = clock() ;

    Info * tmpInfo = new Info ;
    tmpInfo->type = type ;
    // clock() ticks -> milliseconds, matching the "ms" unit printed by the report functions.
    tmpInfo->times = 1000.0 * static_cast<double>(finishTime - startTime) / CLOCKS_PER_SEC ;
    // Average probes per element; 0 when there was no input at all.
    tmpInfo->searchRate = totalData > 0 ? static_cast<double>(x.getSeaTimes()) / totalData : 0.0 ;
    tmpInfo->insertRate = totalData > 0 ? static_cast<double>(x.getInsTimes()) / totalData : 0.0 ;
    tmpInfo->next = NULL ;
    rear->next = tmpInfo ;
    rear = tmpInfo ;

    printResult(type,totalData,sucInsTimes,sucSeaTimes,x.getInsTimes(),x.getSeaTimes(),rear->times) ;
}
/**
 * Writes the detailed report of a single test run to standard output.
 *
 * @param type         label of the test case
 * @param totalData    number of elements that were inserted / searched
 * @param sucInsert    number of successful insertions
 * @param sucSearch    number of successful look-ups
 * @param insertTimes  probe counter reported by the table for insertions
 * @param searchTimes  probe counter reported by the table for look-ups
 * @param time         elapsed time value, printed with an "ms" suffix
 */
void printResult(string type,int totalData,int sucInsert,int sucSearch,int insertTimes,int searchTimes,double time)
{
    const int rejected = totalData - sucInsert ;
    const int guaranteedHits = totalData / 2 ;
    const double insertRatio = static_cast<double>(insertTimes) / totalData ;
    const double searchRatio = static_cast<double>(searchTimes) / totalData ;

    // heading
    cout << "\n\t\t" << type << "信息" << endl ;

    // insertion summary
    cout << "插入总数据量为 " << totalData ;
    cout << "\n成功插入的数据量为" << sucInsert ;
    cout << ",其余 " << rejected << " 个数据为重复数据\n" ;

    // look-up summary
    cout << "查找总数据量为 " << totalData ;
    cout << "\n成功查找的数据量为 " << sucSearch ;
    cout << ",总数据中至少有 " << guaranteedHits << " 个数据肯定存在\n" ;

    // probe counters and their ratio to the input size
    cout << "总的插入次数为 " << insertTimes << " ," << "与总插入数据量比为 " << insertRatio << endl ;
    cout << "总的查找次数为 " << searchTimes << " ," << "与总查找数据量比为 " << searchRatio << endl ;

    // timing
    cout << "插入与查找耗时为 " << time << "ms" << endl ;
}
/**
 * Prints the summary table of all test runs and releases the result list.
 *
 * @param head  sentinel node of the list; it and every node after it are deleted,
 *              so the list must not be used after this call.
 */
void printTotalInfo(Info * head)
{
    Info * row = head->next ;   // first real row; the sentinel carries no data
    delete head ;

    cout << "\n\t\t\t\t测试信息汇总\n\n\n\t类型\t\t\t\t耗时(ms)\t插入次数/输入量\t查找次数/输入量\t\n" ;

    for(Info * following = NULL ; row != NULL ; row = following)
    {
        following = row->next ;   // remember the successor before the row is freed
        cout << left << setw(35) << row->type ;
        cout << "\t" << row->times ;
        cout << "\t\t" << row->insertRate ;
        cout << "\t\t" << row->searchRate << endl ;
        delete row ;
    }
}
hashTable.h
#ifndef HASHTABLE_H_INCLUDED
#define HASHTABLE_H_INCLUDED
#include <string>
#include <cmath>
// Sentinel values stored in a slot's id field or returned by lookups.
#define EMPTY -1 // id of an unused slot
#define DELETED -2 // id written into a slot of the closed table when its record is deleted
#define INVALID -3 // returned by Search() / getMaxPrimeNum() when nothing is found
// Selectors for the hash function used by a table.
#define MODHASH 1 // key modulo a prime
#define GETINTHASH 2 // multiply by A, keep the fractional part, scale by the table size
#define A 0.618 // multiplier used by the GETINTHASH hash
//两种散列函数
/*
一种是除余法,MODHASH
另外一种是相乘取整法,GETINTHASH
相乘取整法。
取A = 0.618(根号5减1 除以2,黄金比例)
然后用KEY乘以A,结果取其小数部分,
然后用桶的数量MAX乘以该小数,强制转换为整数后返回
*/
/**
**数据结构:Hash表
**实现功能:插入,删除,查找
**实现方式:内散列,外散列,带溢出表的内散列
*/
using namespace std ;
/**
 * Tells whether n is a prime number.
 *
 * Values below 2 (including negatives) are not prime; 2 is the only even prime.
 * Defined inline because this header carries the definition.
 *
 * @param n  number to test
 * @return   true if n is prime, false otherwise
 */
inline bool isPrimeNum(int n)
{
    if(n < 2)
        return false ;
    if(n % 2 == 0)
        return n == 2 ;
    // "i <= n / i" is "i * i <= n" written so that it cannot overflow int.
    for(int i = 3 ; i <= n / i ; i += 2)
    {
        if(n % i == 0)
            return false ;
    }
    return true ;
}
/**
 * Returns the largest prime that is less than or equal to n.
 *
 * The tables in this header use the result as the modulus of their hash
 * function, so for n == 1 (where no prime exists) the value 1 is returned to
 * keep a one-slot table usable, as it always has been.
 *
 * @param n  upper bound (inclusive)
 * @return   the largest prime <= n, 1 for n == 1, INVALID for n <= 0
 */
inline int getMaxPrimeNum(int n )
{
    for(int i = n ; i >= 2 ; i--)
    {
        if(isPrimeNum(i))
            return i ;
    }
    return n == 1 ? 1 : INVALID ;
}
// Record stored in the hash tables.
struct StaffInfo
{
int id ; // key of the record; the values EMPTY and DELETED are also stored here to mark slot state
string name ; // payload associated with the key
} ;
/**
 * Common interface of the hash table implementations, so that callers can
 * work with any of them through a Hash reference or pointer.
 */
class Hash
{
public :
    // Virtual so that deleting a table through a Hash pointer is well defined.
    virtual ~Hash() {}
    // Looks up key; returns a slot/bucket index on success, INVALID otherwise.
    virtual int Search(int key) = 0 ;
    // Inserts a record; returns false when it was not stored.
    virtual bool Insert(StaffInfo records)=0 ;
    // Removes the record with the given key; returns false when it was not found.
    virtual bool Delete(int key) = 0 ;
    // Probe counter accumulated by Insert().
    virtual int getInsTimes()=0 ;
    // Probe counter accumulated by Search().
    virtual int getSeaTimes()=0;
} ;
class HashTable_inner : public Hash//内散列表
{
public :
HashTable_inner(int n) ;
HashTable_inner(int n,int type) ;
int Search(int key ) ;
bool Insert(StaffInfo records) ;
bool Delete(int key) ;
StaffInfo Retrieve(int key) ;
int getSeaTimes() ;
int getInsTimes() ;
private :
int hash(int key) ;
int max ;
StaffInfo * hashTable ;
int primeNum ;
int InsertTimes ;
int searchTimes ;
int HASHTYPE ;
} ;
// Singly linked list node used by the chained tables.
struct Node
{
StaffInfo data ; // record held by this node
Node * next ; // following node in the chain, or NULL
} ;
//----------------------------------
class HashTable_open : public Hash//外散列
{
public :
HashTable_open(int n) ;
HashTable_open(int n , int HASHTYPE) ;
bool Insert(StaffInfo val) ;
int Search(int key) ;
bool Delete(int key) ;
StaffInfo Retrieve(int key) ;
int getInsTimes() ;
int getSeaTimes() ;
private :
int hash(int key) ;
int max ;
Node ** hashTable ;//散列表里只存指针
int insTimes ;
int seaTimes ;
int primeNum ;
int HASHTYPE ;
} ;
//----------------------------------------
class HashTable_overflow : public Hash //带溢出表的内散列
{
public :
HashTable_overflow(int n) ;
HashTable_overflow(int n , int HASHTYPE) ;
int Search(int k) ;
bool Insert(StaffInfo records) ;
bool Delete(int key) ;
int getInsTimes() ;
int getSeaTimes() ;
private :
int hash(int key) ;
Node * hashTable ;
int primeNum ;
int insTimes ;
int seaTimes ;
int max ;
int HASHTYPE ;
} ;
/**
 * Builds a closed table with n slots, all marked EMPTY, using the modulo hash
 * and with both probe counters at zero.
 */
HashTable_inner::HashTable_inner(int n)
    : max(n),
      hashTable(new StaffInfo[n]),
      primeNum(getMaxPrimeNum(n)),
      InsertTimes(0),
      searchTimes(0),
      HASHTYPE(MODHASH)
{
    // mark every slot as unused
    for(int slot = max ; slot-- > 0 ; )
    {
        hashTable[slot].id = EMPTY ;
    }
}
/**
 * Builds a closed table with n slots, all marked EMPTY, and both probe
 * counters at zero. type == MODHASH selects the modulo hash; any other value
 * selects the multiplicative hash (GETINTHASH).
 */
HashTable_inner::HashTable_inner(int n, int type)
    : max(n),
      hashTable(new StaffInfo[n]),
      primeNum(getMaxPrimeNum(n)),
      InsertTimes(0),
      searchTimes(0),
      HASHTYPE(type == MODHASH ? MODHASH : GETINTHASH)
{
    // mark every slot as unused
    StaffInfo * slot = hashTable ;
    for(int remaining = max ; remaining > 0 ; --remaining, ++slot)
    {
        slot->id = EMPTY ;
    }
}
/**
 * Maps a key to a slot index in [0, max).
 *
 * MODHASH: key modulo primeNum. Otherwise: multiply the key by A, keep the
 * fractional part and scale it by the table size. Negative keys are folded
 * into the valid range instead of producing a negative index; results for
 * non-negative keys are unchanged.
 */
int HashTable_inner::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        int slot = key % primeNum ;
        if(slot < 0)
        {
            slot += primeNum ;   // % keeps the sign of a negative key
        }
        return slot ;
    }
    const double scaled = key * A ;
    double fraction = scaled - static_cast<int>(scaled) ;
    if(fraction < 0)
    {
        fraction += 1.0 ;        // truncation leaves a negative fraction for negative keys
    }
    int slot = static_cast<int>(max * fraction) ;
    if(slot >= max)
    {
        slot = max - 1 ;         // guard against rounding up to the table size
    }
    return slot ;
}
/**
 * Inserts a record using quadratic probing.
 *
 * Probe k visits slot (hash + 1^2 + 2^2 + ... + k^2) % max, the same sequence
 * Search() follows. The slot index is reduced modulo max after every step, so
 * long probe chains cannot overflow int and yield a negative index. The probe
 * sequence is followed up to the first EMPTY slot before the key is stored, so
 * a key that already exists behind a DELETED slot is still recognised as a
 * duplicate; the record is then written to the first reusable slot seen.
 *
 * InsertTimes is incremented once per slot examined.
 *
 * @param records  record to store; records.id is the key
 * @return true if the record was stored; false if the key is already present
 *         or no free slot was found within max probes
 */
bool HashTable_inner::Insert(StaffInfo records)
{
    int slot = hash(records.id) ;
    int target = INVALID ;   // first slot (DELETED or EMPTY) the record may be written to

    for(int probe = 1 ; probe <= max ; probe++)
    {
        InsertTimes++ ;
        const int occupant = hashTable[slot].id ;
        if(occupant == records.id)
        {
            return false ;   // key already stored
        }
        if(occupant == EMPTY)
        {
            // End of the probe chain: the key cannot be stored further along.
            if(target == INVALID)
            {
                target = slot ;
            }
            break ;
        }
        if(occupant == DELETED && target == INVALID)
        {
            target = slot ;  // reusable, but keep looking for a duplicate
        }
        const long long step = (static_cast<long long>(probe) * probe) % max ;
        slot = static_cast<int>((slot + step) % max) ;
    }

    if(target == INVALID)
    {
        return false ;       // no free slot on the probe sequence
    }
    hashTable[target].id = records.id ;
    hashTable[target].name = records.name ;
    return true ;
}
int HashTable_inner::Search(int key)
{
int first = hash(key) ;
int counter = 0 ;
int reHash = 0 ;
int hashCode = first ;
while(counter < max && hashTable[hashCode].id != EMPTY)
{
searchTimes++ ;
if(hashTable[hashCode].id == key)
{
return hashCode ;
}
else
{
counter++ ;
reHash = counter*counter ;
hashCode = (hashCode + reHash)% max ;
}
}
return INVALID ;
}
/**
 * Removes the record with the given key by marking its slot DELETED.
 * The lookup goes through Search(), so it also advances the search counter.
 *
 * @return true if the key was found and marked, false otherwise
 */
bool HashTable_inner::Delete(int key)
{
    const int slot = Search(key) ;
    if(slot == INVALID)
    {
        return false ;
    }
    hashTable[slot].id = DELETED ;
    return true ;
}
int HashTable_inner::getSeaTimes()
{
return searchTimes ;
}
int HashTable_inner::getInsTimes()
{
return InsertTimes ;
}
//------------------------------------------
/**
 * Builds a chained table with n empty buckets, using the modulo hash and with
 * both probe counters at zero.
 */
HashTable_open::HashTable_open(int n)
    : max(n),
      hashTable(new Node*[n]),
      insTimes(0),
      seaTimes(0),
      primeNum(getMaxPrimeNum(n)),
      HASHTYPE(MODHASH)
{
    // every bucket starts without a chain
    for(int bucket = max ; bucket-- > 0 ; )
    {
        hashTable[bucket] = NULL ;
    }
}
/**
 * Builds a chained table with n empty buckets and the requested hash
 * function (MODHASH or GETINTHASH).
 *
 * Both probe counters are reset here as well; they were previously left
 * uninitialised by this overload, so getInsTimes()/getSeaTimes() returned
 * indeterminate values.
 */
HashTable_open::HashTable_open(int n , int HASHTYPE)
{
    max = n ;
    hashTable = new Node * [max] ;
    for(int i = 0 ; i < max ; i++)
    {
        hashTable[i] = NULL ;
    }
    insTimes = 0 ;
    seaTimes = 0 ;
    this->HASHTYPE = HASHTYPE ;   // the parameter shadows the member
    primeNum = getMaxPrimeNum(max) ;
}
/**
 * Maps a key to a bucket index in [0, max).
 *
 * MODHASH: key modulo primeNum. Otherwise: multiply the key by A, keep the
 * fractional part and scale it by the number of buckets. Negative keys are
 * folded into the valid range instead of producing a negative index; results
 * for non-negative keys are unchanged.
 */
int HashTable_open::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        int bucket = key % primeNum ;
        if(bucket < 0)
        {
            bucket += primeNum ;   // % keeps the sign of a negative key
        }
        return bucket ;
    }
    const double scaled = key * A ;
    double fraction = scaled - static_cast<int>(scaled) ;
    if(fraction < 0)
    {
        fraction += 1.0 ;          // truncation leaves a negative fraction for negative keys
    }
    int bucket = static_cast<int>(fraction * max) ;
    if(bucket >= max)
    {
        bucket = max - 1 ;         // guard against rounding up to the bucket count
    }
    return bucket ;
}
/**
 * Inserts a record at the head of its bucket's chain unless the key is
 * already present.
 *
 * The node is allocated only after the duplicate scan, so a rejected insert
 * no longer leaks a node. insTimes is incremented once for every chain node
 * passed over and once more for the insertion itself.
 *
 * @param val  record to store; val.id is the key
 * @return true if the record was stored, false if the key already exists
 */
bool HashTable_open::Insert(StaffInfo val)
{
    const int hashCode = hash(val.id) ;
    for(Node * pos = hashTable[hashCode] ; pos != NULL ; pos = pos->next)
    {
        if(pos->data.id == val.id)
        {
            return false ;   // duplicate key: nothing is inserted
        }
        insTimes++ ;
    }

    Node * fresh = new Node ;
    fresh->data.id = val.id ;
    fresh->data.name = val.name ;
    fresh->next = hashTable[hashCode] ;
    hashTable[hashCode] = fresh ;   // new records go to the front of the chain
    insTimes++ ;
    return true ;
}
int HashTable_open::Search(int key)
{
seaTimes++ ;
int hashCode = hash(key) ;
Node * pos = hashTable[hashCode] ;
while(pos != NULL)
{
if(pos->data.id == key)
{
return hashCode ;
}
seaTimes++ ;
pos = pos->next ;
}
return INVALID ;
}
bool HashTable_open::Delete(int key)
{
int hashCode = hash(key) ;
Node * pos = hashTable[hashCode] ;
if(pos != NULL)
{
if(pos->data.id == key)
{
delete pos ;
pos = NULL ;
return true ;
}
else
{
Node * posNext = pos->next ;
while(posNext != NULL)
{
if(posNext->data.id == key)
{
pos->next = posNext->next ;
delete posNext ;
return true ;
}
}
return false ;
}
}
return false ;
}
int HashTable_open::getInsTimes()
{
return insTimes ;
}
int HashTable_open::getSeaTimes()
{
return seaTimes ;
}
//---------------------------------
/**
 * Builds a table with n main slots, each EMPTY and without an overflow
 * chain, using the modulo hash and with both probe counters at zero.
 */
HashTable_overflow::HashTable_overflow(int n)
    : hashTable(new Node[n]),
      primeNum(getMaxPrimeNum(n)),
      insTimes(0),
      seaTimes(0),
      max(n),
      HASHTYPE(MODHASH)
{
    for(int i = 0 ; i < max ; i++)
    {
        Node & slot = hashTable[i] ;
        slot.data.id = EMPTY ;
        slot.next = NULL ;
    }
}
/**
 * Builds a table with n main slots, each EMPTY and without an overflow
 * chain, with both probe counters at zero and the requested hash function
 * selector stored as given.
 */
HashTable_overflow::HashTable_overflow(int n,int HASHTYPE)
    : hashTable(new Node[n]),
      primeNum(getMaxPrimeNum(n)),
      insTimes(0),
      seaTimes(0),
      max(n)
{
    this->HASHTYPE = HASHTYPE ;   // the parameter shadows the member
    for(int i = 0 ; i < max ; i++)
    {
        Node & slot = hashTable[i] ;
        slot.data.id = EMPTY ;
        slot.next = NULL ;
    }
}
/**
 * Maps a key to a main-slot index in [0, max).
 *
 * MODHASH: key modulo primeNum. Otherwise: multiply the key by A, keep the
 * fractional part and scale it by the table size. Negative keys are folded
 * into the valid range instead of producing a negative index; results for
 * non-negative keys are unchanged.
 */
int HashTable_overflow::hash(int key)
{
    if(HASHTYPE == MODHASH)
    {
        int slot = key % primeNum ;
        if(slot < 0)
        {
            slot += primeNum ;   // % keeps the sign of a negative key
        }
        return slot ;
    }
    const double scaled = key * A ;
    double fraction = scaled - static_cast<int>(scaled) ;
    if(fraction < 0)
    {
        fraction += 1.0 ;        // truncation leaves a negative fraction for negative keys
    }
    int slot = static_cast<int>(fraction * max) ;
    if(slot >= max)
    {
        slot = max - 1 ;         // guard against rounding up to the table size
    }
    return slot ;
}
/**
 * Inserts a record: into the main slot when that slot is EMPTY, otherwise at
 * the front of the slot's overflow chain unless the key is already present.
 *
 * insTimes is incremented once when the main slot decides the outcome, and
 * otherwise once per overflow node examined plus once for the insertion.
 *
 * @param records  record to store; records.id is the key
 * @return true if the record was stored, false if the key already exists
 */
bool HashTable_overflow::Insert(StaffInfo records)
{
    Node & home = hashTable[hash(records.id)] ;

    // main slot is free: store the record there
    if(home.data.id == EMPTY)
    {
        insTimes++ ;
        home.data.id = records.id ;
        home.data.name = records.name ;
        return true ;
    }
    // main slot already holds this key
    if(home.data.id == records.id)
    {
        insTimes++ ;
        return false ;
    }
    // scan the overflow chain for a duplicate
    for(Node * cur = home.next ; cur != NULL ; cur = cur->next)
    {
        insTimes++ ;
        if(cur->data.id == records.id)
        {
            return false ;
        }
    }
    // not present: link a new node directly behind the main slot
    Node * extra = new Node ;
    extra->data.id = records.id ;
    extra->data.name = records.name ;
    extra->next = home.next ;
    home.next = extra ;
    insTimes++ ;
    return true ;
}
int HashTable_overflow::Search(int key)
{
int hashCode = hash(key) ;
seaTimes++ ;
if(hashTable[hashCode].data.id == EMPTY)
{
return INVALID ;
}
else if(hashTable[hashCode].data.id == key)
{
return hashCode ;
}
else
{
Node * pos = hashTable[hashCode].next ;
while(pos != NULL)
{
if(pos->data.id == key)
{
seaTimes++ ;
return hashCode ;
}
seaTimes++ ;
pos = pos->next ;
}
return INVALID ;
}
}
bool HashTable_overflow::Delete(int key)
{
int hashCode = hash(key) ;
if(hashTable[hashCode].data.id == EMPTY)
{
return false ;
}
else if(hashTable[hashCode].data.id == key)
{
if(hashTable[hashCode].next == NULL)
{
hashTable[hashCode].data.id = EMPTY ;
return true ;
}
else
{
Node * pos = hashTable[hashCode].next ;
hashTable[hashCode].data.id = pos->data.id ;
hashTable[hashCode].data.name = pos->data.name ;
hashTable[hashCode].next = pos->next ;
delete pos ;
pos = NULL ;
return true ;
}
}
else
{
Node * pos = &hashTable[hashCode] ;
while(pos->next != NULL)
{
if(pos->next->data.id == key)
{
Node * tmp = pos->next ;
pos->next = tmp->next ;
delete pos->next ;
return true ;
}
pos = pos->next ;
}
return false ;
}
}
int HashTable_overflow::getInsTimes()
{
return insTimes ;
}
int HashTable_overflow::getSeaTimes()
{
return seaTimes ;
}
#endif // HASHTABLE_H_INCLUDED