多核计算与程序设计 - 06 基本算法和数据结构之三哈希表与哈希链表

最新推荐文章于 2023-11-27 13:09:31 发布

zy__

最新推荐文章于 2023-11-27 13:09:31 发布

阅读量973

点赞数 1

分类专栏：多线程与并行程序设计

本文链接：https://blog.csdn.net/wangzhiyu1980/article/details/8458553

版权

多线程与并行程序设计专栏收录该内容

24 篇文章 3 订阅

订阅专栏

本文介绍了哈希表的索引方法，如整除取余法、折叠法、平方取中法和随机函数法，以及解决哈希冲突的链表存储法、索引探测法。通过示例展示了插入和查找操作，讨论了哈希链表如何结合哈希表和链表的优点。

摘要由CSDN通过智能技术生成

一，哈希表

哈希表：hash(杂乱信息的意思) 的音译，用来把一些杂乱无章的信息根据其关键字的特点映射到一个连续的空间，操作简单，用途广泛，例如：电子词典。

这里用到的映射方法称为索引方法。对应的实现函数称为哈希函数。将映射后的值称为索引。

1）哈希表的索引方法

整除取余法

对于整数而言，是很常用的一个方法，能够很快的计算出索引值，但有时候会产生较多的相同的索引，使算法退化。

最差的时候会变成顺序查找。

// Division-remainder hash: maps nKey onto a bucket index in [0, uBucketCount).
//
// nKey         - integer key to hash.
// uBucketCount - number of buckets; must be greater than zero.
//
// The C++ '%' operator keeps the sign of the dividend, so a negative key
// would yield a negative index. Such a remainder is shifted back into range
// so the result can always be used as an array subscript.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A cheaper variant of the remainder hash.
// Pick a power-of-two bucket count (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of the mask is '1'.
// The index is then the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}

折叠法

当关键词位数很长时，可以将关键字分割成位数相同的几部分，把每部分转换成计算机可以识别的整数，再将整数相加，得到新的整数，再把得到的新的整数用上面的取余法进行运算，得到hash值。

// Folding hash for C strings.
//
// The key is consumed in groups of up to five characters. Inside a group the
// running value is multiplied by 9 (v += v << 3) and the next character is
// added; each finished group is added to the total, and the total is reduced
// modulo uBucketCount. The per-group transform is only an example and can be
// replaced by one that suits the keys being hashed.
//
// strKey       - NUL-terminated key; a null pointer hashes to 0.
// uBucketCount - number of buckets; a value <= 0 yields 0.
//
// Returns an index in [0, uBucketCount).
int hashString(char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic on unsigned bytes: characters >= 0x80 must not
    // contribute negative values, and very long keys wrap around instead of
    // triggering signed-overflow undefined behaviour.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const unsigned char* p = reinterpret_cast<const unsigned char*>(strKey);
         *p != '\0'; ++p)
    {
        // A group of five characters is complete: fold it into the total.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        uGroup += *p;
        ++nInGroup;
    }

    // Fold in the last (possibly partial) group.
    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

平方取中法

将关键字进行平方运算后，再取中间几位作为索引。

// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit counts as digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025 -> 990.
//
// The square is computed in 64 bits so it cannot overflow int, and the
// digits are extracted with fixed divisors so that small keys (square below
// 100000) never cause a modulo by zero.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

随机函数法

用随机函数产生hash值。

2）哈希表的冲突解决方法

不同的关键字，通过hash算法可能得到相同的hash值，这就产生了冲突。

解决冲突的方法：

链表存储法

将同一索引的关键词放在一个链表中，将哈希表的索引指向链表的表头。

索引探测法

索引探测法的基本思想是发现索引有冲突后，在索引的位置向后查找一个空的索引位置，将数据存放在此索引的位置。

索引探测法公式描述：

H(i) = ( Hash ( key ) + P(i) ) % uBucketCount

线性探测法：

当发现索引处已经存储数据时，从索引位置向后按顺序查找一个空索引的位置，将数据存储在这个空位置上。

P(i) = i

二次探测法和伪随机探测法：

P(i) = i * i 时为二次探测法

P(i) = 随机序列时为伪随机探测法

实例代码：

hash_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_TABLE_H__
#define __HASH_TABLE_H__

// Node of a singly linked bucket chain.
struct _SINGLE_NODE
{
    int                  nData;   // stored value (also used as the key)
    struct _SINGLE_NODE *pNext;   // next node in the same chain, or NULL
};
typedef struct _SINGLE_NODE SINGLE_NODE;


// Hash table of ints that resolves collisions by chaining: each bucket holds
// the head of a singly linked list of SINGLE_NODE.
class MyHashTable
{
public:
    MyHashTable();
    ~MyHashTable();

    // Adds nData to the table and returns the node count after the insert.
    int  InsertHashTable(int nData);
    // Returns nData when it is stored in the table, -1 otherwise.
    int  FindHash(int nData);
    // Removes a node from the bucket that nData hashes to.
    void DeleteHash(int nData);

private:
    // The table owns raw heap memory that the destructor releases, so an
    // implicit member-wise copy would lead to a double free. Copying is
    // therefore disabled: declared private and intentionally not defined.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;       // array of m_nBucketCount chain heads
    int            m_nBucketCount;  // number of buckets
    int            m_nNodeCount;    // number of stored nodes
    int            m_nCurrNodeID;   // set by the constructor; not otherwise used here
    SINGLE_NODE  * m_pCurrentNode;  // set by the constructor; not otherwise used here

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif

hash_table.cpp

#include "hash_table.h"

// Division-remainder hash: maps nKey onto a bucket index in [0, uBucketCount).
//
// nKey         - integer key to hash.
// uBucketCount - number of buckets; must be greater than zero.
//
// The C++ '%' operator keeps the sign of the dividend, so a negative key
// would yield a negative index. Such a remainder is shifted back into range
// so the result can always be used as an array subscript.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A cheaper variant of the remainder hash.
// Pick a power-of-two bucket count (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of the mask is '1'.
// The index is then the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}


// Folding hash for C strings.
//
// The key is consumed in groups of up to five characters. Inside a group the
// running value is multiplied by 9 (v += v << 3) and the next character is
// added; each finished group is added to the total, and the total is reduced
// modulo uBucketCount. The per-group transform is only an example and can be
// replaced by one that suits the keys being hashed.
//
// strKey       - NUL-terminated key; a null pointer hashes to 0.
// uBucketCount - number of buckets; a value <= 0 yields 0.
//
// Returns an index in [0, uBucketCount).
int hashString(char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic on unsigned bytes: characters >= 0x80 must not
    // contribute negative values, and very long keys wrap around instead of
    // triggering signed-overflow undefined behaviour.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const unsigned char* p = reinterpret_cast<const unsigned char*>(strKey);
         *p != '\0'; ++p)
    {
        // A group of five characters is complete: fold it into the total.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        uGroup += *p;
        ++nInGroup;
    }

    // Fold in the last (possibly partial) group.
    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit counts as digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025; 5499025 / 100 = 54990; 54990 % 1000 = 990.
//
// The square is computed in 64 bits so it cannot overflow int, and the
// digits are extracted with fixed divisors so that small keys (square below
// 100000) never cause a modulo by zero.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Builds an empty table with 1024 buckets, every bucket head set to null.
MyHashTable::MyHashTable()
    : m_pBucket(NULL),
      m_nBucketCount(1024),
      m_nNodeCount(0),
      m_nCurrNodeID(0),
      m_pCurrentNode(NULL)
{
    // One pointer-sized slot per bucket, zero-filled so each chain starts empty.
    const size_t cbBuckets = m_nBucketCount * sizeof(SINGLE_NODE*);
    m_pBucket = (SINGLE_NODE**)malloc(cbBuckets);
    memset(m_pBucket, 0, cbBuckets);
}

// Releases every node of every bucket chain and then the bucket array itself
// (the array is allocated with malloc in the constructor and must be freed
// here, otherwise it leaks).
MyHashTable::~MyHashTable()
{
    if (m_pBucket == NULL)
    {
        return;
    }

    for (int i = 0; i < m_nBucketCount; i++)
    {
        SINGLE_NODE *pNode = m_pBucket[i];
        while (pNode != NULL)
        {
            // Remember the successor before the node is released.
            SINGLE_NODE *pFollowing = pNode->pNext;
            free(pNode);
            pNode = pFollowing;
        }
        m_pBucket[i] = NULL;
    }

    free(m_pBucket);
    m_pBucket = NULL;
    m_nNodeCount = 0;
}

// Maps nKey to a bucket index in [0, m_nBucketCount).
// The remainder of a negative key can be negative; it is shifted back into
// range here so that the result never indexes before the bucket array.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}


int MyHashTable::InsertHashTable(int nData)
{
    SINGLE_NODE *pNode = NULL;
    SINGLE_NODE *pNewNode;
	int nHashValue;

    pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));

    nHashValue = calcHash(nData);
	pNode = m_pBucket[nHashValue];

	cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

	pNewNode->nData = nData;
	pNewNode->pNext = pNode;

    m_pBucket[nHashValue] = pNewNode;
    m_nNodeCount += 1;

	return m_nNodeCount;
}

int MyHashTable::FindHash(int nData)
{
    SINGLE_NODE *pNode;
	int nHashValue;

    nHashValue = calcHash(nData);
	pNode = m_pBucket[nHashValue];

    while(pNode != NULL)
	{
	    if (pNode->nData == nData)
		{
			cout << "FindHash find it Key= " <<nHashValue << " Data=" << pNode->nData << endl;
		    return pNode->nData;
		}
		pNode = pNode->pNext;
	}

	return -1;
}

void MyHashTable::DeleteHash(int nData)
{
    SINGLE_NODE *pNode;
	int nHashValue;

    nHashValue = calcHash(nData);
	pNode = m_pBucket[nHashValue];

    if (pNode != NULL)
	{
	    m_pBucket[nHashValue] = pNode->pNext;
		free(pNode);
		pNode = NULL;
	}
}

main.cpp 测试程序

#include "hash_table.h"

// Demo driver: inserts ten values, looks each of them up, deletes the fourth
// one and looks everything up again. Waits for an integer on stdin before
// exiting so a console window stays open.
// main must return int in standard C++ ("void main" is rejected by
// conforming compilers).
int main()
{
    const int kCount = 10;
    int testArr[kCount] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < kCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < kCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < kCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    // Keep the console open until the user types something.
    cin >> i;

    return 0;
}

测试结果：

Insert Hash Key: 123 Value=123
Insert Hash Key: 236 Value=4332
Insert Hash Key: 223 Value=223
Insert Hash Key: 472 Value=8664
Insert Hash Key: 191 Value=2239
Insert Hash Key: 689 Value=89777
Insert Hash Key: 183 Value=54455
Insert Hash Key: 32 Value=32
Insert Hash Key: 23 Value=23
Insert Hash Key: 834 Value=64322

Finding the hash kes and values

FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 472 Data=8664
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================

Test the 3 the key is deleted

FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================

二，哈希链表

哈希表可以实现快速的精确查找，但无法实现排序操作，链表可以实现有序的输出，但在查找时时间较长，为了结合哈希表和链表的优点这里实现了一个哈希链表。

哈希链表的数据结构是结合了链表和哈希表的复合数据结构。

实例代码：

hash_link_table.h

#include <iostream>

using namespace std;

#ifndef __HASH_LINK_TABLE_H__
#define __HASH_LINK_TABLE_H__



// Node shared by two structures: the bucket chain of the hash table and the
// doubly linked list that keeps all values in sorted order.
struct _SINGLE_NODE
{
    int                  nData;      // stored value (also used as the key)
    struct _SINGLE_NODE *pHashNext;  // next node in the same bucket chain
    struct _SINGLE_NODE *pPrev;      // previous node in the sorted list
    struct _SINGLE_NODE *pNext;      // next node in the sorted list
};
typedef struct _SINGLE_NODE SINGLE_NODE;


// Hash-linked table of ints: a chained hash table for exact lookup combined
// with a doubly linked list that keeps every stored value in sorted order.
class MyHashTable
{
public:
    MyHashTable();
    ~MyHashTable();

    // Adds nData to the table and returns the node count after the insert.
    int  InsertHashTable(int nData);
    // Returns nData when it is stored in the table, -1 otherwise.
    int  FindHash(int nData);
    // Removes a node from the bucket that nData hashes to.
    void DeleteHash(int nData);
    // Prints every node of the sorted list, head to tail.
    void ShowSortLink();

private:
    // The table owns raw heap memory that the destructor releases, so an
    // implicit member-wise copy would lead to a double free. Copying is
    // therefore disabled: declared private and intentionally not defined.
    MyHashTable(const MyHashTable&);
    MyHashTable& operator=(const MyHashTable&);

    SINGLE_NODE ** m_pBucket;       // array of m_nBucketCount chain heads
    int            m_nBucketCount;  // number of buckets
    int            m_nNodeCount;    // number of stored nodes
    int            m_nCurrNodeID;   // set by the constructor; not otherwise used here
    SINGLE_NODE  * m_pCurrentNode;  // set by the constructor; not otherwise used here
    SINGLE_NODE  * m_pLinkHead;     // first node of the sorted list
    SINGLE_NODE  * m_pLinkTail;     // last node of the sorted list

    // Maps a key to its bucket index.
    int calcHash(int nKey);
};

#endif

hash_link_table.cpp

#include "hash_link_table.h"

// Division-remainder hash: maps nKey onto a bucket index in [0, uBucketCount).
//
// nKey         - integer key to hash.
// uBucketCount - number of buckets; must be greater than zero.
//
// The C++ '%' operator keeps the sign of the dividend, so a negative key
// would yield a negative index. Such a remainder is shifted back into range
// so the result can always be used as an array subscript.
int hashInt1(int nKey, int uBucketCount)
{
    int nIndex = nKey % uBucketCount;
    if (nIndex < 0)
    {
        nIndex += uBucketCount;
    }
    return nIndex;
}

// A cheaper variant of the remainder hash.
// Pick a power-of-two bucket count (1024, 2048, ...) and pass
// uMask = bucket count - 1, so that every bit of the mask is '1'.
// The index is then the bitwise AND of the key and the mask.
int hashInt2(int nKey, int uMask)
{
    const int nIndex = uMask & nKey;
    return nIndex;
}


// Folding hash for C strings.
//
// The key is consumed in groups of up to five characters. Inside a group the
// running value is multiplied by 9 (v += v << 3) and the next character is
// added; each finished group is added to the total, and the total is reduced
// modulo uBucketCount. The per-group transform is only an example and can be
// replaced by one that suits the keys being hashed.
//
// strKey       - NUL-terminated key; a null pointer hashes to 0.
// uBucketCount - number of buckets; a value <= 0 yields 0.
//
// Returns an index in [0, uBucketCount).
int hashString(char* strKey, int uBucketCount)
{
    if (!strKey || uBucketCount <= 0)
    {
        return 0;
    }

    // Unsigned arithmetic on unsigned bytes: characters >= 0x80 must not
    // contribute negative values, and very long keys wrap around instead of
    // triggering signed-overflow undefined behaviour.
    unsigned int uTotal = 0;
    unsigned int uGroup = 0;
    int nInGroup = 0;

    for (const unsigned char* p = reinterpret_cast<const unsigned char*>(strKey);
         *p != '\0'; ++p)
    {
        // A group of five characters is complete: fold it into the total.
        if (nInGroup == 5)
        {
            uTotal += uGroup;
            uGroup = 0;
            nInGroup = 0;
        }

        uGroup += uGroup << 3;
        uGroup += *p;
        ++nInGroup;
    }

    // Fold in the last (possibly partial) group.
    uTotal += uGroup;
    return static_cast<int>(uTotal % static_cast<unsigned int>(uBucketCount));
}

// Mid-square hash: squares the key and returns decimal digits 2..4 of the
// square (the units digit counts as digit 0), i.e. a value in [0, 999].
// Example: 2345 * 2345 = 5499025; 5499025 / 100 = 54990; 54990 % 1000 = 990.
//
// The square is computed in 64 bits so it cannot overflow int, and the
// digits are extracted with fixed divisors so that small keys (square below
// 100000) never cause a modulo by zero.
int hashSqrMid234(int nKey)
{
    const long long llSquare = static_cast<long long>(nKey) * nKey;
    return static_cast<int>((llSquare / 100) % 1000);
}

// Builds an empty table: 1024 null bucket heads and an empty sorted list.
MyHashTable::MyHashTable()
    : m_pBucket(NULL),
      m_nBucketCount(1024),
      m_nNodeCount(0),
      m_nCurrNodeID(0),
      m_pCurrentNode(NULL),
      m_pLinkHead(NULL),
      m_pLinkTail(NULL)
{
    // One pointer-sized slot per bucket, zero-filled so each chain starts empty.
    const size_t cbBuckets = m_nBucketCount * sizeof(SINGLE_NODE*);
    m_pBucket = (SINGLE_NODE**)malloc(cbBuckets);
    memset(m_pBucket, 0, cbBuckets);
}

// Releases every node and then the bucket array.
//
// Each bucket chain is walked through pHashNext. pNext belongs to the sorted
// list, which threads through nodes of all buckets; following it from a
// bucket head would free nodes owned by other buckets and free them again
// when those buckets are visited.
MyHashTable::~MyHashTable()
{
    if (m_pBucket != NULL)
    {
        for (int i = 0; i < m_nBucketCount; i++)
        {
            SINGLE_NODE *pNode = m_pBucket[i];
            while (pNode != NULL)
            {
                // Remember the successor before the node is released.
                SINGLE_NODE *pFollowing = pNode->pHashNext;
                free(pNode);
                pNode = pFollowing;
            }
            m_pBucket[i] = NULL;
        }

        // The array itself was allocated with malloc in the constructor.
        free(m_pBucket);
        m_pBucket = NULL;
    }

    m_pLinkHead = NULL;
    m_pLinkTail = NULL;
    m_nNodeCount = 0;
}

// Maps nKey to a bucket index in [0, m_nBucketCount).
// The remainder of a negative key can be negative; it is shifted back into
// range here so that the result never indexes before the bucket array.
int MyHashTable::calcHash(int nKey)
{
    int nIndex = hashInt1(nKey, m_nBucketCount);
    if (nIndex < 0)
    {
        nIndex += m_nBucketCount;
    }
    return nIndex;
}


int MyHashTable::InsertHashTable(int nData)
{
    SINGLE_NODE *pNode = NULL;
    SINGLE_NODE *pNewNode;
    int nHashValue;

    pNewNode = (SINGLE_NODE*)malloc(sizeof(SINGLE_NODE));

    nHashValue = calcHash(nData);
    pNode = m_pBucket[nHashValue];

    cout << "Insert Hash Key: " << nHashValue  << "  Value=" << nData << endl;

    pNewNode->nData = nData;
    pNewNode->pHashNext = pNode;

    m_pBucket[nHashValue] = pNewNode;
    m_nNodeCount += 1;


    // add to link
    pNewNode->pNext = NULL;
    pNewNode->pPrev = NULL;
    if (m_pLinkHead == NULL)
    {
        m_pLinkHead = pNewNode;
        m_pLinkTail = pNewNode;
    }
    else
    {
        pNode = m_pLinkHead;
        while ((pNode->pNext != NULL) && (pNewNode->nData > pNode->nData))
        {
            pNode = pNode->pNext;
        }

        if ((pNode->pNext == NULL) && (pNewNode->nData >  pNode->nData))
        {
            pNewNode->pPrev = pNode;
            pNode->pNext = pNewNode;
            m_pLinkTail = pNewNode;
        }
        else
        {
            pNewNode->pPrev = pNode->pPrev;
            if (pNode != m_pLinkHead)
            {
                pNode->pPrev->pNext = pNewNode;
            }
            else
            {
                m_pLinkHead = pNewNode;
            }

            pNewNode->pNext = pNode;
            pNode->pPrev = pNewNode;
        }
    }

    return m_nNodeCount;
}

void MyHashTable::ShowSortLink()
{
    SINGLE_NODE *pNode = m_pLinkHead;
    cout << endl;
    cout << endl;
    cout << "Show all the node ==========================" << endl;
    while(pNode != NULL)
    {
        cout << "key= " << pNode->nData << "   ";
        pNode = pNode->pNext;
    }
    cout << endl;
    cout << endl;
}

// Looks nData up in the chain of the bucket it hashes to.
// Prints a message and returns the stored value on a hit, returns -1 on a miss.
//
// The bucket chain is linked through pHashNext; pNext is the sorted-list
// link and would lead the search into nodes of other buckets, so colliding
// keys could be missed.
int MyHashTable::FindHash(int nData)
{
    const int nHashValue = calcHash(nData);

    for (SINGLE_NODE *pNode = m_pBucket[nHashValue]; pNode != NULL; pNode = pNode->pHashNext)
    {
        if (pNode->nData == nData)
        {
            cout << "FindHash find it Key= " << nHashValue << " Data=" << pNode->nData << endl;
            return pNode->nData;
        }
    }

    return -1;
}

void MyHashTable::DeleteHash(int nData)
{
    SINGLE_NODE *pNode;
    int nHashValue;

    nHashValue = calcHash(nData);
    pNode = m_pBucket[nHashValue];

    if (pNode != NULL)
    {
        m_pBucket[nHashValue] = pNode->pNext;
        pNode->pPrev->pNext = pNode->pNext;
        pNode->pNext->pPrev = pNode->pPrev;

        free(pNode);
        pNode = NULL;
    }
}

main.cpp

#include "hash_link_table.h"

// Demo driver: inserts ten values, looks each of them up, prints the sorted
// list, deletes the fourth value, then repeats the lookups and the listing.
// Waits for an integer on stdin before exiting so a console window stays open.
// main must return int in standard C++ ("void main" is rejected by
// conforming compilers).
int main()
{
    const int kCount = 10;
    int testArr[kCount] = {123, 4332,223,8664,2239,89777,54455,32,23,64322};
    int i = 0;

    MyHashTable hashTable;

    for (i = 0; i < kCount; i++)
    {
        hashTable.InsertHashTable(testArr[i]);
    }

    cout << endl;
    cout << "Finding the hash kes and values" << endl;
    cout << endl;
    for (i = 0; i < kCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    hashTable.DeleteHash(testArr[3]);

    cout << endl;
    cout << "Test the 3 the key is deleted" << endl;
    cout << endl;
    for (i = 0; i < kCount; i++)
    {
        if (hashTable.FindHash(testArr[i]) != -1)
        {
            cout << "========================" << endl;
        }
    }

    hashTable.ShowSortLink();

    // Keep the console open until the user types something.
    cin >> i;

    return 0;
}

测试结果：

Insert Hash Key: 123 Value=123
Insert Hash Key: 236 Value=4332
Insert Hash Key: 223 Value=223
Insert Hash Key: 472 Value=8664
Insert Hash Key: 191 Value=2239
Insert Hash Key: 689 Value=89777
Insert Hash Key: 183 Value=54455
Insert Hash Key: 32 Value=32
Insert Hash Key: 23 Value=23
Insert Hash Key: 834 Value=64322

Finding the hash kes and values

FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 472 Data=8664 test deleting
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================

Show all the node ==========================
key= 23 key= 32 key= 123 key= 223 key= 2239 key= 4332 key= 8664 key= 54455 key= 64322 key= 89777

Test the 3 the key is deleted

FindHash find it Key= 123 Data=123
========================
FindHash find it Key= 236 Data=4332
========================
FindHash find it Key= 223 Data=223
========================
FindHash find it Key= 191 Data=2239
========================
FindHash find it Key= 689 Data=89777
========================
FindHash find it Key= 183 Data=54455
========================
FindHash find it Key= 32 Data=32
========================
FindHash find it Key= 23 Data=23
========================
FindHash find it Key= 834 Data=64322
========================

Show all the node ==========================
key= 23 key= 32 key= 123 key= 223 key= 2239 key= 4332 key= 54455 key= 64322 key= 89777