13.哈希表（Hashtable）的实现

拾雪藏于春

已于 2023-11-15 13:31:18 修改

阅读量436

点赞数 9

分类专栏：c++　文章标签：散列表、哈希算法、算法、c++

于 2023-11-14 10:50:49 首次发布

本文链接：https://blog.csdn.net/weixin_62318955/article/details/134394053

版权

c++ 专栏收录该内容

19 篇文章 0 订阅

订阅专栏

哈希表（Hashtable）的实现

实现要求：

我们这里主要想实现一个可以处理多种不同的字符串哈希函数的通用哈希表，处理哈希冲突使用的是拉链法：

/*************************************************************************
        > File Name: 15.cpp
        > Author:Xiao Yuheng
        > Mail:3312638794@qq.com
        > Created Time: Wed Nov  8 22:38:19 2023
 ************************************************************************/

#include <iostream>
#include <vector>
#include <functional>

using namespace std;

/**
 * Node of a singly linked chain: stores one key/value pair plus a pointer to
 * the node that follows it.
 *
 * Only declarations live here; the member functions are defined out of class.
 */
class Node {
public:
    // Yields an empty key, value 0 and no successor (guaranteed by the
    // in-class initializers below, even for a plain `Node n;`).
    Node() = default;
    // Builds a node from a key, a value and the node that should follow it.
    Node(std::string, int, Node *);
    // Returns the key stored in this node.
    std::string key();
    // Payload associated with the key; public so it can be read and assigned directly.
    int value = 0;
    // Returns the following node, or a null pointer at the end of the chain.
    Node *next();
    // Replaces the pointer to the following node.
    void set_next(Node *);
    // Links the given node directly after this one.
    void insert(Node *);
    // Unlinks and deletes the node directly after this one, if there is one.
    void erase_next();

private:
    std::string __key;
    Node *__next = nullptr;
};



class HashTable {
public:
    typedef function<int(string)> HASH_FUNC_T;
    HashTable (HASH_FUNC_T hash_func, int size);
    bool insert(string, int);
    bool erase(string);
    bool find(string);
    int capacity();
    int &operator[](string);
    ~HashTable();

private:
    Node *__insert(string, int);
    Node *__find(string);
    void __expand();
    int __size, data_cnt;
    vector<Node> data;
    HASH_FUNC_T hash_func;
};



/**
 * BKDR string hash: folds the characters of `s` into a base-31 polynomial.
 *
 * Every character of the string takes part, including embedded '\0' bytes.
 *
 * @param s key to hash
 * @return non-negative hash value (the sign bit is masked off)
 */
int BKDRHash(std::string s) {
    const unsigned int seed = 31;
    // Unsigned arithmetic wraps around instead of overflowing; overflowing a
    // signed int (which happens after a handful of characters) is undefined.
    unsigned int h = 0;
    for (char c : s) {
        // Go through int first so a negative char contributes the same bits
        // it would in signed arithmetic.
        h = h * seed + static_cast<unsigned int>(static_cast<int>(c));
    }
    return static_cast<int>(h & 0x7fffffffu);
}

/**
 * Function object implementing an AP-style string hash: characters at even
 * and odd positions are mixed into the state with different shift patterns.
 */
class APHash_Class {
public:
    /**
     * @param s key to hash (all characters are used, including embedded '\0')
     * @return non-negative hash value (the sign bit is masked off)
     */
    int operator() (std::string s) {
        int h = 0;
        for (std::string::size_type i = 0; i < s.size(); i++) {
            // The state is usually negative after the `~` step. Left-shifting a
            // negative int is undefined before C++20, so the left shifts are
            // done on the unsigned representation. The right shifts stay on
            // the signed value so the results match the plain signed formula
            // on mainstream compilers.
            const unsigned int bits = static_cast<unsigned int>(h);
            if (i % 2) {
                h = static_cast<int>(bits << 3) ^ s[i] ^ (h >> 5);
            } else {
                h = ~(static_cast<int>(bits << 7) ^ s[i] ^ (h >> 11));
            }
        }
        return h & 0x7fffffff;
    }
};

int main() {
    APHash_Class APHash;
    HashTable h1(BKDRHash);
    HashTable h2(APHash);
    int op;
    string s;
    cout << h1.capacity() << endl;
    cout << h2.capacity() << endl;
    h1["hello"] = 123;
    h1["world"] = 456;
    h1["haizei"] = 789;
    cout << h1.capacity() << endl;
    cout << h2.capacity() << endl;
    cout << h1["hello"] << " " << h1["world"] << " " << h1["hahaha"] << endl;
    while (cin >> op >> s) {
        switch(op) {
            case 0: {
                cout << "insert " << s << " to hash table 1 = ";
                cout << h1.insert(s) << endl;
                cout << "insert " << s << " to hash table 2 = ";
                cout << h2.insert(s) << endl;
            } break;
            case 1: {
                cout << "erase " << s << " from hash table 1 = ";
                cout << h1.erase(s) << endl;
                cout << "erase " << s << " from hash table 2 = ";
                cout << h2.erase(s) << endl;
            } break;
            case 2: {
                cout << "find " << s << " at hash table 1 = ";
                cout << h1.find(s) << endl;
                cout << "find " << s << " at hash table 2 = ";
                cout << h2.find(s) << endl;
            } break;
        }
    }
    return 0;
}

我们需要实现能够满足主函数中所有调用的功能，下面简单解释一下需要实现哪些类以及它们各自的功能：

Node 类

/**
 * Node of a singly linked chain: stores one key/value pair plus a pointer to
 * the node that follows it.
 *
 * Only declarations live here; the member functions are defined out of class.
 */
class Node {
public:
    // Yields an empty key, value 0 and no successor (guaranteed by the
    // in-class initializers below, even for a plain `Node n;`).
    Node() = default;
    // Builds a node from a key, a value and the node that should follow it.
    Node(std::string, int, Node *);
    // Returns the key stored in this node.
    std::string key();
    // Payload associated with the key; public so it can be read and assigned directly.
    int value = 0;
    // Returns the following node, or a null pointer at the end of the chain.
    Node *next();
    // Replaces the pointer to the following node.
    void set_next(Node *);
    // Links the given node directly after this one.
    void insert(Node *);
    // Unlinks and deletes the node directly after this one, if there is one.
    void erase_next();

private:
    std::string __key;
    Node *__next = nullptr;
};

Node 类表示哈希表中的节点。
每个节点包含一个键（__key）、一个值（value），以及指向下一个节点的指针（__next）。
insert 函数用于在当前节点后插入一个节点，erase_next 函数用于删除下一个节点。

HashTable 类

class HashTable {
public:
    typedef function<int(string)> HASH_FUNC_T;
    HashTable (HASH_FUNC_T hash_func, int size);
    bool insert(string, int);
    bool erase(string);
    bool find(string);
    int capacity();
    int &operator[](string);
    void swap(HashTable &);
    ~HashTable();

private:
    Node *__insert(string, int);
    Node *__find(string);
    void __expand();
    int __size, data_cnt;
    vector<Node> data;
    HASH_FUNC_T hash_func;
};

HashTable 类表示哈希表，包含一些基本的哈希表操作。
hash_func 是用户提供的哈希函数。
insert、erase、find、capacity、operator[] 等函数分别实现了哈希表的插入、删除、查找、获取元素个数以及通过下标运算符访问元素等功能（注意：capacity 返回的是已存储的元素个数 data_cnt，而不是桶的数量）。
__expand 函数用于在哈希表容量不足时进行扩展。
swap 函数用于交换两个哈希表的内部状态。

实现`Node`类的功能：

Node::Node(string key, int value = 0, Node *node = nullptr) : __key(key), value(value), __next(node) {}
string Node::key() { return __key; }
Node *Node::next() { return __next; }
void Node::set_next(Node *node) { __next = node; }
void Node::insert(Node *node) {
    node->set_next(this->next());
    this->set_next(node);
}
void Node::erase_next() {
    Node *p = this->next();
    if (p == nullptr) return ;
    this->set_next(this->next()->next());
    delete p;
}

这是关于 Node 类成员函数的实现：

`Node` 类构造函数

// Builds a node for `key`/`value` whose successor is `node`.
// The initializers are listed in member declaration order.
Node::Node(string key, int value, Node *node)
    : value(value), __key(key), __next(node) {}

Node 类的构造函数，接受三个参数：key（键值），value（值，默认为0），和 node（下一个节点，默认为nullptr）。
使用初始化列表初始化了 __key、value 和 __next。

`key` 函数

string Node::key() {
    return __key;
}

key 函数用于返回节点的键值。

`next` 函数

// Accessor for the link to the following node (null at the end of the chain).
Node *Node::next() {
    return this->__next;
}

next 函数用于返回指向下一个节点的指针。

`set_next` 函数

// Overwrites the link to the following node; the previous link is simply dropped.
void Node::set_next(Node *node) {
    this->__next = node;
}

set_next 函数用于设置下一个节点的指针。

`insert` 函数

// Splices `node` in directly behind this node: the new node takes over the
// current successor, then becomes the successor itself.
void Node::insert(Node *node) {
    Node *old_successor = __next;
    node->set_next(old_successor);
    __next = node;
}

insert 函数用于在当前节点后插入一个新的节点。
将新节点的下一个节点指针设置为当前节点的下一个节点，然后将当前节点的下一个节点指针设置为新节点。

`erase_next` 函数

void Node::erase_next() {
    Node *p = this->next();
    if (p == nullptr) return ;
    this->set_next(this->next()->next());
    delete p;
}

erase_next 函数用于删除当前节点的下一个节点。
如果下一个节点为空，直接返回。
否则，将当前节点的下一个节点指针设置为下下个节点，然后删除下一个节点。

实现`HashTable`类的功能：

// Creates an empty table with `size` buckets (10 when omitted) that hashes
// keys with `hash_func`. Each bucket starts as a placeholder head node.
HashTable::HashTable (HASH_FUNC_T hash_func, int size = 10) :
                    __size(size), data_cnt(0),
                    data(size), hash_func(hash_func) {}

// Returns the number of stored keys (not the number of buckets).
int HashTable::capacity() { return data_cnt; }

// Stores `key` with `value`. Returns false, leaving the table untouched, when
// the key is already present.
bool HashTable::insert(string key, int value = 0) {
    const bool inserted = __insert(key, value) != nullptr;
    // Grow once there are more than two keys per bucket on average.
    if (data_cnt > __size * 2) __expand();
    return inserted;
}

// Links a new node for `key` at the front of its bucket and returns it, or
// returns a null pointer when the key is already stored.
Node *HashTable::__insert(string key, int value) {
    if (find(key)) return nullptr;
    int ind = hash_func(key) % __size;
    data[ind].insert(new Node(key, value));
    data_cnt += 1;
    return data[ind].next();
}

// Removes `key`. Returns true when a node was removed, false when the key was
// not stored.
bool HashTable::erase(string key) {
    // No separate find() pre-check: the scan below already detects a missing
    // key, and a pre-check would hash and walk the chain a second time.
    int ind = hash_func(key) % __size;
    // Walk with a pointer to the predecessor, because erase_next() deletes
    // the node that follows the one it is called on.
    Node *p = &data[ind];
    while (p->next() && p->next()->key() != key) { p = p->next(); }
    if (p->next() == nullptr) return false;
    p->erase_next();
    data_cnt--;
    return true;
}

// Reports whether `key` is stored.
bool HashTable::find(string key) {
    return __find(key) != nullptr;
}

// Returns the node holding `key`, or a null pointer when the key is not stored.
Node *HashTable::__find(string key) {
    int ind = hash_func(key) % __size;
    // data[ind] is only the placeholder head of the chain. Starting the scan
    // at the head itself would let its default (empty) key match a lookup for
    // "", so the scan begins at the first real entry.
    Node *p = data[ind].next();
    while (p && p->key() != key) { p = p->next(); }
    return p;
}

// Returns a reference to the value stored for `key`; a missing key is
// inserted first with the default value.
int &HashTable::operator[](string key) {
    Node *p = __find(key);
    if (p) return p->value;
    insert(key);
    // Look the key up again: insert() may have rebuilt the whole table.
    return __find(key)->value;
}

// Doubles the number of buckets by re-inserting every entry into a larger
// table and taking over that table's state.
void HashTable::__expand() {
    HashTable h(hash_func, __size * 2);
    for (int i = 0; i < __size; i++) {
        for (Node *p = data[i].next(); p; p = p->next()) {
            h.insert(p->key(), p->value);
        }
    }
    // After the swap `h` owns the old chains and frees them in its destructor.
    swap(h);
}

// Exchanges the complete state of this table with `h`.
void HashTable::swap(HashTable &h) {
    std::swap(__size, h.__size);
    std::swap(data_cnt, h.data_cnt);
    std::swap(data, h.data);
    std::swap(hash_func, h.hash_func);
}

// Frees every node chained behind the bucket heads; the heads themselves are
// owned by the vector.
HashTable::~HashTable() {
    for (int i = 0; i < __size; i++) {
        while (data[i].next()) { data[i].erase_next(); }
    }
}

这是哈希表 HashTable 类的实现，其中使用链地址法（即前文所说的拉链法）解决哈希冲突。

构造函数

// Creates an empty table with `size` buckets that hashes keys with `hash_func`.
// The initializers are listed in member declaration order.
HashTable::HashTable(HASH_FUNC_T hash_func, int size)
    : __size(size),
      data_cnt(0),
      data(size),
      hash_func(hash_func) {}

构造函数接受两个参数：hash_func 为哈希函数，size 为哈希表的初始大小，默认为10。
初始化了 data 向量，存储 Node 节点。
初始化了 hash_func 和哈希表的大小 __size。
data_cnt 用于记录哈希表中元素的个数。

`capacity` 函数

int HashTable::capacity() {
    return data_cnt;
}

capacity 函数返回哈希表中元素的个数。

`insert` 函数

// Stores `key` with `value`; the result is false when the key was already
// present. The table grows once it holds more than two keys per bucket.
bool HashTable::insert(string key, int value) {
    const bool inserted = (__insert(key, value) != nullptr);
    if (data_cnt > 2 * __size) {
        __expand();
    }
    return inserted;
}

insert 函数用于向哈希表中插入新元素。
首先调用 __insert 函数，如果返回的节点指针不为空，表示插入成功。
如果哈希表中的元素个数超过桶数量（__size）的两倍，则调用 __expand 函数进行扩容。

`__insert` 函数

// Links a new node for `key` at the front of its bucket and returns that
// node; returns a null pointer when the key is already stored.
Node *HashTable::__insert(string key, int value) {
    if (find(key)) {
        return nullptr;
    }
    Node &bucket_head = data[hash_func(key) % __size];
    bucket_head.insert(new Node(key, value));
    ++data_cnt;
    return bucket_head.next();
}

__insert 函数用于在哈希表中插入新节点。
如果节点已经存在于哈希表中，返回 nullptr。
计算节点应该插入的位置，调用链表节点的 insert 函数插入新节点。
哈希表元素个数加1，返回插入的节点指针。

`erase` 函数

// Removes `key`. Returns true when a node was removed, false when the key was
// not stored.
bool HashTable::erase(string key) {
    // No separate find() pre-check: the scan below already detects a missing
    // key, and a pre-check would hash and walk the chain a second time.
    int ind = hash_func(key) % __size;
    // Walk with a pointer to the predecessor, because erase_next() deletes
    // the node that follows the one it is called on.
    Node *p = &data[ind];
    while (p->next() && p->next()->key() != key) { p = p->next(); }
    if (p->next() == nullptr) return false;
    p->erase_next();
    data_cnt--;
    return true;
}

erase 函数用于从哈希表中删除指定键的节点。
如果节点不存在于哈希表中，返回 false。
计算节点应该在哪个位置，遍历链表找到对应的节点，删除该节点。
哈希表元素个数减1，返回 true 表示删除成功。

`find` 函数

// Membership test: true exactly when the internal lookup yields a node.
bool HashTable::find(string key) {
    return __find(key) != nullptr;
}

find 函数用于判断哈希表中是否存在指定键的节点。
调用 __find 函数，如果返回的节点指针为空，表示不存在，返回 false，否则返回 true。

`__find` 函数

// Returns the node holding `key`, or a null pointer when the key is not stored.
Node *HashTable::__find(string key) {
    int ind = hash_func(key) % __size;
    // data[ind] is only the placeholder head of the chain. Starting the scan
    // at the head itself would let its default (empty) key match a lookup for
    // "", so the scan begins at the first real entry.
    Node *p = data[ind].next();
    while (p && p->key() != key) { p = p->next(); }
    return p;
}

__find 函数用于在哈希表中查找指定键的节点。
计算节点应该在哪个位置，遍历链表找到对应的节点，返回该节点指针。

`operator[]` 函数

// Array-style access: yields a reference to the value stored for `key`,
// inserting the key first when it is missing.
int &HashTable::operator[](string key) {
    Node *found = __find(key);
    if (found == nullptr) {
        insert(key);
        // Look the key up again after the insertion.
        found = __find(key);
    }
    return found->value;
}

operator[] 函数用于实现对哈希表中元素的数组式访问。
如果键存在，返回对应节点的值的引用。
如果键不存在，调用 insert 函数插入新节点，然后再次调用 __find 函数返回对应节点的值的引用。

`__expand` 函数

void HashTable::__expand() {
    HashTable h(hash_func, __size * 2);
    for (int i = 0; i < __size; i++) {
        Node *p = data[i].next();
        while(p) {
            h.insert(p->key(), p->value);
            p = p->next();
        }
    }
    swap(h);
}

__expand 函数用于将哈希表的桶数量扩展为原来的两倍。
创建一个新的哈希表 h，大小为原来的两倍。
遍历当前哈希表的每个位置，将链表中的节点插入新的哈希表中。
最后调用 swap 函数交换当前哈希表和新哈希表的内部状态。

`swap` 函数

// Exchanges the complete state of this table with `h`, member by member.
void HashTable::swap(HashTable &h) {
    std::swap(hash_func, h.hash_func);
    std::swap(data, h.data);
    std::swap(data_cnt, h.data_cnt);
    std::swap(__size, h.__size);
}

swap 函数用于交换两个哈希表对象的内部状态。
使用 std::swap 交换当前哈希表和传入哈希表的大小、元素个数、数据、哈希函数。

析构函数

// Releases every node chained behind the bucket heads. The heads themselves
// are elements of the vector and are not deleted here.
HashTable::~HashTable() {
    for (int bucket = 0; bucket < __size; ++bucket) {
        Node &head = data[bucket];
        while (head.next() != nullptr) {
            head.erase_next();
        }
    }
}

哈希表的析构函数用于释放动态分配的内存。
遍历哈希表的每个位置，依次删除链表中的节点。

完整代码

/*************************************************************************
        > File Name: 15.cpp
        > Author:Xiao Yuheng
        > Mail:3312638794@qq.com
        > Created Time: Wed Nov  8 22:38:19 2023
 ************************************************************************/

#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using namespace std;

/**
 * Node of a singly linked chain: stores one key/value pair plus a pointer to
 * the node that follows it.
 */
class Node {
public:
    // Yields an empty key, value 0 and no successor (guaranteed by the
    // in-class initializers below, even for a plain `Node n;`).
    Node() = default;
    // Builds a node for `key`/`value` whose successor is `node`.
    Node(std::string key, int value = 0, Node *node = nullptr);
    // Returns a copy of the stored key.
    std::string key() const;
    // Payload associated with the key; public so it can be read and assigned directly.
    int value = 0;
    // Returns the following node, or a null pointer at the end of the chain.
    Node *next() const;
    // Overwrites the link to the following node.
    void set_next(Node *);
    // Links `node` directly after this one; a null pointer is ignored.
    void insert(Node *);
    // Unlinks and deletes the node directly after this one, if there is one.
    void erase_next();

private:
    std::string __key;
    Node *__next = nullptr;
};

// `key` is a by-value parameter, so it can be moved into the member instead
// of being copied a second time.
Node::Node(std::string key, int value, Node *node)
    : value(value), __key(std::move(key)), __next(node) {}

std::string Node::key() const { return __key; }

Node *Node::next() const { return __next; }

void Node::set_next(Node *node) { __next = node; }

void Node::insert(Node *node) {
    // Guard against dereferencing a null pointer below.
    if (node == nullptr) return ;
    node->set_next(__next);
    __next = node;
}

void Node::erase_next() {
    Node *p = __next;
    if (p == nullptr) return ;
    // Bypass the node before releasing it.
    __next = p->__next;
    delete p;
}

class HashTable {
public:
    typedef function<int(string)> HASH_FUNC_T;
    HashTable (HASH_FUNC_T hash_func, int size);
    bool insert(string, int);
    bool erase(string);
    bool find(string);
    int capacity();
    int &operator[](string);
    void swap(HashTable &);
    ~HashTable();

private:
    Node *__insert(string, int);
    Node *__find(string);
    void __expand();
    int __size, data_cnt;
    vector<Node> data;
    HASH_FUNC_T hash_func;
};

HashTable::HashTable (HASH_FUNC_T hash_func, int size = 10) :
                    data(size), hash_func(hash_func),
                    __size(size), data_cnt(0) {}
int HashTable::capacity() { return data_cnt; }
bool HashTable::insert(string key, int value = 0) {
    Node *p = __insert(key, value);
    if (data_cnt > __size * 2) __expand();
    return p != nullptr;
}
Node *HashTable::__insert(string key, int value) {
    if (find(key)) return nullptr;
    int ind = hash_func(key) % __size;
    data[ind].insert(new Node(key, value));
    data_cnt += 1;
    return data[ind].next();
}
bool HashTable::erase(string key) {
    if (!find(key)) false;
    int ind = hash_func(key) % __size;
    Node *p = &data[ind];
    while (p->next() && p->next()->key() != key) { p = p->next(); }
    if (p->next() == nullptr) return false;
    p->erase_next();
    data_cnt--;
    return true;
}
bool HashTable::find(string key) {
    Node *p = __find(key);
    if (p == nullptr) return false;
    return true;
}
Node *HashTable::__find(string key) {
    int ind = hash_func(key) % __size;
    Node *p = &data[ind];
    while (p && p->key() != key) { p = p->next(); }
    return p;
}
int &HashTable::operator[](string key) {
    Node *p = __find(key);
    if (p) return p->value;
    insert(key);
    return __find(key)->value;
}
void HashTable::__expand() {
    HashTable h(hash_func, __size * 2);
    for (int i = 0; i < __size; i++) {
        Node *p = data[i].next();
        while(p) {
            h.insert(p->key(), p->value);
            p = p->next();
        }
    }
    swap(h);
}
void HashTable::swap(HashTable &h) {
    std::swap(__size, h.__size);
    std::swap(data_cnt, h.data_cnt);
    std::swap(data, h.data);
    std::swap(hash_func, h.hash_func);
}
HashTable::~HashTable() {
    for (int i = 0; i < __size; i++) {
        while (data[i].next()) { data[i].erase_next(); }
    }
}


/**
 * BKDR string hash: folds the characters of `s` into a base-31 polynomial.
 *
 * Every character of the string takes part, including embedded '\0' bytes.
 *
 * @param s key to hash
 * @return non-negative hash value (the sign bit is masked off)
 */
int BKDRHash(std::string s) {
    const unsigned int seed = 31;
    // Unsigned arithmetic wraps around instead of overflowing; overflowing a
    // signed int (which happens after a handful of characters) is undefined.
    unsigned int h = 0;
    for (char c : s) {
        // Go through int first so a negative char contributes the same bits
        // it would in signed arithmetic.
        h = h * seed + static_cast<unsigned int>(static_cast<int>(c));
    }
    return static_cast<int>(h & 0x7fffffffu);
}

/**
 * Function object implementing an AP-style string hash: characters at even
 * and odd positions are mixed into the state with different shift patterns.
 */
class APHash_Class {
public:
    /**
     * @param s key to hash (all characters are used, including embedded '\0')
     * @return non-negative hash value (the sign bit is masked off)
     */
    int operator() (std::string s) {
        int h = 0;
        for (std::string::size_type i = 0; i < s.size(); i++) {
            // The state is usually negative after the `~` step. Left-shifting a
            // negative int is undefined before C++20, so the left shifts are
            // done on the unsigned representation. The right shifts stay on
            // the signed value so the results match the plain signed formula
            // on mainstream compilers.
            const unsigned int bits = static_cast<unsigned int>(h);
            if (i % 2) {
                h = static_cast<int>(bits << 3) ^ s[i] ^ (h >> 5);
            } else {
                h = ~(static_cast<int>(bits << 7) ^ s[i] ^ (h >> 11));
            }
        }
        return h & 0x7fffffff;
    }
};

int main() {
    APHash_Class APHash;
    HashTable h1(BKDRHash);
    HashTable h2(APHash);
    int op;
    string s;
    cout << h1.capacity() << endl;
    cout << h2.capacity() << endl;
    h1["hello"] = 123;
    h1["world"] = 456;
    h1["haizei"] = 789;
    cout << h1.capacity() << endl;
    cout << h2.capacity() << endl;
    cout << h1["hello"] << " " << h1["world"] << " " << h1["hahaha"] << endl;
    while (cin >> op >> s) {
        switch(op) {
            case 0: {
                cout << "insert " << s << " to hash table 1 = ";
                cout << h1.insert(s) << endl;
                cout << "insert " << s << " to hash table 2 = ";
                cout << h2.insert(s) << endl;
            } break;
            case 1: {
                cout << "erase " << s << " from hash table 1 = ";
                cout << h1.erase(s) << endl;
                cout << "erase " << s << " from hash table 2 = ";
                cout << h2.erase(s) << endl;
            } break;
            case 2: {
                cout << "find " << s << " at hash table 1 = ";
                cout << h1.find(s) << endl;
                cout << "find " << s << " at hash table 2 = ";
                cout << h2.find(s) << endl;
            } break;
        }
    }
    return 0;
}