【杂乱算法】哈希

最新推荐文章于 2024-09-28 22:05:59 发布

HeaoDng144

最新推荐文章于 2024-09-28 22:05:59 发布

阅读量808

点赞数 17

分类专栏：数组、杂乱笔记　文章标签：哈希算法、算法、散列表

本文链接：https://blog.csdn.net/HeaoDng/article/details/141323383

版权

杂乱笔记同时被 2 个专栏收录

8 篇文章 0 订阅

订阅专栏

数组

3 篇文章 0 订阅

订阅专栏

哈希

文章目录

哈希

基础定义

哈希函数（Hash Function）

哈希函数是将任意大小的数据映射到固定大小的数据（通常是一个整数）的函数。这个固定大小的数据称为哈希值或哈希码。一个好的哈希函数应该满足以下特性：

一致性：相同的输入总是产生相同的哈希值。
高效性：计算哈希值的过程应该快速。
均匀分布：哈希值应该尽可能均匀地分布在可能的哈希值范围内，以减少冲突。

哈希表（Hash Table）

哈希表是一种数据结构，它使用哈希函数将键（key）映射到数组中的一个位置，以便快速访问记录。哈希表通常支持以下操作：

插入（Insert）：将一个键值对插入到哈希表中。
查找（Search）：根据键查找对应的值。
删除（Delete）：根据键删除对应的键值对。

哈希冲突（Hash Collision）

由于哈希函数的输出范围通常小于输入范围，不同的键可能会映射到同一个位置，这种现象称为哈希冲突。解决哈希冲突的常见方法有：

链地址法（Chaining）：每个数组位置存储一个链表，所有哈希到该位置的键值对都存储在这个链表中。

#include <iostream>
#include <list>
#include <vector>

using namespace std;

/**
 * Hash table of int keys that resolves collisions by separate chaining:
 * every bucket owns a linked list holding all keys that hash to it.
 *
 * Duplicate keys are allowed (insert always appends); remove erases every
 * occurrence of the key from its bucket.
 */
class HashTable {
private:
    std::vector<std::list<int>> table;  // table[i] is the chain of keys hashed to bucket i
    int size;                           // number of buckets; the constructor guarantees >= 1

    /**
     * Maps a key to a bucket index in [0, size).
     *
     * In C++ the result of `%` takes the sign of the dividend, so a negative
     * key would otherwise yield a negative (out-of-bounds) index.
     */
    int hashFunction(int key) const {
        const int remainder = key % size;
        return remainder < 0 ? remainder + size : remainder;
    }

public:
    /**
     * @param s requested number of buckets; values below 1 are raised to 1 so
     *          that the modulo in hashFunction is always well defined.
     */
    HashTable(int s) : size(s > 0 ? s : 1) {
        table.resize(size);
    }

    /** Appends key to the chain of its bucket. */
    void insert(int key) {
        table[hashFunction(key)].push_back(key);
    }

    /** Removes every occurrence of key; does nothing when the key is absent. */
    void remove(int key) {
        table[hashFunction(key)].remove(key);
    }

    /** @return true when key is stored in the table. */
    bool search(int key) const {
        for (const int val : table[hashFunction(key)]) {
            if (val == key) {
                return true;
            }
        }
        return false;
    }

    /** Prints one line per bucket: "Bucket i: " followed by its keys. */
    void display() const {
        for (int i = 0; i < size; ++i) {
            std::cout << "Bucket " << i << ": ";
            for (const int val : table[i]) {
                std::cout << val << " ";
            }
            std::cout << '\n';
        }
    }
};

int main() {
    HashTable ht(7);

    ht.insert(10);
    ht.insert(20);
    ht.insert(15);
    ht.insert(7);

    ht.display();

    cout << "Search 15: " << (ht.search(15) ? "Found" : "Not Found") << endl;
    cout << "Search 5: " << (ht.search(5) ? "Found" : "Not Found") << endl;

    ht.remove(15);
    ht.display();

    return 0;
}

开放地址法（Open Addressing）：当发生冲突时，寻找哈希表中的下一个空闲位置来存储冲突的键值对。常见的开放地址法包括线性探测、二次探测和双重哈希。

线性探测法：

如果遇到冲突，就依次检查下一个位置（到达表尾后回到表头继续），直到找到空位再插入；若表已满则无法插入。

#include <iostream>
#include <vector>

using namespace std;

/**
 * Open-addressing hash table of int keys using linear probing: on a collision
 * the next slot (wrapping around at the end) is tried.
 *
 * Each slot carries an explicit state instead of relying on a sentinel key:
 *  - deleted slots become tombstones, so removing a key does not cut the probe
 *    chain of keys that were inserted after it;
 *  - any int, including the value passed as emptyVal, can be stored as a key.
 * Every probe loop is bounded by the table size, so operations on a full
 * table terminate. Duplicate keys are allowed.
 */
class HashTable {
private:
    enum class SlotState : unsigned char { Empty, Occupied, Deleted };

    std::vector<int> table;        // stored keys; meaningful only where state is Occupied
    std::vector<SlotState> state;  // per-slot state, parallel to table
    int size;                      // number of slots; the constructor guarantees >= 1
    int emptyValue;                // filler written into slots that hold no key

    /** Maps a key to a home slot in [0, size); negative keys are normalised. */
    int hashFunction(int key) const {
        const int remainder = key % size;
        return remainder < 0 ? remainder + size : remainder;
    }

    /** @return the slot holding key, or -1 when the key is not stored. */
    int findSlot(int key) const {
        int index = hashFunction(key);
        for (int probes = 0; probes < size; ++probes) {
            if (state[index] == SlotState::Empty) {
                return -1;  // a never-used slot ends the probe chain
            }
            if (state[index] == SlotState::Occupied && table[index] == key) {
                return index;
            }
            index = (index + 1) % size;
        }
        return -1;  // visited every slot without finding the key
    }

public:
    /**
     * @param s        requested number of slots; values below 1 are raised to 1.
     * @param emptyVal filler value stored in unused slots.
     */
    HashTable(int s, int emptyVal = -1) : size(s > 0 ? s : 1), emptyValue(emptyVal) {
        table.assign(size, emptyValue);
        state.assign(size, SlotState::Empty);
    }

    /**
     * Stores key in the first free (empty or deleted) slot of its probe chain.
     * @return true on success, false when the table is full.
     */
    bool insert(int key) {
        int index = hashFunction(key);
        for (int probes = 0; probes < size; ++probes) {
            if (state[index] != SlotState::Occupied) {
                table[index] = key;
                state[index] = SlotState::Occupied;
                return true;
            }
            index = (index + 1) % size;
        }
        return false;
    }

    /** Removes one occurrence of key; does nothing when the key is absent. */
    void remove(int key) {
        const int index = findSlot(key);
        if (index >= 0) {
            table[index] = emptyValue;
            state[index] = SlotState::Deleted;  // tombstone keeps later keys reachable
        }
    }

    /** @return true when key is stored in the table. */
    bool search(int key) const {
        return findSlot(key) >= 0;
    }

    /** Prints one line per slot: the stored key, or "Empty" when it holds none. */
    void display() const {
        for (int i = 0; i < size; ++i) {
            if (state[i] == SlotState::Occupied) {
                std::cout << "Index " << i << ": " << table[i] << '\n';
            } else {
                std::cout << "Index " << i << ": " << "Empty" << '\n';
            }
        }
    }
};

int main() {
    HashTable ht(7);

    ht.insert(10);
    ht.insert(20);
    ht.insert(15);
    ht.insert(7);

    ht.display();

    cout << "Search 15: " << (ht.search(15) ? "Found" : "Not Found") << endl;
    cout << "Search 5: " << (ht.search(5) ? "Found" : "Not Found") << endl;

    ht.remove(15);
    ht.display();

    return 0;
}

二次探测法：

遇到冲突时，按平方增长的偏移量继续探测：第 i 次探测的位置为 (h(key) + i²) mod size（i = 0, 1, 2, …）。注意二次探测不一定能访问到表中的所有位置。

#include <iostream>
#include <vector>

using namespace std;

/**
 * Open-addressing hash table of int keys using quadratic probing: the i-th
 * probe for a key inspects slot (home + i*i) % size.
 *
 * Each slot carries an explicit state, so deleted slots act as tombstones
 * (removing a key does not hide keys further along the same probe sequence)
 * and any int can be stored as a key. Probing stops after `size` attempts:
 * (i + size)^2 and i^2 are congruent modulo size, so later probes would only
 * revisit slots that were already inspected. Duplicate keys are allowed.
 */
class HashTable {
private:
    enum class SlotState : unsigned char { Empty, Occupied, Deleted };

    std::vector<int> table;        // stored keys; meaningful only where state is Occupied
    std::vector<SlotState> state;  // per-slot state, parallel to table
    int size;                      // number of slots; the constructor guarantees >= 1
    int emptyValue;                // filler written into slots that hold no key

    /** Maps a key to a home slot in [0, size); negative keys are normalised. */
    int hashFunction(int key) const {
        const int remainder = key % size;
        return remainder < 0 ? remainder + size : remainder;
    }

    /** Slot inspected by the i-th probe; computed in 64 bits so i*i cannot overflow. */
    int probeSlot(int home, int i) const {
        const long long offset = static_cast<long long>(i) * i;
        return static_cast<int>((home + offset) % size);
    }

    /** @return the slot holding key, or -1 when the key is not stored. */
    int findSlot(int key) const {
        const int home = hashFunction(key);
        for (int i = 0; i < size; ++i) {
            const int slot = probeSlot(home, i);
            if (state[slot] == SlotState::Empty) {
                return -1;  // a never-used slot ends the probe sequence
            }
            if (state[slot] == SlotState::Occupied && table[slot] == key) {
                return slot;
            }
        }
        return -1;
    }

public:
    /**
     * @param s        requested number of slots; values below 1 are raised to 1.
     * @param emptyVal filler value stored in unused slots.
     */
    HashTable(int s, int emptyVal = -1) : size(s > 0 ? s : 1), emptyValue(emptyVal) {
        table.assign(size, emptyValue);
        state.assign(size, SlotState::Empty);
    }

    /**
     * Stores key in the first free (empty or deleted) slot of its probe sequence.
     * @return true on success; false when the sequence contains no free slot,
     *         which can happen even if the table is not completely full.
     */
    bool insert(int key) {
        const int home = hashFunction(key);
        for (int i = 0; i < size; ++i) {
            const int slot = probeSlot(home, i);
            if (state[slot] != SlotState::Occupied) {
                table[slot] = key;
                state[slot] = SlotState::Occupied;
                return true;
            }
        }
        return false;
    }

    /** Removes one occurrence of key; does nothing when the key is absent. */
    void remove(int key) {
        const int slot = findSlot(key);
        if (slot >= 0) {
            table[slot] = emptyValue;
            state[slot] = SlotState::Deleted;  // tombstone keeps later keys reachable
        }
    }

    /** @return true when key is stored in the table. */
    bool search(int key) const {
        return findSlot(key) >= 0;
    }

    /** Prints one line per slot: the stored key, or "Empty" when it holds none. */
    void display() const {
        for (int i = 0; i < size; ++i) {
            if (state[i] == SlotState::Occupied) {
                std::cout << "Index " << i << ": " << table[i] << '\n';
            } else {
                std::cout << "Index " << i << ": " << "Empty" << '\n';
            }
        }
    }
};

int main() {
    HashTable ht(7);

    ht.insert(10);
    ht.insert(20);
    ht.insert(15);
    ht.insert(7);

    ht.display();

    cout << "Search 15: " << (ht.search(15) ? "Found" : "Not Found") << endl;
    cout << "Search 5: " << (ht.search(5) ? "Found" : "Not Found") << endl;

    ht.remove(15);
    ht.display();

    return 0;
}

双重哈希：

采用两个哈希函数：第一个函数计算初始位置，第二个函数计算探测步长；遇到冲突时，就在当前位置上反复加上该步长（对表长取模）继续探测。步长不能为 0，且表长取质数时探测序列才能覆盖所有位置。

#include <iostream>
#include <vector>

using namespace std;

/**
 * Open-addressing hash table of int keys using double hashing: the first hash
 * picks the home slot and the second hash picks the step added on every
 * collision.
 *
 * Each slot carries an explicit state, so deleted slots act as tombstones
 * (removing a key does not hide keys further along a probe sequence) and any
 * int can be stored as a key. Every probe loop is bounded by the table size.
 * Duplicate keys are allowed.
 */
class HashTable {
private:
    enum class SlotState : unsigned char { Empty, Occupied, Deleted };

    std::vector<int> table;        // stored keys; meaningful only where state is Occupied
    std::vector<SlotState> state;  // per-slot state, parallel to table
    int size;                      // number of slots; the constructor guarantees >= 1
    int emptyValue;                // filler written into slots that hold no key

    /** Home slot in [0, size); negative keys are normalised. */
    int hashFunction1(int key) const {
        const int remainder = key % size;
        return remainder < 0 ? remainder + size : remainder;
    }

    /**
     * Probe step in [1, size - 1]. A single-slot table has no such range (and
     * `key % (size - 1)` would divide by zero), so the step is 1 there.
     */
    int hashFunction2(int key) const {
        if (size < 2) {
            return 1;
        }
        const int remainder = key % (size - 1);
        return 1 + (remainder < 0 ? remainder + (size - 1) : remainder);
    }

    /** Next slot on the probe sequence; summed in 64 bits to avoid int overflow. */
    int nextSlot(int index, int stepSize) const {
        return static_cast<int>((static_cast<long long>(index) + stepSize) % size);
    }

    /** @return the slot holding key, or -1 when the key is not stored. */
    int findSlot(int key) const {
        int index = hashFunction1(key);
        const int stepSize = hashFunction2(key);
        for (int probes = 0; probes < size; ++probes) {
            if (state[index] == SlotState::Empty) {
                return -1;  // a never-used slot ends the probe sequence
            }
            if (state[index] == SlotState::Occupied && table[index] == key) {
                return index;
            }
            index = nextSlot(index, stepSize);
        }
        return -1;
    }

public:
    /**
     * @param s        requested number of slots; values below 1 are raised to 1.
     *                 The probe sequence reaches every slot only when the step
     *                 and s share no common factor (e.g. s is prime).
     * @param emptyVal filler value stored in unused slots.
     */
    HashTable(int s, int emptyVal = -1) : size(s > 0 ? s : 1), emptyValue(emptyVal) {
        table.assign(size, emptyValue);
        state.assign(size, SlotState::Empty);
    }

    /**
     * Stores key in the first free (empty or deleted) slot of its probe sequence.
     * @return true on success; false when no free slot was found within
     *         `size` probes.
     */
    bool insert(int key) {
        int index = hashFunction1(key);
        const int stepSize = hashFunction2(key);
        for (int probes = 0; probes < size; ++probes) {
            if (state[index] != SlotState::Occupied) {
                table[index] = key;
                state[index] = SlotState::Occupied;
                return true;
            }
            index = nextSlot(index, stepSize);
        }
        return false;
    }

    /** Removes one occurrence of key; does nothing when the key is absent. */
    void remove(int key) {
        const int index = findSlot(key);
        if (index >= 0) {
            table[index] = emptyValue;
            state[index] = SlotState::Deleted;  // tombstone keeps later keys reachable
        }
    }

    /** @return true when key is stored in the table. */
    bool search(int key) const {
        return findSlot(key) >= 0;
    }

    /** Prints one line per slot: the stored key, or "Empty" when it holds none. */
    void display() const {
        for (int i = 0; i < size; ++i) {
            if (state[i] == SlotState::Occupied) {
                std::cout << "Index " << i << ": " << table[i] << '\n';
            } else {
                std::cout << "Index " << i << ": " << "Empty" << '\n';
            }
        }
    }
};

int main() {
    HashTable ht(7);

    ht.insert(10);
    ht.insert(20);
    ht.insert(15);
    ht.insert(7);

    ht.display();

    cout << "Search 15: " << (ht.search(15) ? "Found" : "Not Found") << endl;
    cout << "Search 5: " << (ht.search(5) ? "Found" : "Not Found") << endl;

    ht.remove(15);
    ht.display();

    return 0;
}

> [!NOTE]
>
> 哈希只是一种算法思想，能够承载它的数据结构有很多，但核心始终是"数据映射"：例如可以把数据映射到数组下标，也可以映射到 map 等容器中。

哈希的应用

1、重复查询（哈希集合）

哈希集合 是 集合 数据结构的实现之一，用于存储 非重复值。

#include <iostream>
#include <unordered_set>

using namespace std;

// Demo of membership queries on a hash set: store three values, then report
// whether 20 and 40 are present.
int main() {
    // Create an unordered (hash-based) set.
    unordered_set<int> elements;

    // Insert a few elements.
    const int initialValues[] = {10, 20, 30};
    for (int value : initialValues) {
        elements.insert(value);
    }

    // Check each query value for membership and print the outcome.
    const int queries[] = {20, 40};
    for (int queryElement : queries) {
        if (elements.count(queryElement) == 0) {
            cout << queryElement << " 不在集合中。" << endl;
        } else {
            cout << queryElement << " 在集合中。" << endl;
        }
    }

    return 0;
}

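| 映射 | 底层实现 | 是否有序 | key 是否可重复 | 能否修改 key | 查询效率 | 增删效率 |
| --- | --- | --- | --- | --- | --- | --- |
| map | 红黑树 | key 有序 | key 不可重复 | key 不可修改 | O(log n) | O(log n) |
| multimap | 红黑树 | key 有序 | key 可重复 | key 不可修改 | O(log n) | O(log n) |
| unordered_map | 哈希表 | key 无序 | key 不可重复 | key 不可修改 | 平均 O(1)，最坏 O(n) | 平均 O(1)，最坏 O(n) |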

数独中的哈希判断是否符合

重要公式：idx=(i/3)*3+(j/3);判断是位于哪个九宫格（下标从0开始）

#include <iostream>
#include <vector>
#include <unordered_set>

using namespace std;

class Solution {
public:
    /**
     * Checks that the filled cells of a 9x9 Sudoku board do not break the
     * rules: no digit may repeat inside a row, a column or a 3x3 box. Empty
     * cells are written as '.'; solvability is not checked.
     *
     * One hash set per row, column and box records the digits seen so far.
     *
     * @param board the grid, indexed as board[row][column].
     * @return false when the board is not exactly 9x9, when a cell holds
     *         anything other than '1'..'9' or '.', or when a digit repeats in
     *         a row, column or box; true otherwise.
     */
    bool isValidSudoku(std::vector<std::vector<char>>& board) {
        constexpr std::size_t kSide = 9;
        constexpr std::size_t kBox = 3;

        // Reject malformed grids up front so the indexing below stays in bounds.
        if (board.size() != kSide) {
            return false;
        }
        for (const auto& line : board) {
            if (line.size() != kSide) {
                return false;
            }
        }

        std::vector<std::unordered_set<int>> row(kSide), col(kSide), area(kSide);

        for (std::size_t i = 0; i < kSide; ++i) {
            for (std::size_t j = 0; j < kSide; ++j) {
                const char c = board[i][j];
                if (c == '.') {
                    continue;
                }
                if (c < '1' || c > '9') {
                    return false;
                }
                const int digit = c - '0';
                // Boxes are numbered 0..8 left-to-right, top-to-bottom.
                const std::size_t idx = (i / kBox) * kBox + (j / kBox);

                // insert().second is false when the digit was already present.
                if (!row[i].insert(digit).second ||
                    !col[j].insert(digit).second ||
                    !area[idx].insert(digit).second) {
                    return false;
                }
            }
        }
        return true;
    }
};

int main() {
    Solution solution;
    vector<vector<char>> board = {
        {'5', '3', '.', '.', '7', '.', '.', '.', '.'},
        {'6', '.', '.', '1', '9', '5', '.', '.', '.'},
        {'.', '9', '8', '.', '.', '.', '.', '6', '.'},
        {'8', '.', '.', '.', '6', '.', '.', '.', '3'},
        {'4', '.', '.', '8', '.', '3', '.', '.', '1'},
        {'7', '.', '.', '.', '2', '.', '.', '.', '6'},
        {'.', '6', '.', '.', '.', '.', '2', '8', '.'},
        {'.', '.', '.', '4', '1', '9', '.', '.', '5'},
        {'.', '.', '.', '.', '8', '.', '.', '7', '9'}
    };

    if (solution.isValidSudoku(board)) {
        cout << "The Sudoku board is valid." << endl;
    } else {
        cout << "The Sudoku board is invalid." << endl;
    }

    return 0;
}

2、字符串哈希

分别用两个不同的大质数作为模数，为同一个字符串计算两个哈希值；只有两个哈希值都相等时才认为两个字符串相同，这样能有效降低哈希碰撞的概率。

#include <string>

using ull = unsigned long long;

// Polynomial base and the two moduli used by the double string hash.
ull base = 131;
ull mod1 = 212370440130137957ULL, mod2 = 1000000007ULL;

// Returns (a * b) % m without overflowing 64 bits, using shift-and-add
// multiplication. Requires 0 < m <= 2^63 so that doubling a reduced value
// cannot wrap around.
static ull mul_mod(ull a, ull b, ull m) {
  ull result = 0;
  a %= m;
  while (b > 0) {
    if (b & 1) {
      result += a;
      if (result >= m) result -= m;
    }
    a += a;
    if (a >= m) a -= m;
    b >>= 1;
  }
  return result;
}

// Polynomial rolling hash of s modulo mod: hash = hash * base + byte for each
// character. The multiplication goes through mul_mod because under mod1 the
// plain product `hash * base` exceeds 64 bits. Characters are read as
// unsigned char so bytes >= 0x80 are not sign-extended.
static ull poly_hash(const std::string& s, ull mod) {
  ull ans = 0;
  for (const char ch : s) {
    const ull digit = static_cast<unsigned char>(ch) % mod;
    ans = (mul_mod(ans, base, mod) + digit) % mod;
  }
  return ans;
}

// Hash of s modulo mod1; the empty string hashes to 0.
ull get_hash1(const std::string& s) {
  return poly_hash(s, mod1);
}

// Hash of s modulo mod2; the empty string hashes to 0.
ull get_hash2(const std::string& s) {
  return poly_hash(s, mod2);
}

// Returns true when s and t are considered DIFFERENT, i.e. at least one of
// the two hashes disagrees. A false result means both hashes match.
bool cmp(const std::string& s, const std::string& t) {
  const bool f1 = get_hash1(s) != get_hash1(t);
  const bool f2 = get_hash2(s) != get_hash2(t);
  return f1 || f2;
}