c++逆天改命进阶--哈希表

最新推荐文章于 2024-09-03 16:51:01 发布

逃跑的机械工

最新推荐文章于 2024-09-03 16:51:01 发布

阅读量305

点赞数 1

分类专栏：c++逆天改命　文章标签：c++ 散列表 哈希算法

本文链接：https://blog.csdn.net/btzxlin/article/details/125182054

版权

c++逆天改命专栏收录该内容

18 篇文章 1 订阅

订阅专栏

本文介绍了C++11中unordered_set和unordered_map的高效使用，比较了它们与红黑树容器在查询性能上的改进，并通过实例演示了哈希表在查找和去重操作中的优势。还讨论了直接定址法和两种散列冲突解决方法——闭散列与开散列的实现原理。

摘要由CSDN通过智能技术生成

1.unordered系列关联式容器

在C++98中，STL提供了底层为红黑树结构的一系列关联式容器，在查询时效率可达到 $O(\log_2 N)$，即最差情况下需要比较红黑树的高度次，当树中的节点非常多时，查询效率也不理想。最好的查询是，进行很少的比较次数就能够将元素找到，因此在C++11中，STL又提供了4个unordered系列的关联式容器，这四个容器与红黑树结构的关联式容器使用方式基本类似，只是其底层结构不同。

2.unordered_set和unordered_map的练习使用

#include <iostream>
#include <unordered_set>
#include <unordered_map>
#include <algorithm>
#include <string>
#include <vector>
#include <time.h>
#include <set>
using namespace std;

// Demonstrates basic unordered_set usage: inserting (with duplicates),
// traversing in two ways, and looking a value up with the member find().
void test_unordered_set1()
{
	std::unordered_set<int> us;

	// 2 and 6 appear more than once; a set keeps a single copy of each value.
	const int values[] = { 2, 1, 2, 3, 5, 6, 2, 6 };
	for (int value : values)
	{
		us.insert(value);
	}

	// Traversal with an explicit iterator.
	for (std::unordered_set<int>::iterator it = us.begin(); it != us.end(); ++it)
	{
		std::cout << *it << " ";
	}
	std::cout << std::endl;

	// Traversal with a range-based for loop.
	for (auto e : us)
	{
		std::cout << e << " ";
	}
	std::cout << std::endl;

	// <algorithm> also offers a generic find(us.begin(), us.end(), x), but that
	// is a brute-force linear scan, O(N). The member find() below uses the
	// hash of the key instead and is O(1) on average.
	// 12 was never inserted, so this lookup misses (looking up 2 would hit).
	const bool found = (us.find(12) != us.end());
	if (found)
	{
		std::cout << "找到了" << std::endl;
	}
	else
	{
		std::cout << "找不到了" << std::endl;
	}
}

// Demonstrates unordered_map usage: builds a tiny English -> Chinese
// dictionary and prints every key/value pair as "key:value".
void test_unordered_map1()
{
	std::unordered_map<std::string, std::string> dict;
	dict.insert(std::make_pair("happy", "开心的"));
	dict.insert(std::make_pair("left", "左边"));

	// Each element is a pair: first is the key, second is the mapped value.
	for (const auto& entry : dict)
	{
		std::cout << entry.first << ":" << entry.second << std::endl;
	}
}

// Compares insertion time of set and unordered_set on the same one million
// random integers. Build in release mode for representative numbers.
// Prints the elapsed clock() ticks for each container.
void test_op()
{
	const int kCount = 1000000;

	// Generate the shared input once so both containers see identical data.
	std::vector<int> v;
	v.reserve(kCount);
	srand(time(0));
	for (int i = 0; i < kCount; ++i)
	{
		v.push_back(rand());
	}

	std::set<int> s;
	std::unordered_set<int> us;

	// Time the ordered set.
	const size_t begin1 = clock();
	for (auto e : v)
	{
		s.insert(e);
	}
	const size_t end1 = clock();

	// Time the hash-based set.
	const size_t begin2 = clock();
	for (auto e : v)
	{
		us.insert(e);
	}
	const size_t end2 = clock();

	std::cout << "s:" << end1 - begin1 << std::endl;
	std::cout << "us:" << end2 - begin2 << std::endl;
}


// Entry point: runs the set vs. unordered_set benchmark. Swap in
// test_unordered_set1() or test_unordered_map1() to run the other demos.
int main()
{
	test_op();
	return 0;
}

3.在长度 2N 的数组中找出重复 N 次的元素

// "N-Repeated Element in Size 2N Array": count occurrences with a hash map,
// then report the value that occurs exactly nums.size() / 2 times.
class Solution {
public:
    // Returns the value whose occurrence count equals nums.size() / 2,
    // or 0 when no value has that count.
    int repeatedNTimes(std::vector<int>& nums)
    {
        std::unordered_map<int, int> freq; // value -> number of occurrences
        for (size_t i = 0; i < nums.size(); ++i)
        {
            // operator[] inserts a zero count for an unseen key, so the
            // increment works for both new and existing values.
            ++freq[nums[i]];
        }

        const size_t wanted = nums.size() / 2;
        for (auto it = freq.begin(); it != freq.end(); ++it)
        {
            if (static_cast<size_t>(it->second) == wanted)
            {
                return it->first;
            }
        }
        // Not reached for valid inputs; present so every path returns a value.
        return 0;
    }
};

4.两个数组的交集

// "Intersection of Two Arrays": returns the distinct values present in both
// inputs, in ascending order.
class Solution {
public:
    // nums1 / nums2: input arrays, may contain duplicates and may be empty.
    // Returns each common value exactly once, sorted ascending.
    std::vector<int> intersection(std::vector<int>& nums1, std::vector<int>& nums2)
    {
        // The range constructor of set removes duplicates and sorts ascending.
        const std::set<int> s1(nums1.begin(), nums1.end());
        const std::set<int> s2(nums2.begin(), nums2.end());

        std::vector<int> Vret;
        auto ps1 = s1.begin();
        auto ps2 = s2.begin();
        // Two-pointer merge over the two sorted sequences.
        while (ps1 != s1.end() && ps2 != s2.end())
        {
            if (*ps1 < *ps2)
            {
                // Values differ: advance the iterator at the smaller value.
                ++ps1;
            }
            else if (*ps2 < *ps1)
            {
                ++ps2;
            }
            else
            {
                // Equal values belong to the intersection; advance both.
                Vret.push_back(*ps1);
                ++ps1;
                ++ps2;
            }
        }
        return Vret;
    }
};

5.哈希概念

顺序结构以及平衡树中，元素关键码与其存储位置之间没有对应的关系，因此在查找一个元素时，必须要经过关键码的多次比较。顺序查找时间复杂度为 $O(N)$，平衡树中为树的高度，即 $O(\log_2 N)$，搜索的效率取决于搜索过程中元素的比较次数。

理想的搜索方法：可以不经过任何比较，一次直接从表中得到要搜索的元素。如果构造一种存储结构，通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

插入元素

根据待插入元素的关键码，以此函数计算出该元素的存储位置并按此位置进行存放
搜索元素

对元素的关键码进行同样的计算，把求得的函数值当做元素的存储位置，在结构中按此位置取元素比较，若关键码相等，则搜索成功

该方式即为哈希(散列)方法，哈希方法中使用的转换函数称为哈希(散列)函数，构造出来的结构称为哈希表(Hash Table)(或者称散列表)

6.直接定址法

取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B

字符串中的第一个唯一字符

// "First Unique Character in a String", solved with direct addressing:
// the character value itself is the index into a counter table.
class Solution {
public:
    // Returns the index of the first character that occurs exactly once in s,
    // or -1 if there is none (including when s is empty).
    int firstUniqChar(std::string s)
    {
        // One counter per possible byte value (assumes 8-bit char), so any
        // input is safe, not only lowercase letters. Indexing with
        // `ch - 'a'` would go out of bounds for other characters.
        int count[256] = {0};
        for (unsigned char ch : s)
        {
            ++count[ch];
        }
        for (size_t i = 0; i < s.size(); ++i)
        {
            if (count[static_cast<unsigned char>(s[i])] == 1)
            {
                return static_cast<int>(i);
            }
        }
        return -1;
    }
};

缺陷：

如果数据的取值范围很大，会浪费很多空间
不能处理浮点数，字符串等数据

7.除留余数法

除留余数法用关键码对表的大小取模来得到存储位置：Hash(key) = key % 表长。这样不同的关键码可能被映射到同一个位置，这种现象称为哈希冲突。那么，如何解决哈希冲突呢？

7.1闭散列

#pragma once
#include <iostream>
#include <vector>
using namespace std;

// Closed hashing (open addressing) hash table with linear probing.
namespace CloseHash
{
	// State of one table slot.
	//   EMPTY  - never used; a probe sequence stops here.
	//   EXITS  - holds a live entry (spelling kept so existing code that
	//            names the enumerator keeps compiling).
	//   DELETE - tombstone: the entry was erased, but probing must continue
	//            past it to reach entries stored further along.
	enum State
	{
		EMPTY,
		EXITS,
		DELETE
	};

	// One table slot: the stored key/value pair plus its state.
	template<class K, class V>
	struct HashData
	{
		std::pair<K, V> _kv;
		State _state = EMPTY;
	};

	// Default hash functor: converts the key itself to size_t.
	template <class K>
	struct Hash
	{
		size_t operator()(const K& key) const
		{
			return static_cast<size_t>(key);
		}
	};

	// Hash functor for strings (BKDR-style, multiplier 131).
	template<>
	struct Hash<std::string>
	{
		size_t operator()(const std::string& s) const
		{
			size_t value = 0;
			for (auto ch : s)
			{
				value += ch;
				value *= 131;
			}
			return value;
		}
	};

	// Hash table mapping K to V.
	//   HashFunc - default-constructible functor returning size_t for a key.
	template<class K, class V, class HashFunc = Hash<K>>
	class HashTable
	{
	public:
		// Inserts kv. Returns false (table unchanged) if the key is already
		// present, true otherwise.
		bool Insert(const std::pair<K, V>& kv)
		{
			if (Find(kv.first))
			{
				return false;
			}
			if (_table.empty())
			{
				// First insertion: start with 10 slots.
				_table.resize(10);
			}
			// A high load factor means more collisions and slower probing; a
			// low one wastes space. Grow once it exceeds 0.7.
			else if (static_cast<double>(_n) / static_cast<double>(_table.size()) > 0.7)
			{
				// Rehash into a table twice as large. The new table must use
				// the same HashFunc, otherwise the entries would be placed
				// where Find() (which uses HashFunc) does not look.
				HashTable<K, V, HashFunc> newHT;
				newHT._table.resize(_table.size() * 2);
				for (auto& e : _table)
				{
					if (e._state == EXITS)
					{
						newHT.Insert(e._kv);
					}
				}
				_table.swap(newHT._table);
				// Tombstones were dropped by the rehash, so the used-slot
				// count is now just the number of live entries.
				_n = newHT._n;
			}

			HashFunc hf;
			const size_t size = _table.size();
			size_t index = hf(kv.first) % size;
			// Linear probing: step forward (wrapping around) until an EMPTY
			// slot is found. The load-factor check above guarantees one exists.
			while (_table[index]._state != EMPTY)
			{
				index = (index + 1) % size;
			}
			_table[index]._kv = kv;
			_table[index]._state = EXITS;
			++_n;
			return true;
		}

		// Returns a pointer to the slot holding key, or nullptr if absent.
		// The pointer is into the internal vector, so a later Insert that
		// triggers a rehash invalidates it.
		HashData<K, V>* Find(const K& key)
		{
			if (_table.empty())
			{
				return nullptr;
			}
			HashFunc hf;
			const size_t size = _table.size();
			size_t index = hf(key) % size;
			// Visit each slot at most once so the search always terminates.
			for (size_t probes = 0; probes < size; ++probes)
			{
				HashData<K, V>& slot = _table[index];
				if (slot._state == EMPTY)
				{
					break;
				}
				// Tombstones are skipped, not matched.
				if (slot._state == EXITS && slot._kv.first == key)
				{
					return &slot;
				}
				index = (index + 1) % size;
			}
			return nullptr;
		}

		// Marks the entry for key as deleted. Returns false if key is absent.
		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret == nullptr)
			{
				return false;
			}
			// Leave a tombstone; _n is intentionally not decremented because
			// the slot still lengthens probe sequences until the next rehash.
			ret->_state = DELETE;
			return true;
		}
	private:
		std::vector<HashData<K, V>> _table;
		size_t _n = 0; // number of non-EMPTY slots (live entries + tombstones)
	};

	// Smoke test with int keys: find a key, erase it, then look it up again.
	void TestHashTable1()
	{
		int a[] = { 1, 5, 10, 100000, 100, 18, 15, 7, 40 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(std::make_pair(e, e));
		}
		auto ret = ht.Find(100);
		if (ret)
		{
			std::cout << "找到了" << std::endl;
		}
		else
		{
			std::cout << "没找到" << std::endl;
		}
		ht.Erase(100);
		ret = ht.Find(100);
		if (ret)
		{
			std::cout << "找到了" << std::endl;
		}
		else
		{
			std::cout << "没找到" << std::endl;
		}
	}

	// Smoke test with string keys: counts how often each word occurs.
	void TestHashTable2()
	{
		std::string a[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "橘子", "苹果" };
		HashTable<std::string, int> ht;
		for (const auto& str : a)
		{
			auto ret = ht.Find(str);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				ht.Insert(std::make_pair(str, 1));
			}
		}
	}
}

7.2开散列

// Open hashing (separate chaining) hash table: every bucket is the head of
// a singly linked list of nodes.
namespace OpenHash
{
	// Default hash functor: converts the key itself to size_t.
	template <class K>
	struct Hash
	{
		size_t operator()(const K& key) const
		{
			return static_cast<size_t>(key);
		}
	};

	// Hash functor for strings (BKDR-style, multiplier 131).
	template<>
	struct Hash<std::string>
	{
		size_t operator()(const std::string& s) const
		{
			size_t value = 0;
			for (auto ch : s)
			{
				value += ch;
				value *= 131;
			}
			return value;
		}
	};

	// Chain node: the stored key/value pair and the link to the next node.
	template <class K, class V>
	struct HashNode
	{
		std::pair<K, V> _kv;
		HashNode<K, V>* _next;
		HashNode(const std::pair<K, V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}
	};

	// Hash table mapping K to V.
	//   HashFunc - default-constructible functor returning size_t for a key.
	// The table owns its nodes: they are freed on Erase and on destruction,
	// and copying a table deep-copies every chain.
	template <class K, class V, class HashFunc = Hash<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		HashTable() = default;

		// Deep copy: each chain is duplicated node by node, keeping its order.
		HashTable(const HashTable& other)
			:_table(other._table.size(), nullptr)
			,_n(other._n)
		{
			try
			{
				for (size_t i = 0; i < other._table.size(); ++i)
				{
					Node** link = &_table[i];
					for (const Node* cur = other._table[i]; cur != nullptr; cur = cur->_next)
					{
						*link = new Node(cur->_kv);
						link = &(*link)->_next;
					}
				}
			}
			catch (...)
			{
				// Do not leak the nodes copied so far.
				Clear();
				throw;
			}
		}

		// Copy-and-swap assignment: `other` is already a copy, so take over
		// its buckets and let its destructor free the old ones.
		HashTable& operator=(HashTable other)
		{
			_table.swap(other._table);
			const size_t tmp = _n;
			_n = other._n;
			other._n = tmp;
			return *this;
		}

		~HashTable()
		{
			Clear();
		}

		// Inserts kv. Returns false (table unchanged) if the key is already
		// present, true otherwise.
		bool Insert(const std::pair<K, V>& kv)
		{
			if (Find(kv.first))
			{
				return false;
			}
			HashFunc hf;
			// Grow when the load factor reaches 1 (this also covers the
			// initial empty table).
			if (_n == _table.size())
			{
				const size_t newsize = _table.empty() ? 8 : _table.size() * 2;
				std::vector<Node*> newtable(newsize, nullptr);
				// Move the existing nodes into the new buckets; nodes are
				// relinked, not reallocated.
				for (size_t i = 0; i < _table.size(); ++i)
				{
					Node* cur = _table[i];
					while (cur != nullptr)
					{
						Node* following = cur->_next;
						const size_t index = hf(cur->_kv.first) % newtable.size();
						// Push onto the front of the destination chain.
						cur->_next = newtable[index];
						newtable[index] = cur;
						cur = following;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable);
			}
			const size_t index = hf(kv.first) % _table.size();
			Node* newNode = new Node(kv);
			// Push onto the front of the bucket's chain.
			newNode->_next = _table[index];
			_table[index] = newNode;
			++_n;
			return true;
		}

		// Returns the node holding key, or nullptr if absent. The node stays
		// valid until it is erased or the table is destroyed.
		Node* Find(const K& key)
		{
			if (_table.empty())
			{
				return nullptr;
			}
			HashFunc hf;
			const size_t index = hf(key) % _table.size();
			for (Node* cur = _table[index]; cur != nullptr; cur = cur->_next)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
			}
			return nullptr;
		}

		// Removes the entry for key and frees its node. Returns false if key
		// is absent.
		bool Erase(const K& key)
		{
			if (_table.empty())
			{
				return false;
			}
			HashFunc hf;
			const size_t index = hf(key) % _table.size();
			Node* prev = nullptr;
			for (Node* cur = _table[index]; cur != nullptr; prev = cur, cur = cur->_next)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)
					{
						// Removing the head: the bucket must now point at the
						// rest of the chain, not at nullptr.
						_table[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					--_n;
					return true;
				}
			}
			return false;
		}
	private:
		// Frees every node and leaves all buckets empty.
		void Clear()
		{
			for (size_t i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				while (cur != nullptr)
				{
					Node* following = cur->_next;
					delete cur;
					cur = following;
				}
				_table[i] = nullptr;
			}
			_n = 0;
		}

		std::vector<Node*> _table; // bucket heads
		size_t _n = 0;             // number of stored entries
	};

	// Smoke test with int keys; the eleven inserts go past the initial
	// 8 buckets, so a rehash is exercised.
	void TestHashTable1()
	{
		int a[] = { 1, 5, 30, 100000, 100, 18, 15, 7, 40, 44 };
		HashTable<int, int> ht;
		for (auto e : a)
		{
			ht.Insert(std::make_pair(e, e));
		}
		ht.Insert(std::make_pair(25, 25));
	}

	// Smoke test with string keys: counts how often each word occurs.
	void TestHashTable2()
	{
		std::string a[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果", "橘子", "苹果" };
		HashTable<std::string, int> ht;
		for (const auto& str : a)
		{
			auto ret = ht.Find(str);
			if (ret)
			{
				ret->_kv.second++;
			}
			else
			{
				ht.Insert(std::make_pair(str, 1));
			}
		}
	}
}