数据结构——散列（哈希表）

wy_xzc

已于 2023-11-07 20:59:51 修改

阅读量48

点赞数

文章标签：数据结构散列表算法

于 2023-11-01 14:01:20 首次发布

本文链接：https://blog.csdn.net/wy_xzc/article/details/134159956

版权

tips:

跳表：还没有研究代码

一、字典序——有序链表

#include<iostream>
using namespace std;


/// Node of a singly linked sorted chain.
///
/// The rest of the file spells this type as SortedChainNode<E, K> and reads
/// and writes `data` / `link` directly, so the node has to be a template with
/// accessible members.
///
/// @tparam E element type stored in the node
/// @tparam K key type; not used by the node itself, kept so the node can be
///           named with the same template arguments as its owning chain
template<class E, class K>
class SortedChainNode {
public:
    E data;                      // element held by this node
    SortedChainNode<E, K>* link; // next node in the chain, null at the end
};
template<class E, class K>
class SortedChain {
public:
    SortedChain() { first = 0; }
    ~SortedChain();
    bool IsEmpty() const { return first == 0; }
    int Length() const;
    bool Search(const K& k, E& e) const;
    SortedChain<E, K>& Delete(const K& k, E& e);
    SortedChain<E, K>& Insert(const E& e);
    SortedChain<E, K>& DistinctInsert(const E& e);
    void Output(ostream& out) const;
private:
    SortedChainNode<E, K>* first;
};

// Search (the chain is kept in ascending order).
// Scans from the head until a node that is not smaller than k is reached.
// If that node equals k, its element is copied into e and true is returned;
// otherwise e is left untouched and false is returned.
template<class E, class K>
bool SortedChain<E, K>::Search(const K& k, E& e) const
{
    SortedChainNode<E, K>* cur;
    for (cur = first; cur && cur->data < k; cur = cur->link) {
        // skip every node that is still smaller than k
    }

    // Miss: ran off the end of the chain ...
    if (!cur) return false;
    // ... or stopped on a node whose data is larger than k.
    if (!(cur->data == k)) return false;

    e = cur->data;
    return true;
}

// Delete.
// Removes the node that equals k, copies its element into e and returns the
// chain itself. Throws BadInput when no node equals k.
template<class E, class K>
SortedChain<E, K>& SortedChain<E, K>::Delete(const K& k, E& e)
{
    // Two pointers: `victim` is the candidate node, `prev` trails one behind.
    SortedChainNode<E, K>* prev = 0;
    SortedChainNode<E, K>* victim = first;
    for (; victim && victim->data < k; victim = victim->link)
        prev = victim;

    // Nothing to delete: the chain ended, or the first node that is not
    // smaller than k is not equal to k either.
    if (!victim || !(victim->data == k))
        throw BadInput();

    e = victim->data;
    if (prev)
        prev->link = victim->link; // ordinary node: bypass it
    else
        first = victim->link;      // head node: the chain now starts after it
    delete victim;
    return *this;
}

// Insert.
// Places a new node holding e in front of the first node that is not smaller
// than e, so elements equal to e may appear more than once.
// Returns the chain itself.
template<class E, class K>
SortedChain<E, K>& SortedChain<E, K>::Insert(const E& e)
{
    // Two pointers again: `next` will follow the new node, `prev` precedes it.
    SortedChainNode<E, K>* prev = 0;
    SortedChainNode<E, K>* next = first;
    for (; next && next->data < e; next = next->link)
        prev = next;

    SortedChainNode<E, K>* node = new SortedChainNode<E, K>;
    node->data = e;
    node->link = next;

    // With no predecessor the new node becomes the head of the chain.
    if (!prev)
        first = node;
    else
        prev->link = node;
    return *this;
}

// Insert that rejects duplicate keys.
// Works like Insert, but throws BadInput when the first node that is not
// smaller than e is equal to e. Returns the chain itself.
template<class E, class K>
SortedChain<E, K>& SortedChain<E, K>
::DistinctInsert(const E& e)
{
    SortedChainNode<E, K>* prev = 0;
    SortedChainNode<E, K>* next = first;
    for (; next && next->data < e; next = next->link)
        prev = next;

    // Reject the element before anything is allocated.
    if (next && next->data == e)
        throw BadInput();

    SortedChainNode<E, K>* node = new SortedChainNode<E, K>;
    node->data = e;
    node->link = next;

    // Hook the node in right after prev, or at the head when there is none.
    if (!prev)
        first = node;
    else
        prev->link = node;
    return *this;
}

二、散列——有序列表

散列函数->address=Hash(key)：由已知关键字->马上算出唯一地址

散列表（Hash表）：以此构造出来的表或结构

散列表的适用范围：

• key 的取值范围比较宽泛

• 待处理的 key 值不多

• 存储空间有限

• 特别适用于需要快速查找的问题

装填因子 $\alpha$ = 表中已存入的 key 个数 / 散列表长

关键问题一：构造Hash函数

• 好的 Hash 函数

1）确定性：同一关键码总是被映射至同一地址

2）快速计算：复杂度——O(1)

3）满射：尽可能充分地覆盖整个散列空间

4）均匀：关键码映射到散列表各位置的概率尽量接近，可有效避免聚集现象

1.直接定址法：Hash(key)= a * key + b；进行线性运算

2.数字分析法：

设有n个d位数，每一位可能有r种不同的符号。这r种不同符号在各位上出现的频率不一定相同，可能在某些位上分布均匀些，在另一些位上不均匀。

则应根据已知关键字集合的特点，选取出那些分布均匀（冲突较少）的位进行哈希映射

3.平方取中法：取关键字平方后的中间几位为哈希地址

4.折叠法：将关键字分割成位数相同的几部分（最后一部分的位数可以不同），然后取这几部分的叠加和（舍去进位）作为哈希地址

5.除留余数法（最常用的方法）：取关键字被某个不大于哈希表表长m的数p除后所得余数为哈希地址：H(key)=key % p（p<=m）

6.（伪）随机数法

7.多项式法

关键问题二：处理冲突

1.开放寻址法（开放定址法）

• 线性探测法：使用某一种散列函数计算出初始散列地址 $H_0$，一旦发生冲突，在表中顺次向后寻找“下一个”空闲位置 $H_i$（即 $H_i = (H_0 + i) \bmod m$，$m$ 为表长）

• 平方探测法：增量 $d_i = i^2$ —— 依次探测 $h(k)$、$h(k)+1$、$h(k)+4$、…

•双散列法（再哈希法）：需要两个散列函数——第一个散列函数计算关键字的首选地址，一旦发生冲突，用第二个散列函数计算到下一地址的增量；或者直接计算下一个地址

线性探测法：

搜索：从h(k)开始顺序检查，直到某个桶满足：
关键字与目标关键字相同，搜索成功

空桶或回到 h(k) ，搜索失败
删除：懒惰删除——仅做删除标记

带有删除标记的桶所扮演的“角色”，因具体的操作类型而异

1）查找词条时，被视作“必不匹配的非空桶”，查找链在此得以延续

2）插入词条时，被视作“必然匹配的空闲桶”，可以用来存放新词条

缺点：

1.聚集问题：以往的冲突，会导致后续的冲突。设 $h(k_1)=i$，$h(k_2)=j$，$k_1$ 可能占据 $k_2$ 的 hash 表位置，从而可能在局部造成严重的聚集，性能急剧下降，即便 hash 表还很空

2.而当表较满时，性能几乎一定会很差

优点：

1.简单

2.只要表不满，总可以找到空位，插入成功

3.无需附加的（指针、链表或溢出区等）空间；查找链具有局部性，可充分利用系统缓存，有效减少I/O

（有点技巧的代码，删除部分待补充）：

#include<iostream>
using namespace std;

// Hash table class.
// Fixed-size table of elements of type E looked up by keys of type K.
// Collisions are resolved by probing consecutive buckets (see hSearch).
template<class E, class K>
class HashTable {
public:
    HashTable(int divisor = 11);// divisor: modulus of the hash function, also the bucket count
    ~HashTable() { delete[] ht;  delete[] empty; }
    bool Search(const K& k, E& e) const;
    HashTable<E, K>& Insert(const E& e);
    void Output();// output the hash table
private:
    int hSearch(const K& k) const;
    int m; // number of buckets (i.e. the divisor)
    E* ht; // hash table array
    bool* empty; // empty[i] is true while bucket i is unused
};

// Constructor.
// Allocates `divisor` buckets together with their occupancy flags and marks
// every bucket as empty.
template<class E, class K>
HashTable<E, K>::HashTable(int divisor)
    : m(divisor), ht(new E[divisor]), empty(new bool[divisor])
{
    int bucket = 0;
    while (bucket < m) {
        empty[bucket] = true;
        ++bucket;
    }
}

// Helper: probe an open-addressed table for key k.
//
// Starting at the home bucket k % m, buckets are examined one after another
// (wrapping around at the end of the table) until one of these happens:
//   - a bucket holding k is found     -> its index is returned;
//   - an empty bucket is found        -> its index is returned (insert point);
//   - the probe is back at the start  -> the table is full and k is absent;
//                                        the home bucket index is returned.
// Callers tell the three cases apart by inspecting empty[] and ht[] at the
// returned index.
template<class E, class K>
int HashTable<E, K>::hSearch(const K& k) const
{
    int home = k % m; // bucket k should ideally live in
    // For a negative key, % yields a negative remainder; shift it into
    // [0, m) so it can never index in front of the arrays.
    if (home < 0) home += m;

    int j = home; // start at the home bucket
    do
    {
        if (empty[j] || ht[j] == k) return j; // free slot, or k is already here
        j = (j + 1) % m; // step with wrap-around instead of a plain +1
    }
    while (j != home);
    // A full cycle was made without finding k or a free bucket.
    return j; // table full
}

// Insert.
// Stores e in the bucket chosen by hSearch and returns the table itself.
// Throws BadInput when an element with the same key is already stored, and
// NoMem when the table is full.
template<class E, class K>
HashTable<E, K>& HashTable<E, K>::Insert(const E& e)
{
    K k = e; // extract the key from the element
    int b = hSearch(k);

    if (!empty[b]) {
        // The slot is taken: either it already holds this key, or the probe
        // went all the way round without finding a free bucket.
        if (ht[b] == k) throw BadInput(); // duplicate key
        throw NoMem();                    // table is full
    }

    empty[b] = false;
    ht[b] = e;
    return *this;
}

// Search.
// Copies the element matching k into e and returns true; returns false and
// leaves e untouched when no stored element matches.
template<class E, class K>
bool HashTable<E, K>::Search(const K& k, E& e) const
{
    const int slot = hSearch(k);
    // A hit needs an occupied bucket whose content is not different from k.
    const bool hit = !empty[slot] && !(ht[slot] != k);
    if (hit)
        e = ht[slot];
    return hit;
}

2.链表法（拉链法[链地址法]）

与1不同，遇到冲突时不是向后寻找空白桶，而是在当前桶链接成一个链表

#include<iostream>
using namespace std;

//chain hash table
template<class E, class K>
class ChainHashTable {
public:
    ChainHashTable(int divisor = 11)
    {
        m = divisor; ht = new SortedChain<E, K>[m];
    }
    ~ChainHashTable() { delete[] ht; }
    bool Search(const K& k, E& e) const
    {
        return ht[k % m].Search(k, e);
    }
    ChainHashTable<E, K>& Insert(const E& e)
    {
        ht[e % m].DistinctInsert(e); return *this;
    }
    ChainHashTable<E, K>& Delete(const K& k, E& e)
    {
        ht[k % m].Delete(k, e); return *this;
    }
    void Output() const;   // output the table
private:
    int m;                 // divisor
    SortedChain<E, K>* ht;  // array of chains
};

3.公共溢出区法

单独开辟一块连续空间，发生冲突的词条，顺序存入此区域

三、跳表

降低了有序链表的查找复杂度：期望 $O(\log n)$

插入：注意如果在 n~m 间插入若干数据，则上一层（期望）要多出 (m-n)/2 个结点，每再上一层依次减半

#include<iostream>
using namespace std;

// Quadlist: one horizontal level of a skip list.
// NOTE(review): QuadlistNodePosi(T) is not defined in this snippet; it is
// presumably a macro for "pointer to QuadlistNode<T>" - confirm against the
// header that provides it.
template <typename T> 
class Quadlist
{ // quad-linked list
private: 
	int _size; // number of elements
	QuadlistNodePosi(T) header, trailer; // sentinel nodes at the two ends
protected:
	void init(); int clear(); // initialise the list; remove all nodes
public:
	QuadlistNodePosi(T) first() const
	{ return header->succ; } // first node: the successor of header
	QuadlistNodePosi(T) last() const
	{ return trailer->pred; } // last node: the predecessor of trailer
	T remove(QuadlistNodePosi(T) p); // remove node p
	QuadlistNodePosi(T) insertAfterAbove // insert e as the successor of p and the upper neighbour of b
	(T const& e, QuadlistNodePosi(T) p, QuadlistNodePosi(T) b = NULL);
};
	
// Skip list: a dictionary implemented as a list of Quadlist levels.
//
// The List base stores POINTERS to Quadlist<Entry<K, V>>: size() dereferences
// `data` with ->, put() inserts `new Quadlist<Entry<K, V>>`, and skipSearch()
// takes ListNode<Quadlist<Entry<K, V>>*>*. The base class is therefore
// List<Quadlist<Entry<K, V>>*>, not a list of Quadlists of entry pointers.
//
// Members inherited from the template base are reached through this-> or an
// explicit qualification, because unqualified names are not looked up in a
// dependent base class.
template <typename K, typename V>
class Skiplist : public Dictionary<K, V>, public List<Quadlist<Entry<K, V>>*>
{ // multiple inheritance
protected:
	// Search for key k starting from node p on level qlist; both references
	// are updated as the search moves right and down.
	bool skipSearch(ListNode<Quadlist<Entry<K, V>>*>* &qlist,
		QuadlistNode<Entry<K, V>>* &p, K& k);
public:
	int size() // number of entries, i.e. the size of the bottom-level Quadlist
	{ return this->empty() ? 0 : this->last()->data->size(); }
	int level() { return List<Quadlist<Entry<K, V>>*>::size(); } // height, i.e. the number of Quadlists
	bool put(K, V); // insert (duplicates are allowed, so this always succeeds)
	V* get(K k); // read (implemented on top of skipSearch())
	bool remove(K k); // delete
};
// Search.
// Looks for key k by moving right within a level and then down one level at a
// time. qlist and p are in/out parameters: they give the level and node to
// start from and are advanced as the search proceeds.
// Returns true as soon as a node with key k is found (p then points at it);
// returns false once the search has moved past the bottom level, in which
// case p is the last node reached on the bottom level.
template <typename K, typename V> bool Skiplist<K, V>::skipSearch
(ListNode<Quadlist<Entry<K, V>>*>*& qlist, // starting from the given level qlist
	QuadlistNode<Entry<K, V>>* &p, // and from its node p,
	K& k) { // search rightwards and downwards for the target key k
	while (true) { // on each level
		while (p->succ && (p->entry.key <= k)) // scan forwards
			p = p->succ; // until a larger key appears or the scan reaches trailer
		p = p->pred; // step back once; now a hit can be decided
		if (p->pred && (k == p->entry.key)) return true; // hit: report success
		qlist = qlist->succ; // otherwise move on to the next level down
		if (!qlist->succ) return false; // already past the bottom level: the search has failed
		p = (p->pred) ? // otherwise continue from
			p->below : qlist->data->first(); // the node below p in the current tower (or the level's first node when p has no predecessor)
	}
}
// Insert.
// Adds the entry (k, v) as a new tower immediately to the right of the
// position located by skipSearch. The tower starts on the bottom level and
// grows by one level for as long as rand() % 2 is non-zero, creating a new top
// level when it outgrows the existing ones. Always returns true.
template <typename K, typename V>
bool Skiplist<K, V>::put(K k, V v) {
	Entry<K, V> e = Entry<K, V>(k, v); // the new entry; copies of it are inserted on a random number of levels
	if (empty()) insertAsFirst(new Quadlist<Entry<K, V>>); // no level exists yet: create the first one
	ListNode<Quadlist<Entry<K, V>>*>* qlist = first(); // start from the top-level list,
	QuadlistNode<Entry<K, V>>* p = qlist->data->first(); // at its first node
	if (skipSearch(qlist, p, k)) // look for the proper insertion position
		while (p->below) p = p->below; // an equal key already exists: force p down to the bottom of that tower
	qlist = last(); // from here on, a new tower grows bottom-up right next to p
	QuadlistNode<Entry<K, V>>* b // the new node b is
		= qlist->data->insertAfterAbove(e, p); // the base of the new tower
	while (rand() % 2) { // coin toss: while the new tower must grow one more level
		while (qlist->data->valid(p) && !p->above) // find the nearest predecessor
			p = p->pred; // that is at least this tall
		if (!qlist->data->valid(p)) { // that predecessor is the header
			if (qlist == first()) // and this is already the top level, so
				insertAsFirst(new Quadlist<Entry<K, V>>); // a new level has to be created first; then
			p = qlist->pred->data->first()->pred; // move p to the header of the level above
		}
		else // otherwise simply
			p = p->above; // lift p to that height
		qlist = qlist->pred; // go up one level and insert the new node there,
		b = qlist->data->insertAfterAbove(e, p, b); // after p and above b
	}
	return true; // duplicates are allowed, so insertion always succeeds
} // Note: which steps were simplified thanks to the sentinel nodes?

oj——求整数最大间隔-性能

问题：总是忘记考虑桶为0的相关情况

#include<algorithm>
#include<iostream>
#include<vector>
using namespace std;

int seed, n; // seed: state of the generator below; n: number of elements (both read in main)

// Linear congruential generator: state = state * 214013 + 2531011 (mod 2^32);
// returns bits 16..30 of the new state, i.e. a value in [0, 0x7fff].
//
// The state is advanced in unsigned arithmetic so that wrap-around is well
// defined even where `long` is only 32 bits wide, and so that the shift never
// operates on a negative value. The produced sequence is the same as with the
// signed formulation.
int rand()
{
	const unsigned int next = static_cast<unsigned int>(seed) * 214013u + 2531011u;
	seed = static_cast<int>(next);
	return static_cast<int>((next >> 16) & 0x7fffu);
}
// Build a non-negative 31-bit value from three successive rand() results:
// 15 high bits, 15 middle bits and 1 low bit.
//
// Each rand() call changes the generator state, and the order in which the
// operands of + are evaluated is unspecified. The three draws are therefore
// taken in separate statements so the result is the same on every compiler.
int rand32()
{
	const int high = rand();   // becomes bits 16..30
	const int middle = rand(); // becomes bits 1..15
	const int low = rand();    // only its lowest bit is used
	return (high << 16) + (middle << 1) + low % 2;
}

// Largest difference between two neighbouring values of a[0..n-1] in sorted
// order, computed in O(n) with the bucket (pigeonhole) method.
//
// The value range [min, max] is cut into n-1 equal slices and every value is
// dropped into the bucket of its slice (the maximum gets bucket n-1 to
// itself). The largest gap is at least one slice wide, while two values in the
// same bucket are less than one slice apart, so the answer is always the
// distance from the maximum of one non-empty bucket to the minimum of the next
// non-empty bucket. Empty buckets are skipped.
//
// @param a  array holding at least n values (n is the file-level count)
// @return   the maximum gap; 0 when there are fewer than two values or all
//           values are equal
int maxgap(int a[])
{
	if (n < 2) return 0; // no neighbouring pair exists

	int lowest = a[0], highest = a[0];
	for (int i = 1; i < n; i++)
	{
		lowest = std::min(lowest, a[i]);
		highest = std::max(highest, a[i]);
	}
	if (lowest == highest) return 0;

	// 64-bit integers keep the bucket index exact and free of overflow:
	// (value - lowest) < 2^32 and (n - 1) < 2^31.
	const long long range = static_cast<long long>(highest) - lowest;
	const long long slices = static_cast<long long>(n) - 1;

	std::vector<char> used(n, 0); // used[b] != 0 once bucket b holds a value
	std::vector<int> bucketMin(n);
	std::vector<int> bucketMax(n);
	for (int i = 0; i < n; i++)
	{
		// The "hash function": values at a similar distance from the minimum
		// share a bucket; the index is always within [0, n-1].
		const long long offset = static_cast<long long>(a[i]) - lowest;
		const int index = static_cast<int>(offset * slices / range);
		if (!used[index])
		{
			// First value in this bucket: it is both the minimum and maximum.
			bucketMin[index] = a[i];
			bucketMax[index] = a[i];
			used[index] = 1;
		}
		else
		{
			bucketMin[index] = std::min(bucketMin[index], a[i]);
			bucketMax[index] = std::max(bucketMax[index], a[i]);
		}
	}

	// Bucket 0 is never empty because the minimum always lands in it.
	long long best = 0;
	int previousMax = bucketMax[0];
	for (int i = 1; i < n; i++)
	{
		if (!used[i]) continue; // empty buckets contribute nothing
		best = std::max(best, static_cast<long long>(bucketMin[i]) - previousMax);
		previousMax = bucketMax[i];
	}
	return static_cast<int>(best);
}
int main()
{
	cin >> n >> seed;
	int* np = new int[n];
	for (int i = 0; i < n; i++)
		np[i] = rand32();//随即得到n个数字
	cout << maxgap(np);
	return 0;
}