七战数据结构--堆

贩卖纯净水.

已于 2024-04-18 02:30:03 修改

阅读量1k

点赞数 30

分类专栏：数据结构文章标签：算法数据结构排序算法

于 2024-02-11 10:34:36 首次发布

本文链接：https://blog.csdn.net/chestnut_orenge/article/details/136022837

版权

数据结构专栏收录该内容

13 篇文章 2 订阅

订阅专栏

堆总是一棵完全二叉树，一般将数组数据看做一棵完全二叉树。

小堆要求：任意一个父亲<=孩子

大堆要求：任意一个父亲>=孩子

例：

1.下列关键字序列为堆的是：（）

A 100,60,70,50,32,65

B 60,70,65,50,32,100

C 65,100,70,32,50,60

D 70,65,100,32,50,60

E 32,50,100,70,65,60

F 50,100,70,65,60,32

答案：A（100,60,70,50,32,65 中每个父结点都 >= 它的孩子，是大堆；其余选项都至少有一对父子既不满足大堆也不满足小堆的要求）

一般我们说的堆指两个东西，其一是数据结构中的堆（完全二叉树），其二是操作系统中的堆（内存区域的划分，动态内存申请的区域），它们是不同学科里的同名名称。

堆的存在有什么意义呢？

1.堆排序（时间复杂度为O（N*logN））

2.top K问题（找出一组数据中最大/最小的K个值）

tips：堆的功能是选数

实现

声明

// Element type stored in the heap.
typedef int HPDataType;
// Handle for a heap kept in a dynamically grown array.
typedef struct Heap
{
	HPDataType* a;   // element storage; HeapInit sets it to NULL, HeapPush grows it with realloc
	int size;        // number of elements currently stored
	int capacity;    // number of elements the storage can hold before HeapPush must grow it
}HP;

初始化

// Puts a heap handle into the empty state: no storage, no elements, no capacity.
// php must not be NULL (checked with assert).
void HeapInit(HP* php)
{
	assert(php);
	php->size = php->capacity = 0;
	php->a = NULL;
}

插入

插入时先将元素插入到堆的末尾（最后一个孩子之后），插入之后如果堆的性质遭到破坏，将新插入节点顺着其双亲向上调整到合适位置即可。

向上调整函数

// Sifts the element at index child up a min-heap stored in a: while it is
// smaller than its parent the two are swapped. Stops at the root or at the
// first parent that is not larger.
void AdjustUp(HPDataType* a, int child)
{
	for (int up = (child - 1) / 2; child > 0 && a[child] < a[up]; up = (up - 1) / 2)
	{
		Swap(&a[child], &a[up]);
		child = up;
	}
}

插入函数

// Appends x to the heap and restores the heap order by sifting it up.
// When the storage is full it is grown first: to 4 elements if it was empty,
// otherwise to twice its capacity. If realloc fails the error is reported with
// perror and the heap is left unchanged (x is not inserted).
void HeapPush(HP* php, HPDataType x)
{
	assert(php);
	if (php->size == php->capacity)
	{
		int grown = 4;
		if (php->capacity != 0)
		{
			grown = php->capacity * 2;
		}
		HPDataType* bigger = (HPDataType*)realloc(php->a, sizeof(HPDataType) * grown);
		if (bigger == NULL)
		{
			perror("realloc fail");
			return;
		}
		php->a = bigger;
		php->capacity = grown;
	}
	int last = php->size++;   // index of the newly occupied slot
	php->a[last] = x;
	AdjustUp(php->a, last);
}

删除

堆中的删除有什么意义呢？

堆删除删堆顶（最大or最小），可以筛选次小和次大。

那该怎样删除呢？

1.挪动覆盖删除（必不可行，关系全乱套了）

2.首尾交换，再尾删（此时左右子树仍然各自是堆，未被破坏），然后对堆顶使用向下调整算法：不断与较小的孩子（小堆）或较大的孩子（大堆）交换，直到满足堆的性质或到达叶子（完全正确）

交换函数

// Exchanges the two values that x and y point to.
void Swap(HPDataType* x,HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}

向下调整函数

// Sifts the element at index parent down a min-heap of size elements stored in a.
// At each step the smaller of the two children is picked; if it is smaller than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (;;)
	{
		int smaller = parent * 2 + 1;   // start with the left child
		if (smaller >= size)
		{
			return;                     // parent has no children
		}
		// Switch to the right child when it exists and is smaller than the left one.
		if (smaller + 1 < size && a[smaller + 1] < a[smaller])
		{
			smaller++;
		}
		if (!(a[smaller] < a[parent]))
		{
			return;                     // heap order already holds here
		}
		Swap(&a[smaller], &a[parent]);
		parent = smaller;
	}
}

删除函数

// Removes the top (root) element: swaps it with the last element, shrinks the
// heap by one and sifts the new root down. php must be non-NULL and the heap
// non-empty (both asserted).
void HeapPop(HP* php)
{
	assert(php);
	assert(php->size > 0);
	int last = php->size - 1;
	Swap(&php->a[0], &php->a[last]);
	php->size = last;
	AdjustDown(php->a, last, 0);
}

判空

// Reports whether the heap currently holds no elements. php must not be NULL.
bool HeapEmpty(HP* php)
{
	assert(php);
	if (php->size != 0)
	{
		return false;
	}
	return true;
}

有效数据个数

// Returns the number of elements currently stored in the heap. php must not be NULL.
size_t HeapSize(HP* php)
{
	assert(php);
	size_t count = php->size;   // int field converted to the size_t return type
	return count;
}

查看堆顶元素

// Returns the element at the top of the heap (index 0 of the array).
// php must be non-NULL and the heap non-empty; both are asserted because an
// empty heap may still have a NULL array, and reading a[0] would be invalid.
//
// NOTE(review): the declared return type is HPDataType*, yet the expression
// returned is an element value (HPDataType), not an address. The signature is
// left untouched here so it keeps matching its prototype; changing it to
// return HPDataType has to be done on the prototype and definition together.
HPDataType* HeapTop(HP* php)
{
	assert(php);
	assert(php->size > 0);
	return php->a[0];
}

销毁

// Releases the heap's element storage and resets the handle to the empty state.
// Only the array owned by the handle is freed; the handle itself belongs to the
// caller (it may be a local variable) and must not be passed to free.
// php must not be NULL (checked with assert).
void HeapDestroy(HP* php)
{
	assert(php);
	free(php->a);        // free(NULL) is a no-op, so a never-used heap is fine
	php->a = NULL;       // guard against use-after-free / double free
	php->size = php->capacity = 0;
}

完整代码

Heap.h

#pragma once
#include<stdbool.h>
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
// Element type stored in the heap.
typedef int HPDataType;
// Handle for a heap kept in a dynamically grown array.
typedef struct Heap
{
	HPDataType* a;   // element storage
	int size;        // number of elements currently stored
	int capacity;    // number of elements the storage can hold
}HP;
// Sets the handle to the empty state (NULL storage, size 0, capacity 0).
void HeapInit(HP* php);
// Tears the heap down.
void HeapDestroy(HP* php);
// Inserts x, growing the storage when it is full.
void HeapPush(HP* php, HPDataType x);
// Removes the element at the top of the heap; the heap must be non-empty.
void HeapPop(HP* php);
// Gives access to the element at the top of the heap.
// NOTE(review): declared as returning a pointer, while the definition returns a[0] itself — confirm intended type.
HPDataType* HeapTop(HP* php);
// Returns the number of stored elements.
size_t HeapSize(HP* php);
// Returns true when the heap holds no elements.
bool HeapEmpty(HP* php);

Heap.c

#include"Heap.h"
// Puts a heap handle into the empty state: no storage, no elements, no capacity.
// php must not be NULL (checked with assert).
void HeapInit(HP* php)
{
	assert(php);
	php->size = php->capacity = 0;
	php->a = NULL;
}
// Releases the heap's element storage and resets the handle to the empty state.
// Only the array owned by the handle is freed; the handle itself belongs to the
// caller (it may be a local variable) and must not be passed to free.
// php must not be NULL (checked with assert).
void HeapDestroy(HP* php)
{
	assert(php);
	free(php->a);        // free(NULL) is a no-op, so a never-used heap is fine
	php->a = NULL;       // guard against use-after-free / double free
	php->size = php->capacity = 0;
}
// Exchanges the two values that x and y point to.
void Swap(HPDataType* x,HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}
// Sifts the element at index child up a min-heap stored in a: while it is
// smaller than its parent the two are swapped. Stops at the root or at the
// first parent that is not larger.
void AdjustUp(HPDataType* a, int child)
{
	while (child > 0)
	{
		int up = (child - 1) / 2;   // index of the parent
		if (!(a[child] < a[up]))
		{
			return;
		}
		Swap(&a[child], &a[up]);
		child = up;
	}
}
// Appends x to the heap and restores the heap order by sifting it up.
// When the storage is full it is grown first: to 4 elements if it was empty,
// otherwise to twice its capacity. If realloc fails the error is reported with
// perror and the heap is left unchanged (x is not inserted).
void HeapPush(HP* php, HPDataType x)
{
	assert(php);
	if (php->size == php->capacity)
	{
		int grown = 4;
		if (php->capacity != 0)
		{
			grown = php->capacity * 2;
		}
		HPDataType* bigger = (HPDataType*)realloc(php->a, sizeof(HPDataType) * grown);
		if (bigger == NULL)
		{
			perror("realloc fail");
			return;
		}
		php->a = bigger;
		php->capacity = grown;
	}
	int last = php->size++;   // index of the newly occupied slot
	php->a[last] = x;
	AdjustUp(php->a, last);
}
// Sifts the element at index parent down a min-heap of size elements stored in a.
// At each step the smaller of the two children is picked; if it is smaller than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (int pick = parent * 2 + 1; pick < size; pick = parent * 2 + 1)
	{
		// pick starts as the left child; use the right one when it exists and is smaller.
		if (pick + 1 < size && a[pick + 1] < a[pick])
		{
			pick++;
		}
		if (!(a[pick] < a[parent]))
		{
			return;   // heap order already holds here
		}
		Swap(&a[pick], &a[parent]);
		parent = pick;
	}
}
// Removes the top (root) element: swaps it with the last element, shrinks the
// heap by one and sifts the new root down. php must be non-NULL and the heap
// non-empty (both asserted).
void HeapPop(HP* php)
{
	assert(php);
	assert(php->size > 0);
	int last = php->size - 1;
	Swap(&php->a[0], &php->a[last]);
	php->size = last;
	AdjustDown(php->a, last, 0);
}
// Returns the element at the top of the heap (index 0 of the array).
// php must be non-NULL and the heap non-empty; both are asserted because an
// empty heap may still have a NULL array, and reading a[0] would be invalid.
//
// NOTE(review): the declared return type is HPDataType*, yet the expression
// returned is an element value (HPDataType), not an address. The signature is
// left untouched here so it keeps matching the prototype in Heap.h; changing it
// to return HPDataType has to be done on prototype and definition together.
HPDataType* HeapTop(HP* php)
{
	assert(php);
	assert(php->size > 0);
	return php->a[0];
}
// Returns the number of elements currently stored in the heap. php must not be NULL.
size_t HeapSize(HP* php)
{
	assert(php);
	size_t count = php->size;   // int field converted to the size_t return type
	return count;
}
// Reports whether the heap currently holds no elements. php must not be NULL.
bool HeapEmpty(HP* php)
{
	assert(php);
	if (php->size != 0)
	{
		return false;
	}
	return true;
}

test.c

#include"Heap.h"
int main()
{
	int arr[] = { 4,6,2,1,5,8,2,9 };
	HP hp;
	HeapInit(&hp);
	for (int i = 0; i < sizeof(arr) / sizeof(arr[0]); i++)
	{
		HeapPush(&hp, arr[i]);
	}
	for (int i = 0; i < sizeof(arr) / sizeof(arr[0]); i++)
	{
		printf("%d ", hp.a[i]);
	}
	printf("\n");
	int k = 3;
	while (k>0)
	{
		printf("%d ", HeapTop(&hp));
		HeapPop(&hp);
		k--;
	}
	return 0;
}

应用

堆排序

堆排序需要先建堆，（模拟堆插入过程建堆）插入建堆：

// Build the heap in place by treating each element as a fresh insertion:
// element i is sifted up into the prefix a[0..i-1] that earlier iterations arranged.
for (int i = 0; i < n; i++)
{
	AdjustUp(a, i);
}

排升序要建大堆，排降序要建小堆

解释：堆排序本质上是一个根据堆特性设计的选择排序，建小堆能选出最小的数，怎么筛选出次小的呢？剩下的数关系混乱，只能重新建堆，代价过大。

正确做法应该是建立大堆，然后首尾交换（此时最大的数被换到末尾），交换后不再把末尾的数看做堆中元素，对堆顶向下调整选出次大的数，再进行下一次首尾交换，如此重复直到堆中只剩一个元素。

建堆的时间复杂度： $N\log N$

选数的时间复杂度： $(N-1)\log N$

堆排序的时间复杂度： $N\log N$

虽然表面上差的不是很多，但是实际上差别可是很大的（堆排序性能真牛杯）：

| N | 1000 | 100W |
| --- | --- | --- |
| 冒泡排序 $O(N^2)$ | 100W | 1万亿 |
| 堆排序 $O(N\log N)$ | 1W | 2000W |

下面以升序排列为例演示堆排序：

建堆

// Sifts the element at index child up a max-heap stored in a: while it is
// larger than its parent the two are swapped. Stops at the root or at the
// first parent that is not smaller.
void AdjustUp(HPDataType* a, int child)
{
	for (int up = (child - 1) / 2; child > 0 && a[child] > a[up]; up = (up - 1) / 2)
	{
		Swap(&a[child], &a[up]);
		child = up;
	}
}

选数

// Sifts the element at index parent down a max-heap of size elements stored in a.
// At each step the larger of the two children is picked; if it is larger than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (;;)
	{
		int larger = parent * 2 + 1;   // start with the left child
		if (larger >= size)
		{
			return;                    // parent has no children
		}
		// Switch to the right child when it exists and is larger than the left one.
		if (larger + 1 < size && a[larger + 1] > a[larger])
		{
			larger++;
		}
		if (!(a[larger] > a[parent]))
		{
			return;                    // heap order already holds here
		}
		Swap(&a[larger], &a[parent]);
		parent = larger;
	}
}
// Exchanges the two values that x and y point to.
void Swap(HPDataType* x, HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}

完整代码

HeapSort.h

#pragma once
#include<stdio.h>
// Element type being sorted.
typedef int HPDataType;
// Sifts a[parent] down within the first size elements (max-heap order in HeapSort.c).
void AdjustDown(HPDataType* a, int size, int parent);
// Exchanges the values pointed to by x and y.
void Swap(HPDataType* x, HPDataType* y);
// Sifts a[child] up towards index 0 (max-heap order in HeapSort.c).
void AdjustUp(HPDataType* a, int child);
// Sorts the n elements of a in place into ascending order.
void Heapsort(HPDataType* a, int n);

HeapSort.c

#include"HeapSort.h"
// Sifts the element at index parent down a max-heap of size elements stored in a.
// At each step the larger of the two children is picked; if it is larger than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (int pick = parent * 2 + 1; pick < size; pick = parent * 2 + 1)
	{
		// pick starts as the left child; use the right one when it exists and is larger.
		if (pick + 1 < size && a[pick + 1] > a[pick])
		{
			pick++;
		}
		if (!(a[pick] > a[parent]))
		{
			return;   // heap order already holds here
		}
		Swap(&a[pick], &a[parent]);
		parent = pick;
	}
}
// Exchanges the two values that x and y point to.
void Swap(HPDataType* x, HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}
// Sifts the element at index child up a max-heap stored in a: while it is
// larger than its parent the two are swapped. Stops at the root or at the
// first parent that is not smaller.
void AdjustUp(HPDataType* a, int child)
{
	while (child > 0)
	{
		int up = (child - 1) / 2;   // index of the parent
		if (!(a[child] > a[up]))
		{
			return;
		}
		Swap(&a[child], &a[up]);
		child = up;
	}
}
// Sorts the n elements of a in place into ascending order.
// Phase 1 builds a max-heap by sifting each element up in turn.
// Phase 2 repeatedly swaps the maximum (index 0) with the last element of the
// still-unsorted prefix and sifts the new root down within that shrunken prefix.
void Heapsort(HPDataType* a,int n)
{
	int built = 0;
	while (built < n)
	{
		AdjustUp(a, built);
		++built;
	}
	for (int end = n - 1; end > 0; --end)
	{
		Swap(&a[end], &a[0]);
		AdjustDown(a, end, 0);   // end is now the heap size: a[end] is excluded
	}
}

test.c

#include"HeapSort.h"
// Demo: sorts a fixed nine-element array with Heapsort and prints the result.
int main()
{
	int arr[9] = { 1,5,6,9,7,4,2,3,8 };
	Heapsort(arr, sizeof(arr) / sizeof(arr[0]));
	// Walk the array with a pointer and print every element followed by a space.
	for (const int* p = arr; p != arr + 9; ++p)
	{
		printf("%d ", *p);
	}
	return 0;
}

top K问题

TOP-K问题：即求数据集合中前K个最大或最小的元素，一般情况下数据量都比较大。

例如：专业前10名、世界500强、富豪榜、游戏中排行前100的玩家...

对于Top-K问题，能想到的最简单直接的方式就是排序。但是，如果数据量非常大，就不太适合排序(数据量过大时可能数据都不能一下子全部加载到内存中，就算是时间复杂度最优的排序算法也过于复杂，最小是 $N\log N$ )，以堆排序为例：

N个数插入到大堆里面，再Pop K次，时间复杂度为：
$N\log N + K\log N \rightarrow O(N\log N)$
N是非常大的数，假设N为100亿，K是10
那么100亿个整数需要多少空间呢?
1G = 1024MB = 1024*1024KB = 1024*1024*1024 byte ≈ 10亿byte

那么一百亿个整数（int类型，每个4字节）需要的空间就约为40G
内存不够，排序受限

最佳的方式就是用堆来解决，基本思路如下：

1. 用数据集合中前K个元素来建堆

求前k个最大的元素，建小堆

求前k个最小的元素，建大堆

2. 用剩余的N-K个元素依次与堆顶元素来比较，不满足则替换堆顶元素进堆(替换堆顶值，再向下调整)，将剩余N-K个元素依次与堆顶元素比完之后，堆中剩余的K个元素就是所求的前K个最小或者最大的元素。

时间复杂度: $O(N\log K)$ ，当N非常大，K很小时， $\log K$ 可以看作常数，则时间复杂度约为： $O(N)$

制造数据

实现一个制造数据的函数（自制测试用例）：

// Writes 10,000,000 pseudo-random integers in the range [0, 10000000) to
// "data.txt", one per line, overwriting any existing file. Failures to open,
// write or close the file are reported with perror.
void CreateDate()
{
	const int n = 10000000;          // how many values to write
	const int range = 10000000;      // values are reduced modulo this
	// Seed the generator from the clock so each run produces different data.
	srand((unsigned int)time(NULL));
	const char* file = "data.txt";
	FILE* fin = fopen(file, "w");
	if (fin == NULL)
	{
		perror("fopen error");
		return;
	}
	for (int i = 0; i < n; i++)
	{
		// rand() can return up to RAND_MAX, which may equal INT_MAX, so the
		// sum is formed in long long to avoid signed int overflow.
		int x = (int)(((long long)rand() + i) % range);
		if (fprintf(fin, "%d\n", x) < 0)
		{
			perror("fprintf error");
			break;
		}
	}
	// fclose flushes buffered output, so a failure here means data was lost.
	if (fclose(fin) != 0)
	{
		perror("fclose error");
	}
}

建堆

找前K个最大的数，建小堆：

// Sifts the element at index child up a min-heap stored in a: while it is
// smaller than its parent the two are swapped. Stops at the root or at the
// first parent that is not larger.
void AdjustUp(HPDataType* a, int child)
{
	for (int up = (child - 1) / 2; child > 0 && a[child] < a[up]; up = (up - 1) / 2)
	{
		Swap(&a[child], &a[up]);
		child = up;
	}
}

向下调整

// Sifts the element at index parent down a min-heap of size elements stored in a.
// At each step the smaller of the two children is picked; if it is smaller than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (;;)
	{
		int smaller = parent * 2 + 1;   // start with the left child
		if (smaller >= size)
		{
			return;                     // parent has no children
		}
		// Switch to the right child when it exists and is smaller than the left one.
		if (smaller + 1 < size && a[smaller + 1] < a[smaller])
		{
			smaller++;
		}
		if (!(a[smaller] < a[parent]))
		{
			return;                     // heap order already holds here
		}
		Swap(&a[smaller], &a[parent]);
		parent = smaller;
	}
}

打印

// Prints the k largest integers found in the text file named by file.
// A min-heap of k values is kept: it is filled from the first k numbers, then
// every later number larger than the heap's minimum replaces that minimum.
// The survivors are printed in heap-array order (not sorted), followed by a
// newline. If the file holds fewer than k numbers, all of them are printed.
// Nothing is done when k <= 0. Open/allocation failures are reported with perror.
void PrintTopK(const char* file,int k)
{
	if (k <= 0)
	{
		return;   // no heap to build; minHeap[0] would not exist
	}
	FILE* fout = fopen(file, "r");
	if (fout == NULL)
	{
		perror("fopen error");
		return;
	}
	int* minHeap = (int*)malloc(sizeof(int) * k);
	if (minHeap == NULL)
	{
		perror("malloc fail");
		fclose(fout);
		return;
	}
	// Fill the heap with up to k values; stop early if the input runs out or
	// contains something that is not a number.
	int count = 0;
	while (count < k && fscanf(fout, "%d", &minHeap[count]) == 1)
	{
		AdjustUp(minHeap, count);
		count++;
	}
	if (count == k)
	{
		int x = 0;
		// Compare against 1 rather than EOF: fscanf returns 0 on a token it
		// cannot convert and would otherwise be retried forever.
		while (fscanf(fout, "%d", &x) == 1)
		{
			if (x > minHeap[0])
			{
				minHeap[0] = x;
				AdjustDown(minHeap, k, 0);
			}
		}
	}
	for (int i = 0; i < count; i++)
	{
		printf("%d ", minHeap[i]);
	}
	printf("\n");
	free(minHeap);
	fclose(fout);
}

为确保正确性，可以在创造完数据后，手动把文件中的几个数改成比10000000大的数；如果打印出的前K个数正是这几个数，就说明实现正确：

完整代码

HeapTop.h

#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include<stdio.h>
#include<time.h>
#include<stdlib.h>
// Element type held in the heap.
typedef int HPDataType;
// Exchanges the values pointed to by x and y.
void Swap(HPDataType* x, HPDataType* y);
// Sifts a[child] up towards index 0 (min-heap order in HeapTop.c).
void AdjustUp(HPDataType* a, int child);
// Sifts a[parent] down within the first size elements (min-heap order in HeapTop.c).
void AdjustDown(HPDataType* a, int size, int parent);
// Generates the test input file "data.txt" filled with pseudo-random integers.
void CreateDate();
// Reads integers from file and prints the k largest of them.
void PrintTopK(const char* file, int k);

HeapTop.c

#include"HeapTop.h"
// Exchanges the two values that x and y point to.
void Swap(HPDataType* x, HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}
// Sifts the element at index child up a min-heap stored in a: while it is
// smaller than its parent the two are swapped. Stops at the root or at the
// first parent that is not larger.
void AdjustUp(HPDataType* a, int child)
{
	while (child > 0)
	{
		int up = (child - 1) / 2;   // index of the parent
		if (!(a[child] < a[up]))
		{
			return;
		}
		Swap(&a[child], &a[up]);
		child = up;
	}
}
// Sifts the element at index parent down a min-heap of size elements stored in a.
// At each step the smaller of the two children is picked; if it is smaller than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (int pick = parent * 2 + 1; pick < size; pick = parent * 2 + 1)
	{
		// pick starts as the left child; use the right one when it exists and is smaller.
		if (pick + 1 < size && a[pick + 1] < a[pick])
		{
			pick++;
		}
		if (!(a[pick] < a[parent]))
		{
			return;   // heap order already holds here
		}
		Swap(&a[pick], &a[parent]);
		parent = pick;
	}
}
// Writes 10,000,000 pseudo-random integers in the range [0, 10000000) to
// "data.txt", one per line, overwriting any existing file. Failures to open,
// write or close the file are reported with perror.
void CreateDate()
{
	const int n = 10000000;          // how many values to write
	const int range = 10000000;      // values are reduced modulo this
	// Seed the generator from the clock so each run produces different data.
	srand((unsigned int)time(NULL));
	const char* file = "data.txt";
	FILE* fin = fopen(file, "w");
	if (fin == NULL)
	{
		perror("fopen error");
		return;
	}
	for (int i = 0; i < n; i++)
	{
		// rand() can return up to RAND_MAX, which may equal INT_MAX, so the
		// sum is formed in long long to avoid signed int overflow.
		int x = (int)(((long long)rand() + i) % range);
		if (fprintf(fin, "%d\n", x) < 0)
		{
			perror("fprintf error");
			break;
		}
	}
	// fclose flushes buffered output, so a failure here means data was lost.
	if (fclose(fin) != 0)
	{
		perror("fclose error");
	}
}
// Prints the k largest integers found in the text file named by file.
// A min-heap of k values is kept: it is filled from the first k numbers, then
// every later number larger than the heap's minimum replaces that minimum.
// The survivors are printed in heap-array order (not sorted), followed by a
// newline. If the file holds fewer than k numbers, all of them are printed.
// Nothing is done when k <= 0. Open/allocation failures are reported with perror.
void PrintTopK(const char* file,int k)
{
	if (k <= 0)
	{
		return;   // no heap to build; minHeap[0] would not exist
	}
	FILE* fout = fopen(file, "r");
	if (fout == NULL)
	{
		perror("fopen error");
		return;
	}
	int* minHeap = (int*)malloc(sizeof(int) * k);
	if (minHeap == NULL)
	{
		perror("malloc fail");
		fclose(fout);
		return;
	}
	// Fill the heap with up to k values; stop early if the input runs out or
	// contains something that is not a number.
	int count = 0;
	while (count < k && fscanf(fout, "%d", &minHeap[count]) == 1)
	{
		AdjustUp(minHeap, count);
		count++;
	}
	if (count == k)
	{
		int x = 0;
		// Compare against 1 rather than EOF: fscanf returns 0 on a token it
		// cannot convert and would otherwise be retried forever.
		while (fscanf(fout, "%d", &x) == 1)
		{
			if (x > minHeap[0])
			{
				minHeap[0] = x;
				AdjustDown(minHeap, k, 0);
			}
		}
	}
	for (int i = 0; i < count; i++)
	{
		printf("%d ", minHeap[i]);
	}
	printf("\n");
	free(minHeap);
	fclose(fout);
}

test.c

#include"HeapTop.h"
// Demo: generates the data file, then prints the 5 largest values it contains.
int main()
{
	const char* path = "data.txt";
	int wanted = 5;
	CreateDate();
	PrintTopK(path, wanted);
	return 0;
}

向下调整建堆

给出一个数组，这个数组逻辑上可以看做一棵完全二叉树，但还不是一个堆。通过算法把它构建成堆：从倒数第一个非叶子结点（最后一个结点的父亲）开始向下调整，依次向前处理每个结点，一直调整到根结点为止。

这种方法建堆有两大优势：

1.在堆排序或者TopK问题中只需要这一个向下调整函数就解决问题了

2.建堆的时间复杂度更低，为 $O(N)$ ，向上调整的建堆方式时间复杂度为 $O(N\log N)$

时间复杂度

向下调整

假设这个堆为满二叉树，树的高度为h，结点个数为N，那我们需要从第h-1层开始向下调整，假设向下调整的累计调整次数为T(h)，T(h)为每一层的个数*每一层向下调整次数的和（按最坏情况计算）

$T(h)=2^{h-2}\cdot 1+2^{h-3}\cdot 2+\dots+2^{1}\cdot(h-2)+2^{0}\cdot(h-1)$

由错位相减法可得：

$T(h)=2^{h}-1-h$

由于是满二叉树：

$N=2^{h}-1$

$h=\log_2(N+1)$

可得：

$T(N)=N-\log_2(N+1)$

约为N

所以，这种方法建堆的时间复杂度为 $O(N)$

代码实例

Heap.h

#pragma once
#include<stdbool.h>
#include<stdio.h>
#include<assert.h>
#include<stdlib.h>
// Element type held in the heap.
typedef int HPDataType;
// Rearranges the n elements of a in place into a heap (min-heap order in Heap.c).
void Heap(int* a, int n);

Heap.c

#include"Heap.h"
// Exchanges the two values that x and y point to.
void Swap(HPDataType* x, HPDataType* y)
{
	HPDataType saved = *y;
	*y = *x;
	*x = saved;
}
// Sifts the element at index parent down a min-heap of size elements stored in a.
// At each step the smaller of the two children is picked; if it is smaller than
// the parent they are swapped and the walk continues from the child's position.
void AdjustDown(HPDataType* a, int size, int parent)
{
	for (;;)
	{
		int smaller = parent * 2 + 1;   // start with the left child
		if (smaller >= size)
		{
			return;                     // parent has no children
		}
		// Switch to the right child when it exists and is smaller than the left one.
		if (smaller + 1 < size && a[smaller + 1] < a[smaller])
		{
			smaller++;
		}
		if (!(a[smaller] < a[parent]))
		{
			return;                     // heap order already holds here
		}
		Swap(&a[smaller], &a[parent]);
		parent = smaller;
	}
}
// Turns the n elements of a into a heap in place by sifting down every
// non-leaf node, starting at the parent of the last element and finishing
// at the root (index 0).
void Heap(int* a, int n)
{
	int node = (n - 1 - 1) / 2;   // parent of the last element
	while (node >= 0)
	{
		AdjustDown(a, n, node);
		--node;
	}
}

test.c

#include"Heap.h"
// Demo: heapifies a fixed nine-element array with Heap and prints the result.
int main()
{
	// Let the initialiser determine the length so the array holds exactly the
	// values that are heapified and printed, with no unused trailing slot.
	int arr[] = { 1,5,6,3,2,4,8,9,7 };
	int count = (int)(sizeof(arr) / sizeof(arr[0]));
	Heap(arr, count);
	for (int i = 0; i < count; i++)
	{
		printf("%d ", arr[i]);
	}
	return 0;
}