数据结构与算法（陈越版）第五讲（树下）树的应用——集合及其运算

本文链接：https://blog.csdn.net/lxx333666/article/details/118371315

这篇博客介绍了如何使用树结构来表示集合，并探讨了集合的基本运算，如查找和合并。针对并查集操作导致的树高问题，提出了按照秩的合并算法和路径压缩优化，以降低查找复杂度并保持树的平衡。通过示例代码展示了这两种优化方法在实际应用中的效果。

摘要由CSDN通过智能技术生成

数据结构与算法（陈越版）第五讲（树下）树的应用——集合及其运算

一、集合的表示
- 1.1、集合的表示
- 1.2、集合的储存
二、集合的运算

一、集合的表示

集合是一种常见的数据表示方式。集合的运算包括交，并，补，差以及判定一个数据是否是某个集合的元素。
在这里插入图片描述
为了解决上述问题就需要用到集合，准确的说是集合的运算，因此就需要合理的进行集合的表示，既然要对集合表示那就需要考虑集合存储实现。

1.1、集合的表示

为了有效地对集合执行各种操作，可以用树结构表示集合：用树根代表这个集合，树的每个结点代表一个集合元素。

采用树结构的好处：判定一个数据是否属于一个集合是很方便的（参考树中查找元素）
在这里插入图片描述
如上图所示：与之前的父子关系指针不同，这里结点的指针不是从父亲指向儿子，而是儿子指向父亲。

1.2、集合的储存

既然是树结构那么就可以用数组进行储存。
在这里插入图片描述

// One slot of the array that stores the set forest.
typedef struct
{
   ElementType Data;   // value of the set element kept in this slot
   int parent;         // array index of this element's parent node
}SetType;

二、集合的运算

这里主要是处理并查集，所以只考虑如何查以及如何并。

2.1查找以及普通并

# include <iostream>
# include <malloc.h>
# define MaxSize 100          // 切记宏定义没有分号  # define MaxSize 100; 这样在下面使用MaxSize，是不可用的。
using namespace std;

typedef int ElementType;    // type of the values stored in a set

// One slot of the array that stores the set forest.
typedef struct
{
	ElementType Data;   // value of the set element kept in this slot
	int parent;         // array index of the parent node; negative when this node is a root
}SetType;

// Find: look up the element X and report which set it belongs to.
//   s - array of MaxSize slots holding the set forest
//   X - element value to look up
// Returns the array index of the root of X's tree, or -1 when no slot holds X.
int Find(SetType s[], ElementType X)
{
	// Step 1: linear scan for the slot whose Data equals X.
	int pos = 0;
	while (pos < MaxSize && s[pos].Data != X)
	{
		++pos;
	}
	if (pos >= MaxSize)
	{
		return -1;      // X is not stored anywhere in the array
	}

	// Step 2: follow the parent links upward; a negative parent marks the root.
	while (s[pos].parent >= 0)
	{
		pos = s[pos].parent;
	}
	return pos;
}

// Union: merge the set containing x1 into the set containing x2.
//   s      - array of MaxSize slots holding the set forest
//   x1, x2 - element values whose sets are merged
// The root of x1's tree is hung directly under the root of x2's tree.
// Nothing is changed when either value is not stored in s, or when both
// values already share the same root.
void Union(SetType s[], ElementType x1, ElementType x2)
{
	int root1 = Find(s, x1);    // root of the tree that holds x1
	int root2 = Find(s, x2);    // root of the tree that holds x2

	// Find reports a missing value as -1; indexing s[-1] would write
	// outside the array, so leave the forest untouched in that case.
	if (root1 < 0 || root2 < 0)
		return;

	if (root1 != root2)
		s[root1].parent = root2;
}

int main()
{
	// Forest storage: slot k holds the value k and starts as a lone root
	// (parent == -1).
	SetType s[MaxSize];
	int idx = 0;
	while (idx < MaxSize)
	{
		s[idx].parent = -1;
		s[idx].Data = idx;
		++idx;
	}

	// Before any merge, 5 is the root of its own tree.
	cout << Find(s, 5) << endl;

	// Plain union of 1-2, 2-3 and 3-4, in that order.
	for (int k = 1; k < 4; ++k)
	{
		Union(s, k, k + 1);
	}

	// Report the root of 3, 2 and 4, in that order.
	const int queries[] = { 3, 2, 4 };
	for (int q = 0; q < 3; ++q)
	{
		cout << Find(s, queries[q]) << endl;
	}
	return 0;
}

但是上述并操作存在一个很严重的问题：它可能导致树越来越高，最坏情况下 N 个结点会退化成一棵高度为 N 的单链树。

假设有 $N + 1$ 个各自独立的元素 $0, 1, \dots, N$（注意 Union(s, x1, x2) 会把 x1 所在的树挂到 x2 所在树的根下）：

合并0和1所在的集合：Union(s, 0, 1) 的结果是生成根为1，高度为2的树；
合并1和2所在的集合：Union(s, 1, 2) 的结果是生成根为2，高度为3的树；
…
合并 $N-1$ 和 $N$ 所在的集合：Union(s, N-1, N) 的结果是生成根为 $N$ ，高度为 $N + 1$ 的树；

此图是4个结点依次进行并操作后所构成的树。在这样退化的树上，单次查找最坏需要 $O(N)$ 的时间，因此执行 $N$ 次查找的总时间复杂度为 $O(N^2)$ 。

查找算法已经确定，而并算法可以优化。

2.2按照秩的合并算法

算法思想是：将小规模的树挂在大规模的树上。为了记录规模，树根的 parent 域存放该树所含结点个数的负值（例如只有一个结点的树，其根的 parent 为 -1）。

# include <iostream>
# include <malloc.h>
# define MaxSize 100          // 切记宏定义没有分号  # define MaxSize 100; 这样在下面使用MaxSize，是不可用的。
using namespace std;

typedef int ElementType;    // type of the values stored in a set

// One slot of the array that stores the set forest.
typedef struct
{
	ElementType Data;   // value of the set element kept in this slot
	int parent;         // array index of the parent node; negative when this node is a root
}SetType;

// Find: look up the element X and report which set it belongs to.
//   s - array of MaxSize slots holding the set forest
//   X - element value to look up
// Returns the array index of the root of X's tree, or -1 when no slot holds X.
int Find(SetType s[], ElementType X)
{
	// Step 1: linear scan for the slot whose Data equals X.
	int pos = 0;
	while (pos < MaxSize && s[pos].Data != X)
	{
		++pos;
	}
	if (pos >= MaxSize)
	{
		return -1;      // X is not stored anywhere in the array
	}

	// Step 2: follow the parent links upward; a negative parent marks the root.
	while (s[pos].parent >= 0)
	{
		pos = s[pos].parent;
	}
	return pos;
}

// Union: merge the set containing x1 into the set containing x2.
//   s      - array of MaxSize slots holding the set forest
//   x1, x2 - element values whose sets are merged
// The root of x1's tree is hung directly under the root of x2's tree.
// Nothing is changed when either value is not stored in s, or when both
// values already share the same root.
void Union(SetType s[], ElementType x1, ElementType x2)
{
	int root1 = Find(s, x1);    // root of the tree that holds x1
	int root2 = Find(s, x2);    // root of the tree that holds x2

	// Find reports a missing value as -1; indexing s[-1] would write
	// outside the array, so leave the forest untouched in that case.
	if (root1 < 0 || root2 < 0)
		return;

	if (root1 != root2)
		s[root1].parent = root2;
}

// Union1: merge the sets containing x1 and x2, hanging the smaller tree
// under the root of the larger one.
//
// A root's parent field holds the negated number of nodes in its tree, so
// the more negative value belongs to the larger tree.
//   s      - array of MaxSize slots holding the set forest
//   x1, x2 - element values whose sets are merged
// Nothing is changed when either value is not stored in s, or when both
// values already belong to the same set.
void Union1(SetType s[], ElementType x1, ElementType x2)
{
	int root1 = Find(s, x1);
	int root2 = Find(s, x2);

	// Find reports a missing value as -1; s[-1] is outside the array.
	if (root1 < 0 || root2 < 0)
		return;

	// Merging a tree with itself would double its recorded size and make
	// the root its own parent, after which Find would never terminate.
	if (root1 == root2)
		return;

	if (s[root1].parent <= s[root2].parent)
	{
		// root1's tree is at least as large: root2 goes under root1.
		s[root1].parent += s[root2].parent;     // combined node count
		s[root2].parent = root1;
	}
	else
	{
		// root2's tree is strictly larger: root1 goes under root2.
		s[root2].parent += s[root1].parent;     // combined node count
		s[root1].parent = root2;
	}
}

int main()
{
	// Forest storage: slot k holds the value k and starts as a lone root
	// (parent == -1).
	SetType s[MaxSize];
	int idx = 0;
	while (idx < MaxSize)
	{
		s[idx].parent = -1;
		s[idx].Data = idx;
		++idx;
	}

	// Before any merge, 5 is the root of its own tree.
	cout << Find(s, 5) << endl;

	// Tree 1: merge 0-1, 1-2, 2-3 and 3-4 with the size-aware union.
	for (int k = 0; k < 4; ++k)
	{
		Union1(s, k, k + 1);
	}
	const int treeOneQueries[] = { 3, 2, 4, 1 };
	for (int q = 0; q < 4; ++q)
	{
		cout << Find(s, treeOneQueries[q]) << endl;     // each prints root 0
	}
	cout << "树1的结点数： " << s[0].parent << endl;     // five nodes, prints -5

	// Tree 2: 49, 50 and 51 end up under root 50.
	Union1(s, 50, 49);
	Union1(s, 51, 50);
	cout << Find(s, 49) << endl;
	cout << Find(s, 51) << endl;
	cout << "树2的结点数： " << s[50].parent << endl;    // three nodes, prints -3

	// Join the two trees; the smaller one hangs under root 0.
	Union1(s, 4, 49);
	cout << Find(s, 4) << endl;
	cout << Find(s, 49) << endl;
	cout << "合并树的结点数： " << s[0].parent << endl;  // eight nodes, prints -8

	return 0;
}

在这里插入图片描述
上图是按秩合并算法的图解。

2.3路径压缩优化

随着合并的进行，树可能越来越深，查找也就越来越慢。路径压缩可以在查找的同时，把待查找结点到根结点路径上所有结点的父结点都直接改为根结点，即当查找 D 后：

// 查找
# include <iostream>
# include <malloc.h>
# define MaxSize 100          // 切记宏定义没有分号  # define MaxSize 100; 这样在下面使用MaxSize，是不可用的。
using namespace std;

typedef int ElementType;    // type of the values stored in a set

// One slot of the array that stores the set forest.
typedef struct
{
	ElementType Data;   // value of the set element kept in this slot
	int parent;         // array index of the parent node; negative when this node is a root
}SetType;

// Find: look up the element X and report which set it belongs to.
//   s - array of MaxSize slots holding the set forest
//   X - element value to look up
// Returns the array index of the root of X's tree, or -1 when no slot holds X.
int Find(SetType s[], ElementType X)
{
	// Step 1: linear scan for the slot whose Data equals X.
	int pos = 0;
	while (pos < MaxSize && s[pos].Data != X)
	{
		++pos;
	}
	if (pos >= MaxSize)
	{
		return -1;      // X is not stored anywhere in the array
	}

	// Step 2: follow the parent links upward; a negative parent marks the root.
	while (s[pos].parent >= 0)
	{
		pos = s[pos].parent;
	}
	return pos;
}

// Union: merge the set containing x1 into the set containing x2.
//   s      - array of MaxSize slots holding the set forest
//   x1, x2 - element values whose sets are merged
// The root of x1's tree is hung directly under the root of x2's tree.
// Nothing is changed when either value is not stored in s, or when both
// values already share the same root.
void Union(SetType s[], ElementType x1, ElementType x2)
{
	int root1 = Find(s, x1);    // root of the tree that holds x1
	int root2 = Find(s, x2);    // root of the tree that holds x2

	// Find reports a missing value as -1; indexing s[-1] would write
	// outside the array, so leave the forest untouched in that case.
	if (root1 < 0 || root2 < 0)
		return;

	if (root1 != root2)
		s[root1].parent = root2;
}

// Union1: merge the sets containing x1 and x2, hanging the smaller tree
// under the root of the larger one.
//
// A root's parent field holds the negated number of nodes in its tree, so
// the more negative value belongs to the larger tree.
//   s      - array of MaxSize slots holding the set forest
//   x1, x2 - element values whose sets are merged
// Nothing is changed when either value is not stored in s, or when both
// values already belong to the same set.
void Union1(SetType s[], ElementType x1, ElementType x2)
{
	int root1 = Find(s, x1);
	int root2 = Find(s, x2);

	// Find reports a missing value as -1; s[-1] is outside the array.
	if (root1 < 0 || root2 < 0)
		return;

	// Merging a tree with itself would double its recorded size and make
	// the root its own parent, after which Find would never terminate.
	if (root1 == root2)
		return;

	if (s[root1].parent <= s[root2].parent)
	{
		// root1's tree is at least as large: root2 goes under root1.
		s[root1].parent += s[root2].parent;     // combined node count
		s[root2].parent = root1;
	}
	else
	{
		// root2's tree is strictly larger: root1 goes under root2.
		s[root2].parent += s[root1].parent;     // combined node count
		s[root1].parent = root2;
	}
}

// Find1: Find with path compression.
//   s - array of MaxSize slots holding the set forest
//   x - element value to look up
// Returns the array index of the root of x's tree, or -1 when no slot
// holds x. As a side effect, every node on the path from x up to the root
// is re-pointed directly at the root, so later lookups are shorter.
//
// The element value is translated to an array index once, and all further
// work is done on indices, so the function stays correct when an element's
// value differs from its position in the array.
int Find1(SetType s[], ElementType x)
{
	// Locate the slot that stores x.
	int i = 0;
	while (i < MaxSize && s[i].Data != x)
		++i;
	if (i >= MaxSize)
		return -1;      // not stored: do not read past the end of the array

	// First pass: climb to the root (a negative parent marks the root).
	int root = i;
	while (s[root].parent >= 0)
		root = s[root].parent;

	// Second pass: hang every node on the path directly under the root.
	while (i != root)
	{
		int next = s[i].parent;
		s[i].parent = root;
		i = next;
	}
	return root;
}
int main()
{
	// Forest storage: slot k holds the value k and starts as a lone root
	// (parent == -1).
	SetType s[MaxSize];
	int idx = 0;
	while (idx < MaxSize)
	{
		s[idx].parent = -1;
		s[idx].Data = idx;
		++idx;
	}

	// Before any merge, 5 is the root of its own tree.
	cout << Find(s, 5) << endl;

	// Plain union of 0-1, 1-2, 2-3 and 3-4, in that order.
	for (int k = 0; k < 4; ++k)
	{
		Union(s, k, k + 1);
	}

	// Parent index of 0..3 before compression: prints 1, 2, 3, 4.
	for (int n = 0; n < 4; ++n)
	{
		cout << s[n].parent << endl;
	}

	// Called only for its side effect of compressing the path from 0.
	Find1(s, 0);

	// Parent index of 0..3 after compression: every line prints 4.
	for (int n = 0; n < 4; ++n)
	{
		cout << s[n].parent << endl;
	}

	return 0;
}

在这里插入图片描述
路径压缩优化查找图解。（需要注意的是：这里我故意让每个元素的值与它在数组中的下标相同，因此省略了“由元素值查找其数组下标”这一步。如果两者不相同，则需要先写一个查找函数，把元素所在的下标找到。）

// Find_X_: return the array index of the slot whose Data equals x.
//   s - array of MaxSize slots holding the set forest
//   x - element value to look up
// Returns the index in the range 0 .. MaxSize-1, or -1 when no slot holds
// x (the same "not found" convention as Find), so callers must check the
// result before using it as a subscript.
int Find_X_(SetType s[], ElementType x)
{
    for (int i = 0; i < MaxSize; ++i)
    {
        if (s[i].Data == x)
            return i;
    }
    return -1;
}

// 输出改成 
cout << s[Find_X_(s,0)].parent << endl;