并查集是一种树形的数据结构,用于处理一些不相交集合的合并及查询问题。常常在使用中以森林来表示。判断亲戚关系是并查集的一个应用:若某个家族过于庞大,要判断两个人是否是亲戚,现在给出某个亲戚关系图,求任意给出的两个人是否具有亲戚关系;规定x和y是亲戚,y和z是亲戚,那么x和z也是亲戚;如果x,y是亲戚,那么x的亲戚都是y的亲戚,y的亲戚也都是x的亲戚。
并查集可以表示成如下图所示的数据结构:
实现并查集的核心算法是查找(FIND)和合并(UNION)。查找算法是递归的查找某节点的父节点直到找到根节点为止,而合并算法是将两棵树合并为一棵树。在这个过程中有一个明显的缺点,合并的时候在极端情况下可能会造成树的高度过高,而导致查找时效率过低。对于此,可以对查找和合并进行优化,采用按秩(rank)合并,和带路径压缩的查找。
下面是带路径压缩的查找算法:
算法:FIND
输入:节点x
输出:root(x),包含x的树的根
y ← x while p(y) != null y ← p(y) end while root ← y y ← x while p(y) != null w ← p(y) p(y) ← root y ← w end while return root
下面是按秩(rank)合并的合并算法:
算法:UNION
输入:两个元素x和y
输出:包含x和y的两个树的合并,原来的树被破坏
u ← FIND(x) v ← FIND(y) if rank(u) <= rank(v) then p(u) ← v if rank(u) == rank(v) then rank(v) ← rank(v) + 1 else p(v) ← u end if
下面是C++的实现:
#include <iostream>
#define Type Set
using namespace std;
struct Set {
int parent;
int rank;
};
int Find(Type *a, int x) {
int y = x;
while(a[y].parent != 0) {
y = a[y].parent;
}
int root = y;
y = x;
while(a[y].parent != 0) {
int w = a[y].parent;
a[y].parent = root;
y = w;
}
return root;
}
void Union(Type *a, int x, int y) {
int u = Find(a, x);
int v = Find(a, y);
if(a[u].rank <= a[v].rank) {
a[u].parent = v;
if(a[u].rank == a[v].rank) {
a[v].rank ++;
}
} else {
a[v].parent = u;
}
}
void Initialize(Type *a, int n) {
for (int i = 1; i <= n;i ++) {
a[i].parent = 0;
a[i].rank = 0;
}
}
void Show(Type *a, int n) {
for (int i = 1; i <= n;i ++) {
cout << i << " -> " << a[i].parent << " rank: " << a[i].rank << endl;
}
cout << "----------------------------------------\n";
}
int main() {
Type S[10];
Initialize(S, 9);
Show(S, 9);
Union(S, 1, 2);
Show(S, 9);
Union(S, 3, 4);
Show(S, 9);
Union(S, 5, 6);
Show(S, 9);
Union(S, 7, 8);
Show(S, 9);
Union(S, 2, 4);
Show(S, 9);
Union(S, 8, 9);
Show(S, 9);
Union(S, 6, 8);
Show(S, 9);
Find(S, 5);
Show(S, 9);
Union(S, 4, 8);
Show(S, 9);
Find(S, 1);
Show(S, 9);
getchar();
return 0;
}
然后是Java实现:
import java.util.ArrayList;
import java.util.HashMap;
public class DisjointSet {
private ArrayList<Integer> parent = new ArrayList<Integer>();
private ArrayList<Integer> rank = new ArrayList<Integer>();
//private ArrayList<String> content = new ArrayList<String>();
private HashMap<String, Integer> content = new HashMap<String, Integer>();
/**
* 初始化哈函数
* @param contents 所有的元素
*/
public DisjointSet(ArrayList<String> contents) {
//将第0个元素置0,使集合从1号元素开始
this.parent.clear();
this.rank.clear();
this.parent.add(0);
this.rank.add(0);
for (int i = 0;i < contents.size(); i ++) {
this.parent.add(0);
this.rank.add(0);
this.content.put(contents.get(i), this.parent.size()-1);
}
}
private void union(int x, int y) {
int u = this.find(x);
int v = this.find(y);
if (this.rank.get(u) <= this.rank.get(v)) {
this.parent.set(u, v);
if (this.rank.get(u) == this.rank.get(v)) {
this.rank.set(v, this.rank.get(v)+1);
}
} else {
this.parent.set(v, u);
}
}
private int find(int x) {
int y = x;
while (this.parent.get(y) != 0) {
y = this.parent.get(y);
}
int root = y;
y = x;
while (this.parent.get(y) != 0) {
int w = this.parent.get(y);
this.parent.set(y, root);
y = w;
}
return root;
}
/**
* 判断a和b是否属于一个集合
* @param a
* @param b
* @return 若a和b均是集合元素且属于同一个集合,则返回true,否则返回false
*/
public boolean isSameSet(String a, String b) {
if (this.content.containsKey(a) != true || this.content.containsKey(b) != true) {
return false;
}
int u = this.find(this.content.get(a));
int v = this.find(this.content.get(b));
return u == v;
}
/**
* 将a和b合并成一个集合
* @param a
* @param b
*/
public void join(String a, String b) {
if (this.content.containsKey(a) != true || this.content.containsKey(b) != true) {
return;
}
this.union(this.content.get(a), this.content.get(b));
}
/**
* 添加新的集合元素
* @param a
*/
public void add(String a) {
if (this.content.containsKey(a)) {
return;
}
this.parent.add(0);
this.rank.add(0);
this.content.put(a, this.parent.size()-1);
}
/**
* @param args
*/
public static void main(String[] args) {
// TODO Auto-generated method stub
System.out.println("假设Sheldon、Amy和Raj属于一个集合,Howard和Bernadette属于一个集合,Leonard和Penny属于一个集合");
String [] names = {"Sheldon", "Amy", "Raj", "Howard", "Bernadette", "Leonard", "Penny"};
ArrayList<String> set = new ArrayList<String>();
for (int i = 0; i < names.length; i++) {
set.add(names[i]);
}
DisjointSet ds = new DisjointSet(set);
ds.join("Sheldon", "Amy");
ds.join("Amy", "Raj");
ds.join("Howard", "Bernadette");
ds.join("Leonard", "Penny");
System.out.print("Sheldon和Raj属于一个集合吗? ");
System.out.println(ds.isSameSet("Sheldon", "Raj"));
System.out.print("Howard和Penny属于一个集合吗? ");
System.out.println(ds.isSameSet("Howard", "Penny"));
}
}
最后是Python版本:
#! /usr/bin/env python
# -*- coding:utf-8 -*-
class DisjointSet:
def __init__(self, contents):
self.parent = [0]
self.rank = [0]
self.content = dict()
for i in contents:
self.parent.append(0)
self.rank.append(0)
self.content[i] = len(self.parent)-1
def union(self, x, y):
u = self.find(x)
v = self.find(y)
if self.rank[u] <= self.rank[v]:
self.parent[u] = v
if self.rank[u] == self.rank[v]:
self.rank[v] = self.rank[v]+1
else:
self.parent[v] = u
def find(self, x):
y = x
while self.parent[y] != 0:
y = self.parent[y]
root = y
y = x
while self.parent[y] != 0:
w = self.parent[y]
self.parent[y] = root
y = w
return root
def isSameSet(self, a, b):
if self.content.has_key(a) != True or self.content.has_key(b) != True:
return False
u = self.find(self.content[a])
v = self.find(self.content[b])
return u == v
def join(self, a, b):
if self.content.has_key(a) != True or self.content.has_key(b) != True:
return
self.union(self.content[a], self.content[b])
def add(self, a):
if self.content.has_key(a):
return
self.parent.append(0)
self.rank.append(0)
self.content[i] = len(self.parent)-1
if __name__ == '__main__':
print '假设Sheldon、Amy和Raj属于一个集合,Howard和Bernadette属于一个集合,Leonard和Penny属于一个集合'
names = ["Sheldon", "Amy", "Raj", "Howard", "Bernadette", "Leonard", "Penny"]
ds = DisjointSet(names)
ds.join("Sheldon", "Amy")
ds.join("Amy", "Raj")
ds.join("Howard", "Bernadette")
ds.join("Leonard", "Penny")
print 'Sheldon和Raj属于一个集合吗?', ds.isSameSet("Sheldon", "Raj")
print 'Howard和Penny属于一个集合吗?', ds.isSameSet("Howard", "Penny")