并查集:用来判断两个元素是否在同一个集合
功能函数:
1.find( i ):查找 i 所在集合的代表元素,用代表元素来代表 i 所在的集合
2.issameset( a ,b ):判断a和b是否在同一个集合里
3.join(a,b)(即 union 操作;由于 union 是 C++ 关键字,下面的代码中命名为 join):合并a所在的集合和b所在的集合
时间复杂度:同时使用扁平化(路径压缩)和小挂大两种优化时,各种操作单次调用的均摊时间复杂度为O(α(n)),其中α是反阿克曼函数,在实际数据规模下不超过5,可视为O(1);证明较麻烦,记住即可
并查集的优化:1.扁平化 2.小挂大
并查集模板:
#include <iostream>
using namespace std;
// Global storage for the union-find template.
// NOTE(review): with `using namespace std;` in effect, the name `stack` would
// become ambiguous if <stack> were ever included - confirm before adding it.
const int maxnum = 10001;  // capacity of every array below
int father[maxnum];        // father[i]: parent of node i; a root satisfies father[i] == i
int size1[maxnum];         // size1[r]: number of nodes in the set whose root is r
int stack[maxnum];         // scratch stack holding the nodes to flatten during find()
int n;                     // number of nodes read from input
// Initializes the structure: each of the first n nodes becomes its own
// singleton set.
void build() {
    int node = 0;
    while (node < n) {
        father[node] = node;  // a node starts as its own representative
        size1[node] = 1;      // and its set contains only itself
        ++node;
    }
}
// Returns the representative (root) of the set containing i, flattening the
// path on the way: every node visited is re-pointed directly at the root.
int find(int i) {
    int depth = 0;
    // Climb to the root, remembering each node passed on the way up.
    for (; father[i] != i; i = father[i]) {
        stack[depth++] = i;
    }
    // i is now the root; attach every remembered node straight to it.
    for (int k = depth - 1; k >= 0; --k) {
        father[stack[k]] = i;
    }
    return i;
}
// Reports whether a and b currently belong to the same set, i.e. whether
// they share a representative.
bool issameset(int a, int b) {
    const int root_a = find(a);
    const int root_b = find(b);
    return root_a == root_b;
}
// Merges the set containing a with the set containing b. The root of the
// smaller set is hung under the root of the larger one; on a tie the root
// of a's set is hung under the root of b's set.
void join(int a, int b) {
    const int root_a = find(a);
    const int root_b = find(b);
    if (root_a == root_b) {
        return;  // already in one set
    }
    const bool a_is_larger = size1[root_a] > size1[root_b];
    const int keep = a_is_larger ? root_a : root_b;
    const int drop = a_is_larger ? root_b : root_a;
    size1[keep] += size1[drop];
    father[drop] = keep;
}
// Template entry point: reads the node count into the global n. The rest of
// the program is left to be filled in by the user of the template.
int main() {
    std::cin >> n;
    return 0;
}
并查集模板精简版:
精简版不需要进行小挂大的优化(一般小挂大的优化可以省略);进行扁平化时用递归的方法
#include <iostream>
using namespace std;
// Global storage for the compact union-find template.
constexpr int maxnum = 10001;  // capacity of the parent table
int father[maxnum];            // father[i]: parent of node i; a root satisfies father[i] == i
int n;                         // number of nodes read from input
// Initializes the structure: each of the first n nodes becomes its own root.
void build() {
    for (int node = n - 1; node >= 0; --node) {
        father[node] = node;
    }
}
// Returns the root of the set containing i. While the recursion unwinds,
// every node on the path is re-pointed directly at the root (flattening).
int find(int i) {
    if (father[i] == i) {
        return i;  // i is the root
    }
    return father[i] = find(father[i]);
}
// Reports whether a and b share the same root, i.e. are in the same set.
bool issameset(int a, int b) {
    const int root_a = find(a);
    const int root_b = find(b);
    return root_a == root_b;
}
// Merges the set containing a into the set containing b by hanging a's root
// under b's root (no union-by-size in this compact version).
void join(int a, int b) {
    const int root_b = find(b);
    const int root_a = find(a);
    father[root_a] = root_b;
}
// Template entry point: reads the node count into the global n. The rest of
// the program is left to be filled in by the user of the template.
int main() {
    std::cin >> n;
    return 0;
}
例题:
// Minimum number of swaps so that every couple sits side by side.
// Person p belongs to couple p / 2. Each adjacent seat pair links the two
// couples sitting in it; a group of k linked couples needs k - 1 swaps, so
// the answer is (number of couples) - (number of groups).
class Solution {
public:
    static const int maxnum = 32;  // capacity of father; the couple count must not exceed it
    int father[maxnum];            // father[c]: parent of couple c in the union-find forest
    int n;                         // number of disjoint couple groups still remaining

    // Makes each of the first m couples its own group.
    void build(int m) {
        int couple = 0;
        while (couple < m) {
            father[couple] = couple;
            ++couple;
        }
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (father[i] == i) {
            return i;
        }
        return father[i] = find(father[i]);
    }

    // Merges the groups of x and y; the group count drops by one only when
    // they were previously separate.
    void join(int x, int y) {
        const int a = find(x);
        const int b = find(y);
        if (a == b) {
            return;
        }
        father[a] = b;
        --n;
    }

    // row[i] is the id of the person in seat i; seats (2k, 2k + 1) form a pair.
    // Returns the minimum number of swaps.
    int minSwapsCouples(std::vector<int>& row) {
        const int seats = row.size();
        const int couples = seats / 2;
        n = couples;
        build(couples);
        for (int seat = 0; seat < seats; seat += 2) {
            join(row[seat] / 2, row[seat + 1] / 2);
        }
        return couples - n;
    }
};
如果有两对情侣混在一起,那么只需交换一次就可以排好序;三对混在一起只需交换两次.....,所以如果有n对情侣混在一起,只需要交换n-1次。所以只需将混在一起的情侣放在一个集合里,最后统计所有集合里的情侣数减一求和,就等于所有的情侣对数减集合数
// Counts groups of "similar" strings: two strings of equal length are linked
// when they differ in fewer than three positions, and linked strings are
// merged with union-find.
class Solution {
public:
    static const int maxnum = 301;  // capacity of father; the string count must not exceed it
    int father[maxnum];             // father[i]: parent of string i
    int sets;                       // number of disjoint groups remaining

    // Makes each of the first n strings its own group.
    void build(int n) {
        sets = n;
        for (int i = n - 1; i >= 0; --i) {
            father[i] = i;
        }
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (father[i] == i) {
            return i;
        }
        return father[i] = find(father[i]);
    }

    // Merges the groups of x and y if they are distinct.
    void join(int x, int y) {
        const int a = find(x);
        const int b = find(y);
        if (a == b) {
            return;
        }
        father[a] = b;
        --sets;
    }

    // Returns the number of similarity groups among strs. Reads strs[0], so
    // strs must be non-empty; every string is indexed up to strs[0].size().
    int numSimilarGroups(std::vector<std::string>& strs) {
        const int count = strs.size();
        const int len = strs[0].size();
        build(count);
        for (int i = 0; i < count; ++i) {
            for (int j = i + 1; j < count; ++j) {
                if (find(i) == find(j)) {
                    continue;  // already grouped, no need to compare
                }
                int differ = 0;
                for (int k = 0; k < len; ++k) {
                    // Stop as soon as a third mismatch rules out similarity.
                    if (strs[i][k] != strs[j][k] && ++differ >= 3) {
                        break;
                    }
                }
                if (differ < 3) {
                    join(i, j);
                }
            }
        }
        return sets;
    }
};
相似的字符串属于一个集合,所以想到用并查集来收集不同的集合;收集字符串时,从第一个字符串开始,依次与其后的每个字符串比较是否相似(本题中所有字符串互为异位词,因此只需统计对应位置上不同字符的个数,少于3个即相似),两层for循环就可以完全判断
// Counts islands (4-directionally connected groups of '1' cells) with
// union-find. Cell (r, c) is mapped to the one-dimensional id r * cols + c.
class Solution {
public:
    static const int maxnum = 100000;  // capacity of father; rows * cols must not exceed it
    int father[maxnum];                // father[id]: parent of the land cell with that id
    int sets = 0;                      // number of disjoint land groups
    int size;                          // column count of the current grid, used by get_index

    // Maps row m, column n to its one-dimensional cell id.
    int get_index(int m, int n) { return size * m + n; }

    // Makes every land cell its own group. The counter is reset first so the
    // same Solution instance can be reused for several grids.
    void build(std::vector<std::vector<char>>& grid) {
        sets = 0;
        const int rows = grid.size();
        const int cols = rows == 0 ? 0 : static_cast<int>(grid[0].size());
        size = cols;
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                if (grid[i][j] == '1') {
                    sets++;
                    const int id = get_index(i, j);
                    father[id] = id;
                }
            }
        }
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (i != father[i])
            father[i] = find(father[i]);
        return father[i];
    }

    // Merges the groups of cells (a, b) and (i, j) if they are distinct.
    void join(int a, int b, int i, int j) {
        const int x = find(get_index(a, b));
        const int y = find(get_index(i, j));
        if (x != y) {
            father[x] = y;
            sets--;
        }
    }

    // Returns the number of islands in grid; an empty grid has none.
    int numIslands(std::vector<std::vector<char>>& grid) {
        if (grid.empty() || grid[0].empty()) {
            sets = 0;
            return 0;
        }
        const int rows = grid.size();
        const int cols = grid[0].size();
        build(grid);
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                if (grid[i][j] != '1')
                    continue;
                // Linking only to the left and upper neighbours is enough:
                // the right/lower links are made when those cells are visited.
                if (j > 0 && grid[i][j - 1] == '1')
                    join(i, j, i, j - 1);
                if (i > 0 && grid[i - 1][j] == '1')
                    join(i, j, i - 1, j);
            }
        }
        return sets;
    }
};
属于同一块陆地的点同属于一个集合,所以只需要统计不同集合的数量就可以。但二维数组中每个点由坐标表示,而并查集通常处理的是一维编号,所以从第一行、第一列开始依次编号(编号 = 行号 × 列数 + 列号),保证每个点的编号都是唯一的。有了对应的编号就可以使用并查集了
// Maximum number of stones that can be removed when a stone may be removed
// only if another stone shares its row or column. Stones sharing a row or a
// column are merged with union-find; the answer is stones minus groups.
class Solution {
public:
    static const int maxnum = 1001;    // capacity of father; the stone count must not exceed it
    std::unordered_map<int, int> row;  // row coordinate -> index of the first stone seen in that row
    std::unordered_map<int, int> col;  // column coordinate -> index of the first stone seen in that column
    int father[maxnum];                // father[i]: parent of stone i
    int sets;                          // number of disjoint stone groups

    // Clears the lookup tables and makes each of the n stones its own group.
    void build(int n) {
        row.clear();
        col.clear();
        sets = n;
        for (int i = n - 1; i >= 0; --i) {
            father[i] = i;
        }
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (father[i] == i) {
            return i;
        }
        return father[i] = find(father[i]);
    }

    // Merges the groups of x and y if they are distinct.
    void join(int x, int y) {
        const int a = find(x);
        const int b = find(y);
        if (a == b) {
            return;
        }
        father[a] = b;
        --sets;
    }

    // stones[i] = {x, y}. Returns how many stones can be removed.
    int removeStones(std::vector<std::vector<int>>& stones) {
        const int total = stones.size();
        build(total);
        for (int i = 0; i < total; ++i) {
            const int x = stones[i][0];
            const int y = stones[i][1];
            // emplace inserts only when the key is new; otherwise it hands
            // back the stone already registered for that row / column.
            const auto in_row = row.emplace(x, i);
            if (!in_row.second) {
                join(i, in_row.first->second);
            }
            const auto in_col = col.emplace(y, i);
            if (!in_col.second) {
                join(i, in_col.first->second);
            }
        }
        return total - sets;
    }
};
对于可以消除的石头,其实就是属于同一个集合里的,想到集合就可以使用并查集结构;但在合并两个节点时,要判断两块石头是否同行或同列,需要用哈希表来记录行、列的信息(每一行、每一列第一次出现的石头编号)。每个集合最后都会剩下一块石头,所以集合的个数就是剩下的石头的个数,可移除的石头数 = 石头总数 - 集合数
给集合打标签是并查集的常用技巧
// Finds everyone who ends up knowing a secret that person 0 initially shares
// with firstPerson. Meetings are processed in time order; within one time
// slot all participants are merged, then every participant whose group does
// not know the secret is detached again, so knowledge cannot travel
// backwards in time.
class Solution {
public:
    static const int maxnum = 100001;  // capacity of the tables; n must not exceed it
    int father[maxnum];                // father[i]: parent of person i
    bool sign[maxnum];                 // sign[r]: the group rooted at r knows the secret

    // Makes each person a singleton, then puts `first` into person 0's group
    // and marks that group as knowing the secret.
    void build(int n, int first) {
        for (int i = 0; i < n; i++) {
            father[i] = i;
            sign[i] = false;
        }
        father[first] = 0;
        sign[0] = true;
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (i != father[i]) {
            father[i] = find(father[i]);
        }
        return father[i];
    }

    // Merges the groups of i and j; the merged group knows the secret if
    // either side did.
    void join(int i, int j) {
        const int x = find(i);
        const int y = find(j);
        if (x != y) {
            father[x] = y;
            sign[y] |= sign[x];
        }
    }

    // meetings[k] = {personA, personB, time}. Sorts meetings in place by time
    // and returns, in increasing order, the ids of all people who know the
    // secret after every meeting has taken place.
    std::vector<int> findAllPeople(int n, std::vector<std::vector<int>>& meetings,
                                   int firstPerson) {
        build(n, firstPerson);
        // std::sort requires a strict weak ordering, so compare with '<'
        // (never '<='), and take the rows by reference to avoid copying them.
        std::sort(meetings.begin(), meetings.end(),
                  [](const std::vector<int>& a, const std::vector<int>& b) {
                      return a[2] < b[2];
                  });
        const int m = meetings.size();
        for (int l = 0, r; l < m;) {
            // [l, r] is the run of meetings that happen at the same time.
            r = l;
            while (r + 1 < m && meetings[l][2] == meetings[r + 1][2])
                r++;
            for (int i = l; i <= r; i++)
                join(meetings[i][0], meetings[i][1]);
            // Undo the merges of participants whose group still does not
            // know the secret.
            for (int i = l; i <= r; i++) {
                const int a = meetings[i][0];
                const int b = meetings[i][1];
                if (!sign[find(a)])
                    father[a] = a;
                if (!sign[find(b)])
                    father[b] = b;
            }
            l = r + 1;
        }
        std::vector<int> ans;
        for (int i = 0; i < n; i++) {
            if (sign[find(i)])
                ans.push_back(i);
        }
        return ans;
    }
};
先将会议按时间顺序排序,在同一段时间内,将知道秘密和不知道秘密的人各自形成一个集合;在进行下一个时间段的会议时,将上一次会议不知道秘密的人撤销"加入"操作,最后统计知晓秘密的专家即可。为了区分知道秘密和不知道秘密的专家,需要给专家们打标签,即用一个数组记录哪些专家知晓秘密
// Counts "good paths" in a tree: paths whose two endpoints carry the same
// value and whose intermediate nodes carry values no larger than it. Edges
// are added in increasing order of their larger endpoint value. Each group's
// root is a node holding the group's maximum value, and cnt[root] counts how
// many nodes in the group hold that maximum.
class Solution {
public:
    static const int maxnum = 30001;  // capacity of the tables; the node count must not exceed it
    int father[maxnum];               // father[i]: parent of node i
    int cnt[maxnum];                  // cnt[r]: nodes in r's group whose value equals vals[r]

    // Makes each of the n nodes a singleton group with one maximum-value node.
    void build(int n) {
        for (int i = 0; i < n; i++) {
            father[i] = i;
            cnt[i] = 1;
        }
    }

    // Returns the root of i's group, compressing the path on the way back.
    int find(int i) {
        if (i != father[i])
            father[i] = find(father[i]);
        return father[i];
    }

    // Merges the groups of i and j, keeping the root with the larger value.
    // Returns the number of new good paths created by the merge: non-zero
    // only when both groups share the same maximum value.
    int join(int i, int j, std::vector<int>& vals) {
        const int x = find(i);
        const int y = find(j);
        if (x == y)
            return 0;  // already merged; cannot occur for tree input
        if (vals[x] > vals[y]) {
            father[y] = x;
            return 0;
        }
        if (vals[y] > vals[x]) {
            father[x] = y;
            return 0;
        }
        // Equal maxima: every maximum node on one side pairs with every
        // maximum node on the other side.
        const int path = cnt[x] * cnt[y];
        father[y] = x;
        cnt[x] += cnt[y];
        return path;
    }

    // vals[i] is the value of node i; edges[k] = {u, v}. Sorts edges in place
    // and returns the number of good paths, single nodes included.
    int numberOfGoodPaths(std::vector<int>& vals, std::vector<std::vector<int>>& edges) {
        const int n = vals.size();
        build(n);
        // std::sort requires a strict weak ordering, so compare with '<'
        // (never '<='); capture vals and take the edges by reference so
        // nothing is copied per comparison.
        std::sort(edges.begin(), edges.end(),
                  [&vals](const std::vector<int>& a, const std::vector<int>& b) {
                      return std::max(vals[a[0]], vals[a[1]]) <
                             std::max(vals[b[0]], vals[b[1]]);
                  });
        int ans = n;  // every single node is a good path by itself
        for (const std::vector<int>& edge : edges) {
            ans += join(edge[0], edge[1], vals);
        }
        return ans;
    }
};
对于图类题目,如果涉及节点的数值大小,通常从节点数值的大小入手:是从大到小还是从小到大。此题经分析应从节点值小的节点入手,按照节点值的增大依次统计路径并将两个集合合并
// Chooses which initially infected node to remove (together with its edges)
// so that the final number of infected nodes is smallest. Clean nodes are
// merged into components with union-find; a component is saved by removing
// source v only when v is the single infected node adjacent to it.
class Solution {
public:
    static const int maxnum = 301;  // capacity of the tables; the node count must not exceed it
    int father[maxnum];             // father[i]: parent of node i (only clean nodes are ever merged)
    int infect[maxnum];             // per root: -1 no adjacent source yet, -2 several sources, else the single source
    int virus[maxnum];              // non-zero when the node is listed in initial
    int cnt[maxnum];                // cnt[v]: number of clean nodes saved by removing source v
    int size[maxnum];               // per root: number of nodes in the component

    // Resets every table for n nodes and flags the nodes listed in initial.
    void build(int n, std::vector<int>& initial) {
        for (int i = 0; i < n; i++) {
            father[i] = i;
            virus[i] = false;
            infect[i] = -1;
            size[i] = 1;
            cnt[i] = 0;
        }
        for (int i : initial)
            virus[i] = true;
    }

    // Returns the root of i's component, compressing the path on the way back.
    int find(int i) {
        if (i != father[i])
            father[i] = find(father[i]);
        return father[i];
    }

    // Merges the components of i and j and accumulates the size on the new root.
    void join(int i, int j) {
        const int x = find(i);
        const int y = find(j);
        if (x != y) {
            father[x] = y;
            size[y] += size[x];
        }
    }

    // graph is an adjacency matrix (1 = edge); initial lists the infected
    // nodes and is sorted in place. Returns the node to remove; ties go to
    // the smallest index. Reads initial[0], so initial must be non-empty.
    int minMalwareSpread(std::vector<std::vector<int>>& graph, std::vector<int>& initial) {
        const int n = graph.size();
        build(n, initial);
        // Step 1: merge clean nodes that are directly connected.
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                if (graph[i][j] == 1 && !virus[i] && !virus[j])
                    join(i, j);
            }
        }
        // Step 2: label each clean component with the source(s) touching it.
        for (int i : initial) {
            for (int j = 0; j < n; j++) {
                if (i != j && !virus[j] && graph[i][j] == 1) {
                    const int f = find(j);
                    if (infect[f] == -1)
                        infect[f] = i;
                    // The label lives on the component root f, not on j.
                    else if (infect[f] != -2 && infect[f] != i)
                        infect[f] = -2;
                }
            }
        }
        // Step 3: credit each singly-infected component to its only source.
        for (int i = 0; i < n; i++) {
            if (i == find(i) && infect[i] >= 0)
                cnt[infect[i]] += size[i];
        }
        // Step 4: pick the source that saves the most nodes; sorting first
        // makes the strict '>' below prefer the smallest index on ties.
        std::sort(initial.begin(), initial.end());
        int ans = initial[0];
        int ma = cnt[ans];
        for (int i : initial) {
            if (cnt[i] > ma) {
                ans = i;
                ma = cnt[i];
            }
        }
        return ans;
    }
};
此题先将非病毒的节点合并成若干集合,再遍历与病毒节点相连的节点,用infect数组给集合打上标签,记录感染该集合的病毒源头;当病毒的源头不止一个时,标为-2,此时不论怎么删都无法拯救这个集合。size数组统计集合的大小,virus只是为了查找病毒节点方便,cnt统计删除每个病毒节点所能拯救的节点数
带权并查集、可持久化并查集、可撤销并查集