一 并查集的概念
概念:并查集是一种树形的结构,这种数据结构是把一些元素按照一定的关系组合在一起。比如在亲戚关系的场景下,并查集是由一个跟节点(根节点指向自己)和所有他的子节点(可能是他的孩子节点或者子孙节点)组成。
并查集的图解:
特点:并查集(除了跟节点)所有节点都指向他们的父节点,跟节点指向自己
常见用法:
首先把一批数据按照特点(比如按照父子关系)拆分到不同的集合中,变成并查集,然后用于处理业务逻辑
二 并查集在项目中的实际应用
项目背景
需要做一个通过excel导入商品类目的功能,(洁具-浴盆)就是一个类目链。在我们必须保证先把父级类目导入,然后才能导入子级类目信息。现在存在的问题就是当导入的数据量较大的时候,接口会超时。会导致前端无法感知导入的结果。需要对接口做多线程导入的优化。
项目调研
根据需求的特点和实体之间的关联关系决定采用并查集来做优化处理。先把excel中的所有数据按照并查集的规则拆分开。然后对森林做分层导入。不同层之间需要串行导入(因为存在数据之间存在父子关系),同层之间可以并行校验导入。
思路图解
处理流程:无序的节点->分组->变成并查集(数据分层)
代码实现
1 定义一个上传结点类
import lombok.AllArgsConstructor;
import lombok.Data;
import java.io.Serializable;
@Data
@AllArgsConstructor
public class CategoryUploadDto implements Serializable {
private static final long serialVersionUID = 8362987561243233425L;
/**
* 名称
*/
private String name;
/**
* 父级类目名称(如果是'无'代表是一级类目)
*/
private String parentName;
/**
* 备注
*/
private String remark;
}
2 mock上传的数据
private static List<CategoryUploadDto> mockUploadDto() {
CategoryUploadDto sanitaryWare = new CategoryUploadDto("洁具", "无", "洁具一级类目");
CategoryUploadDto bathtub = new CategoryUploadDto("浴盆", "洁具", "浴盆二级类目");
CategoryUploadDto womenClothing = new CategoryUploadDto("女装", "无", "女装一级类目");
CategoryUploadDto womenShoes = new CategoryUploadDto("女鞋", "女装", "女鞋二级类目");
CategoryUploadDto toilet = new CategoryUploadDto("马桶", "洁具", "马桶二级类目");
return Arrays.asList(sanitaryWare, bathtub, womenClothing, womenShoes, toilet);
}
3 对数据用并查集的思想分组
private static Map<CategoryUploadDto, List<CategoryUploadDto>> getDisjointSet(List<CategoryUploadDto> categoryUploadDtoList) {
// 1 pack出名称和自己实体的map结构
Map<String, CategoryUploadDto> categoryNameCategoryMap = categoryUploadDtoList
.stream()
.collect(Collectors.toMap(CategoryUploadDto::getName, Function.identity()));
// 2 pack出自己名称和父类名称map结构
Map<String, String> categoryNameParentCategoryNameMap = categoryUploadDtoList
.stream().collect(Collectors.toMap(CategoryUploadDto::getName, category -> "无".equals(category.getParentName()) ?
category.getName() : category.getParentName()));
// 3 把属于同一个根的类目的放在一起
Map<CategoryUploadDto, List<CategoryUploadDto>> disjointSetMap = new HashMap<>();
categoryUploadDtoList.forEach(category -> {
if (category.getName().equals(categoryNameParentCategoryNameMap.get(category.getName()))) {
disjointSetMap.put(category, Lists.newArrayList(category));
} else {
CategoryUploadDto parentCategoryUploadDto = findParent(category, categoryNameParentCategoryNameMap, categoryNameCategoryMap);
if (Objects.nonNull(parentCategoryUploadDto)) {
if (disjointSetMap.containsKey(parentCategoryUploadDto)) {
disjointSetMap.get(parentCategoryUploadDto).add(category);
} else {
disjointSetMap.put(category, Lists.newArrayList(category));
}
} else {
disjointSetMap.put(category, Lists.newArrayList(category));
}
}
});
return disjointSetMap;
}
/**
* 找到根节点
*/
private static CategoryUploadDto findParent(CategoryUploadDto categoryUploadDto, Map<String, String> categoryNameParentCategoryNameMap,
Map<String, CategoryUploadDto> categoryNameCategoryMap) {
if (Objects.isNull(categoryUploadDto)) {
return null;
}
if (categoryUploadDto.getName().equals(categoryNameParentCategoryNameMap.get(categoryUploadDto.getName()))) {
return categoryUploadDto;
}
return findParent(categoryNameCategoryMap.get(categoryUploadDto.getParentName()), categoryNameParentCategoryNameMap, categoryNameCategoryMap);
}
并查集分组后的结果:
4 对并查集的结果进行分层:
private static List<List<List<CategoryUploadDto>>> buildTreeLevel(Map<CategoryUploadDto, List<CategoryUploadDto>> disjointSet) {
// 遍历并查集
List<List<List<CategoryUploadDto>>> result = disjointSet.entrySet().stream().map(singleTree -> {
// 1 先把所有孩子分组到一起
Map<String, List<CategoryUploadDto>> parentNameCategoryUploadDtoListMap =
singleTree.getValue().stream().collect(Collectors.groupingBy(CategoryUploadDto::getParentName));
// 2 进行分层处理
List<List<CategoryUploadDto>> list = new ArrayList<>();
Queue<List<CategoryUploadDto>> categoryUploadDtoListQueue = new ArrayDeque<>();
CategoryUploadDto root = singleTree.getKey();
categoryUploadDtoListQueue.add(Lists.newArrayList(root));
while (!categoryUploadDtoListQueue.isEmpty()) {
List<CategoryUploadDto> poll = categoryUploadDtoListQueue.poll();
List<CategoryUploadDto> newLevelNode = poll.stream()
.map(singleNode -> parentNameCategoryUploadDtoListMap.get(singleNode.getName()))
.filter(CollectionUtils::isNotEmpty)
.flatMap(List::stream).collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(newLevelNode)) {
categoryUploadDtoListQueue.add(newLevelNode);
}
list.add(poll);
}
return list;
}).collect(Collectors.toList());
return result;
}
分层后的结果
分层之后就可以利用Stream进行同一层之间的并行,不同层之间的并行处理了。
5 完整代码
import com.google.common.collect.Lists;
import org.apache.commons.collections4.CollectionUtils;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
public class DisjointSet {
public static void main(String[] args) {
List<CategoryUploadDto> categoryUploadDtoList = mockUploadDto();
Map<CategoryUploadDto, List<CategoryUploadDto>> disjointSet = getDisjointSet(categoryUploadDtoList);
List<List<List<CategoryUploadDto>>> allLevelNodeTree = buildTreeLevel(disjointSet);
List<Boolean> booleans = mockUpload(allLevelNodeTree);
}
private static List<CategoryUploadDto> mockUploadDto() {
CategoryUploadDto sanitaryWare = new CategoryUploadDto("洁具", "无", "洁具一级类目");
CategoryUploadDto bathtub = new CategoryUploadDto("浴盆", "洁具", "浴盆二级类目");
CategoryUploadDto womenClothing = new CategoryUploadDto("女装", "无", "女装一级类目");
CategoryUploadDto womenShoes = new CategoryUploadDto("女鞋", "女装", "女鞋二级类目");
CategoryUploadDto toilet = new CategoryUploadDto("马桶", "洁具", "马桶二级类目");
return Arrays.asList(sanitaryWare, bathtub, womenClothing, womenShoes, toilet);
}
private static Map<CategoryUploadDto, List<CategoryUploadDto>> getDisjointSet(List<CategoryUploadDto> categoryUploadDtoList) {
// 1 pack出名称和自己实体的map结构
Map<String, CategoryUploadDto> categoryNameCategoryMap = categoryUploadDtoList
.stream()
.collect(Collectors.toMap(CategoryUploadDto::getName, Function.identity()));
// 2 pack出自己名称和父类名称map结构
Map<String, String> categoryNameParentCategoryNameMap = categoryUploadDtoList
.stream().collect(Collectors.toMap(CategoryUploadDto::getName, category -> "无".equals(category.getParentName()) ?
category.getName() : category.getParentName()));
// 3 把属于同一个根的类目的放在一起
Map<CategoryUploadDto, List<CategoryUploadDto>> disjointSetMap = new HashMap<>();
categoryUploadDtoList.forEach(category -> {
if (category.getName().equals(categoryNameParentCategoryNameMap.get(category.getName()))) {
disjointSetMap.put(category, Lists.newArrayList(category));
} else {
CategoryUploadDto parentCategoryUploadDto = findParent(category, categoryNameParentCategoryNameMap, categoryNameCategoryMap);
if (Objects.nonNull(parentCategoryUploadDto)) {
if (disjointSetMap.containsKey(parentCategoryUploadDto)) {
disjointSetMap.get(parentCategoryUploadDto).add(category);
} else {
disjointSetMap.put(category, Lists.newArrayList(category));
}
} else {
disjointSetMap.put(category, Lists.newArrayList(category));
}
}
});
return disjointSetMap;
}
/**
* 找到根节点
*/
private static CategoryUploadDto findParent(CategoryUploadDto categoryUploadDto, Map<String, String> categoryNameParentCategoryNameMap,
Map<String, CategoryUploadDto> categoryNameCategoryMap) {
if (Objects.isNull(categoryUploadDto)) {
return null;
}
if (categoryUploadDto.getName().equals(categoryNameParentCategoryNameMap.get(categoryUploadDto.getName()))) {
return categoryUploadDto;
}
return findParent(categoryNameCategoryMap.get(categoryUploadDto.getParentName()), categoryNameParentCategoryNameMap, categoryNameCategoryMap);
}
private static List<List<List<CategoryUploadDto>>> buildTreeLevel(Map<CategoryUploadDto, List<CategoryUploadDto>> disjointSet) {
// 遍历并查集
List<List<List<CategoryUploadDto>>> result = disjointSet.entrySet().stream().map(singleTree -> {
// 1 先把所有孩子分组到一起
Map<String, List<CategoryUploadDto>> parentNameCategoryUploadDtoListMap =
singleTree.getValue().stream().collect(Collectors.groupingBy(CategoryUploadDto::getParentName));
// 2 进行分层处理
List<List<CategoryUploadDto>> list = new ArrayList<>();
Queue<List<CategoryUploadDto>> categoryUploadDtoListQueue = new ArrayDeque<>();
CategoryUploadDto root = singleTree.getKey();
categoryUploadDtoListQueue.add(Lists.newArrayList(root));
while (!categoryUploadDtoListQueue.isEmpty()) {
List<CategoryUploadDto> poll = categoryUploadDtoListQueue.poll();
List<CategoryUploadDto> newLevelNode = poll.stream()
.map(singleNode -> parentNameCategoryUploadDtoListMap.get(singleNode.getName()))
.filter(CollectionUtils::isNotEmpty)
.flatMap(List::stream).collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(newLevelNode)) {
categoryUploadDtoListQueue.add(newLevelNode);
}
list.add(poll);
}
return list;
}).collect(Collectors.toList());
return result;
}
private static List<Boolean> mockUpload(List<List<List<CategoryUploadDto>>> allLevelNodeTree) {
return allLevelNodeTree.parallelStream()
.map(oneTree -> oneTree
.stream().map(DisjointSet::mockBatchUpload).collect(Collectors.toList()))
.flatMap(List::stream)
.collect(Collectors.toList());
}
private static boolean mockBatchUpload(List<CategoryUploadDto> oneLevelCategoryList) {
// mock upload
return true;
}
}