要求
假设有一批数据,数据中存在重复的数据,数据间主要存在以下四种关系:
- 一对一:(唯一的 key 和唯一的 value 对应)
keyA – valueA
- 一对多:(唯一的 key 和多个 value 对应,同时这些 value 只与该 key 对应)
keyA – valueA
keyA – valueB
- 多对一:(多个 key 与唯一的 value 对应,同时这些 key 只与该 value 对应)
keyA – valueA
keyB – valueA
- 多对多:(多个 key,每个 key 与相同的多个 value 对应,同时,每个 value 与相同的多个 key 对应)
keyA – valueA
keyA – valueB
keyB – valueA
keyB – valueB
其它情况不予考虑
代码
逻辑代码:
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
/**
 * Classifies key/value pairs by the cardinality of the relation between keys and values.
 *
 * <p>The input pairs are de-duplicated into two indexes (key -> values and value -> keys).
 * Groups are then extracted from those indexes in a fixed order: one-to-one, many-to-many,
 * one-to-many, many-to-one. Every extracted pair is removed from both indexes, so whatever
 * is left at the end matches none of the four relations. All results are written to the log.
 */
public class Classfy {

    private static final Logger LOGGER = LoggerFactory.getLogger(Classfy.class);

    /**
     * Classifies all entries of all maps in {@code list} and logs each category, followed by
     * the pairs that fit none of the four supported relations.
     *
     * @param list maps whose entries are the key/value pairs to classify; a {@code null} list
     *             is treated as empty and {@code null} elements are skipped
     */
    public static void classfyAssociate(List<Map<String, String>> list) {
        Multimap<String, String> keyMap = HashMultimap.create();
        Multimap<String, String> valueMap = HashMultimap.create();

        // Step 1: de-duplicate the pairs and index them in both directions.
        if (list != null) {
            for (Map<String, String> map : list) {
                if (map == null) {
                    continue;
                }
                for (Entry<String, String> entry : map.entrySet()) {
                    keyMap.put(entry.getKey(), entry.getValue());
                    valueMap.put(entry.getValue(), entry.getKey());
                }
            }
        }

        // Step 2: extract the one-to-one pairs.
        Map<String, String> oneToOneMap = collectOneToOne(keyMap, valueMap);
        // Step 3: extract the many-to-many groups from what remains.
        Map<Set<String>, Set<String>> manyToManyMap = collectManyToMany(keyMap, valueMap);
        // Step 4: extract the one-to-many groups from what remains.
        Map<String, Set<String>> oneToManyMap = collectOneToMany(keyMap, valueMap);
        // Step 5: extract the many-to-one groups from what remains.
        Map<Set<String>, String> manyToOneMap = collectManyToOne(keyMap, valueMap);

        printOneToOneMap(oneToOneMap);
        printManyToMany(manyToManyMap);
        printOneToMany(oneToManyMap);
        printManyToOne(manyToOneMap);

        // Whatever is still in the indexes matches none of the four relations.
        LOGGER.info("print keyMap ...");
        printMultimap(keyMap);
        LOGGER.info("print valueMap ...");
        printMultimap(valueMap);
    }

    /**
     * Extracts every pair whose key has exactly one value and whose value has exactly one key.
     *
     * @param keyMap   key -> values index; extracted pairs are removed from it
     * @param valueMap value -> keys index; extracted pairs are removed from it
     * @return the extracted pairs, key -> value
     */
    private static Map<String, String> collectOneToOne(Multimap<String, String> keyMap,
            Multimap<String, String> valueMap) {
        Map<String, String> oneToOneMap = new HashMap<>();
        // Iterate over a snapshot: the indexes are modified inside the loop.
        for (String key : new HashSet<>(keyMap.keySet())) {
            Collection<String> values = keyMap.get(key);
            if (values.size() != 1) {
                continue;
            }
            // Read the single value before touching the index; 'values' is a live view.
            String value = values.iterator().next();
            if (valueMap.get(value).size() == 1) {
                oneToOneMap.put(key, value);
                keyMap.remove(key, value);
                valueMap.remove(value, key);
            }
        }
        return oneToOneMap;
    }

    /**
     * Extracts every group of more than one key and more than one value in which each key is
     * paired with exactly the group's values (and therefore each value with exactly the
     * group's keys).
     *
     * @param keyMap   key -> values index; extracted pairs are removed from it
     * @param valueMap value -> keys index; extracted pairs are removed from it
     * @return the extracted groups, key set -> value set
     */
    private static Map<Set<String>, Set<String>> collectManyToMany(Multimap<String, String> keyMap,
            Multimap<String, String> valueMap) {
        Map<Set<String>, Set<String>> manyToManyMap = new HashMap<>();
        for (String key : new HashSet<>(keyMap.keySet())) {
            // Keys consumed by an earlier group have no pairs left and fail this size check.
            Set<String> groupValues = new HashSet<>(keyMap.get(key));
            if (groupValues.size() <= 1) {
                continue;
            }
            // Candidate keys: every key that shares at least one value with 'key'.
            Set<String> groupKeys = new HashSet<>();
            for (String value : groupValues) {
                groupKeys.addAll(valueMap.get(value));
            }
            if (groupKeys.size() <= 1 || !allKeysHaveValues(keyMap, groupKeys, groupValues)) {
                continue;
            }
            manyToManyMap.put(groupKeys, groupValues);
            for (String groupKey : groupKeys) {
                for (String groupValue : groupValues) {
                    keyMap.remove(groupKey, groupValue);
                    valueMap.remove(groupValue, groupKey);
                }
            }
        }
        return manyToManyMap;
    }

    /**
     * Tells whether every key in {@code keys} is paired with exactly the values in
     * {@code expectedValues}: no more, no fewer.
     */
    private static boolean allKeysHaveValues(Multimap<String, String> keyMap, Set<String> keys,
            Set<String> expectedValues) {
        for (String key : keys) {
            Collection<String> actual = keyMap.get(key);
            if (actual.size() != expectedValues.size() || !expectedValues.containsAll(actual)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts every key that has more than one value where each of those values belongs to
     * that key only.
     *
     * @param keyMap   key -> values index; extracted pairs are removed from it
     * @param valueMap value -> keys index; extracted pairs are removed from it
     * @return the extracted groups, key -> value set
     */
    private static Map<String, Set<String>> collectOneToMany(Multimap<String, String> keyMap,
            Multimap<String, String> valueMap) {
        Map<String, Set<String>> oneToManyMap = new HashMap<>();
        for (String key : new HashSet<>(keyMap.keySet())) {
            Set<String> values = new HashSet<>(keyMap.get(key));
            if (values.size() <= 1 || !eachHasSinglePartner(valueMap, values)) {
                continue;
            }
            oneToManyMap.put(key, values);
            for (String value : values) {
                keyMap.remove(key, value);
                valueMap.remove(value, key);
            }
        }
        return oneToManyMap;
    }

    /**
     * Extracts every value that has more than one key where each of those keys belongs to
     * that value only.
     *
     * @param keyMap   key -> values index; extracted pairs are removed from it
     * @param valueMap value -> keys index; extracted pairs are removed from it
     * @return the extracted groups, key set -> value
     */
    private static Map<Set<String>, String> collectManyToOne(Multimap<String, String> keyMap,
            Multimap<String, String> valueMap) {
        Map<Set<String>, String> manyToOneMap = new HashMap<>();
        for (String key : new HashSet<>(keyMap.keySet())) {
            // Keys consumed by an earlier group have no pairs left and fail this size check.
            Collection<String> values = keyMap.get(key);
            if (values.size() != 1) {
                continue;
            }
            String value = values.iterator().next();
            Set<String> keys = new HashSet<>(valueMap.get(value));
            if (keys.size() <= 1 || !eachHasSinglePartner(keyMap, keys)) {
                continue;
            }
            manyToOneMap.put(keys, value);
            for (String groupKey : keys) {
                keyMap.remove(groupKey, value);
                valueMap.remove(value, groupKey);
            }
        }
        return manyToOneMap;
    }

    /**
     * Tells whether every element of {@code items} is associated with exactly one entry in
     * {@code index}.
     */
    private static boolean eachHasSinglePartner(Multimap<String, String> index,
            Collection<String> items) {
        for (String item : items) {
            if (index.get(item).size() != 1) {
                return false;
            }
        }
        return true;
    }

    /** Logs one line per key of {@code multimap} with its associated entries joined by commas. */
    private static void printMultimap(Multimap<String, String> multimap) {
        LOGGER.info("---------------------------------------");
        for (String str : multimap.keySet()) {
            LOGGER.info("{} : {}", str, StringUtils.join(multimap.get(str), ","));
        }
    }

    /** Logs every key/value pair of every many-to-one group. */
    private static void printManyToOne(Map<Set<String>, String> manyToOneMap) {
        LOGGER.info("---------------------------------------");
        LOGGER.info("print manyToOneMap ...");
        for (Entry<Set<String>, String> entry : manyToOneMap.entrySet()) {
            String value = entry.getValue();
            for (String tmpKey : entry.getKey()) {
                LOGGER.info("key:[{}], value:[{}]", tmpKey, value);
            }
        }
    }

    /** Logs every key/value pair of every one-to-many group. */
    private static void printOneToMany(Map<String, Set<String>> oneToManyMap) {
        LOGGER.info("---------------------------------------");
        LOGGER.info("print oneToManyMap ...");
        for (Entry<String, Set<String>> entry : oneToManyMap.entrySet()) {
            String key = entry.getKey();
            for (String tmpValue : entry.getValue()) {
                LOGGER.info("key:[{}], value:[{}]", key, tmpValue);
            }
        }
    }

    /** Logs every key/value combination of every many-to-many group. */
    private static void printManyToMany(Map<Set<String>, Set<String>> manyToManyMap) {
        LOGGER.info("---------------------------------------");
        LOGGER.info("print manyToManyMap ...");
        for (Entry<Set<String>, Set<String>> entry : manyToManyMap.entrySet()) {
            Set<String> values = entry.getValue();
            for (String tmpKey : entry.getKey()) {
                for (String tmpValue : values) {
                    LOGGER.info("key:[{}], value:[{}]", tmpKey, tmpValue);
                }
            }
        }
    }

    /** Logs every one-to-one pair. */
    private static void printOneToOneMap(Map<String, String> oneToOneMap) {
        LOGGER.info("---------------------------------------");
        LOGGER.info("print oneToOneMap ...");
        for (Entry<String, String> entry : oneToOneMap.entrySet()) {
            LOGGER.info("key:[{}], value:[{}]", entry.getKey(), entry.getValue());
        }
    }
}
测试代码:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Before;
import org.junit.Test;
/**
 * Smoke test for {@code Classfy.classfyAssociate}: feeds a fixed set of key/value pairs and
 * lets the classifier log the result. No assertions are made on the logged output.
 */
public class ClassfyTest {

    /** Fixture pairs as {key, value}; each one becomes its own single-entry map, in this order. */
    private static final String[][] PAIRS = {
        {"keyA", "valueA"},
        {"keyA", "valueB"},
        {"keyB", "valueA"},
        {"keyB", "valueB"},
        {"keyC", "valueC"},
        {"keyD", "valueD"},
        {"keyD", "valueE"},
        {"keyE", "valueF"},
        {"keyF", "valueF"},
        {"keyG", "valueG"},
        {"keyH", "valueH"},
        {"keyH", "valueI"},
        {"keyI", "valueH"},
        {"keyI", "valueI"},
        {"keyJ", "valueJ"},
        {"keyK", "valueK"},
        {"keyK", "valueL"},
        {"keyL", "valueM"},
        {"keyM", "valueM"},
        {"keyO", "valueO"},
    };

    @Before
    public void setUp() throws Exception {
    }

    /** Builds one single-entry map per fixture pair and runs the classifier over the list. */
    @Test
    public void testClassfyToTwoKind() {
        List<Map<String, String>> list = new ArrayList<>();
        for (String[] pair : PAIRS) {
            Map<String, String> single = new HashMap<>();
            single.put(pair[0], pair[1]);
            list.add(single);
        }
        Classfy.classfyAssociate(list);
    }
}
运行结果:
欢迎指出不足
转载请注明出处