背景:
分配给我的任务是解析两个Json文件,找到两个Json文件中poiid相同的记录,之后对比cluster_children数组内的所有poiid,筛选出Json_A 和 Json_B 中各自独有的poiid.
例如:
A                        B
poiid         ==         poiid
cluster_children[        cluster_children[
  poiid:                   poiid:
  1                        2
  2                        3
  3                        5
  4
]                        ]
要的结果:
A : 1, 4. B : 5
直接上代码
package SelectPoiid;
import com.google.gson.JsonObject;
import com.jayway.jsonpath.JsonPath;
import net.sf.json.JSON;
import net.sf.json.JSONObject;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import java.io.File;
import java.sql.SQLOutput;
import java.util.*;
public class Selcet {
/**
 * Entry point: for every record whose top-level {@code poiid} appears in both
 * crawl result files, reports the child poiids found in only one of the files.
 *
 * <p>Each input file is read as UTF-8 text with one JSON object per line.
 * For every {@code poiid} present in both files whose records both have a
 * {@code cluster_children} field, the ids selected by
 * {@code $.cluster_children..child_poi.poiid} are compared. When they differ,
 * two CSV lines are printed to standard output, in the order the ids first
 * appear in file A:
 * <pre>
 * &lt;poiid&gt;,crawl_result_A[,&lt;id only in A&gt;...]
 * &lt;poiid&gt;,crawl_result_B[,&lt;id only in B&gt;...]
 * </pre>
 * Pairs with no one-sided child id produce no output.
 *
 * @param args optional; {@code args[0]} is the path of file A and
 *             {@code args[1]} the path of file B. The built-in desktop paths
 *             are used for whichever argument is missing.
 * @throws Exception if a file cannot be read or a line cannot be parsed
 */
public static void main(String[] args) throws Exception{
    String pathA = (args != null && args.length > 0)
            ? args[0] : "/Users/zhaokeyi/Desktop/crawl_result_A";
    String pathB = (args != null && args.length > 1)
            ? args[1] : "/Users/zhaokeyi/Desktop/crawl_result_B";
    String childIdPath = "$.cluster_children..child_poi.poiid";

    // poiid -> raw JSON line, for each file
    Map<String, String> recordsA = loadRecordsByPoiid(pathA);
    Map<String, String> recordsB = loadRecordsByPoiid(pathB);

    for (Map.Entry<String, String> entry : recordsA.entrySet()) {
        String id = entry.getKey();
        if (!recordsB.containsKey(id)) {
            continue;
        }
        String jsonA = entry.getValue();
        String jsonB = recordsB.get(id);

        // Only records that carry cluster_children on both sides are compared.
        if (!JSONObject.fromObject(jsonA).has("cluster_children")
                || !JSONObject.fromObject(jsonB).has("cluster_children")) {
            continue;
        }

        List<Object> rawChildrenA = JsonPath.read(jsonA, childIdPath);
        List<Object> rawChildrenB = JsonPath.read(jsonB, childIdPath);
        List<String> childrenA = idsAsText(rawChildrenA);
        List<String> childrenB = idsAsText(rawChildrenB);

        // Compute both one-sided differences directly. A size/containsAll
        // pre-check would wrongly skip pairs such as [1,2,3] vs [1,1,2].
        List<String> onlyInA = withoutAll(childrenA, childrenB);
        List<String> onlyInB = withoutAll(childrenB, childrenA);
        if (onlyInA.isEmpty() && onlyInB.isEmpty()) {
            continue;
        }

        // print, not printf: the id is data and must not be read as a format string.
        System.out.print(id + "," + "crawl_result_A");
        csv(onlyInA);
        System.out.print(id + "," + "crawl_result_B");
        csv(onlyInB);
    }
}

/**
 * Reads a JSON-lines file and indexes its lines by their top-level {@code poiid}.
 *
 * <p>Blank lines and lines without a {@code poiid} field are skipped. When an
 * id occurs more than once, the last line with that id is kept. Non-string ids
 * (for example numbers) are converted to text.
 *
 * @param path path of the file to read
 * @return map from poiid to the raw JSON line, in first-seen order
 * @throws java.io.IOException if the file cannot be read
 */
private static Map<String, String> loadRecordsByPoiid(String path) throws java.io.IOException {
    Map<String, String> byId = new LinkedHashMap<>();
    List<String> lines = FileUtils.readLines(new File(path), "UTF-8");
    for (String line : lines) {
        if (line.trim().isEmpty()) {
            continue;
        }
        if (!JSONObject.fromObject(line).has("poiid")) {
            continue;
        }
        // Read as Object so a numeric poiid does not fail with ClassCastException.
        Object poiid = JsonPath.read(line, "$.poiid");
        byId.put(Objects.toString(poiid, null), line);
    }
    return byId;
}

/** Converts every id to its text form so ids of mixed JSON types compare and print uniformly. */
private static List<String> idsAsText(List<?> ids) {
    List<String> text = new ArrayList<>(ids.size());
    for (Object id : ids) {
        text.add(String.valueOf(id));
    }
    return text;
}

/** Returns a copy of {@code source} with every element that also occurs in {@code excluded} removed. */
private static List<String> withoutAll(List<String> source, List<String> excluded) {
    List<String> remaining = new ArrayList<>(source);
    remaining.removeAll(new HashSet<>(excluded));
    return remaining;
}
/**
 * Tells whether two lists contain the same elements, ignoring order.
 *
 * <p>A {@code null} list is treated as an empty list, so {@code null} equals
 * {@code null} and equals any empty list, but never a non-empty list.
 * Duplicates are significant: each element must occur the same number of
 * times in both lists. Elements are compared with {@code equals()}.
 *
 * @param list1 first list, may be {@code null}
 * @param list2 second list, may be {@code null}
 * @param <E>   element type
 * @return {@code true} if both lists hold the same elements with the same
 *         multiplicities, regardless of order
 */
public static <E>boolean isListEqual(List<E> list1, List<E> list2) {
    // Same reference, including the case where both are null.
    if (list1 == list2) {
        return true;
    }
    // null counts as empty; an empty list only equals another empty list.
    boolean firstEmpty = list1 == null || list1.isEmpty();
    boolean secondEmpty = list2 == null || list2.isEmpty();
    if (firstEmpty || secondEmpty) {
        return firstEmpty && secondEmpty;
    }
    if (list1.size() != list2.size()) {
        return false;
    }
    // Cross off one matching element of list2 for each element of list1.
    // With equal sizes, every element finding a partner means the lists match.
    // A one-directional containsAll would accept [1,2,3] vs [1,1,2].
    List<E> unmatched = new ArrayList<E>(list2);
    for (E element : list1) {
        if (!unmatched.remove(element)) {
            return false;
        }
    }
    return true;
}
/**
 * Prints the elements of a list to standard output as the tail of a CSV row.
 *
 * <p>Every element is written with a leading comma, and the row is ended with
 * a line break: {@code [a, b]} prints {@code ,a,b}. An empty or {@code null}
 * list prints only the line break. Elements are written in their
 * {@code String.valueOf} form, so they need not be strings.
 *
 * @param list elements to print, may be {@code null}
 * @param <E>  element type
 */
public static <E> void csv(List<E> list){
    StringBuilder row = new StringBuilder();
    if (list != null) {
        for (E item : list) {
            // Append the value as data. Passing it to printf would treat a '%'
            // inside it as a format specifier.
            row.append(',').append(String.valueOf(item));
        }
    }
    System.out.println(row.toString());
}
}