**fastjson读取超大JSON文件并用Map去重复**
超大JSON文本格式(简单示例):
[{"OrderID":"001","OrderNo":"A001"},
{"OrderID":"002","OrderNo":"A002"},
{"OrderID":"003","OrderNo":"A003"},
{"OrderID":"004","OrderNo":"A004"}]
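MAVEN依赖
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.9</version>
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.6</version>
</dependency>
实体类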
/**
 * Simple mutable bean holding one order record: its id and its order number.
 */
public class Order {

    /** Identifier of the order. */
    private String orderId;

    /** Order number of the order. */
    private String orderNo;

    /**
     * @return the order id, or {@code null} if it has not been set
     */
    public String getOrderId() {
        return this.orderId;
    }

    /**
     * @param orderId the order id to store
     */
    public void setOrderId(String orderId) {
        this.orderId = orderId;
    }

    /**
     * @return the order number, or {@code null} if it has not been set
     */
    public String getOrderNo() {
        return this.orderNo;
    }

    /**
     * @param orderNo the order number to store
     */
    public void setOrderNo(String orderNo) {
        this.orderNo = orderNo;
    }

    /**
     * Renders both fields as {@code Order{orderId='...', orderNo='...'}}.
     * Unset fields are rendered as the text {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Order{");
        text.append("orderId='").append(orderId).append('\'');
        text.append(", orderNo='").append(orderNo).append('\'');
        text.append('}');
        return text.toString();
    }
}
测试类
import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import java.util.Map.Entry;
import java.util.*;
import java.io.*;
/*
文本文件内容:
[{"OrderID":"001","OrderNo":"A001"},
{"OrderID":"002","OrderNo":"A002"},
{"OrderID":"003","OrderNo":"A003"},
{"OrderID":"004","OrderNo":"A004"}]
基本思路:
1.com.alibaba.fastjson 读取JSON文件到List
2.提取重复的放入一个list 利用map.containsKey()
3.提取不重复的放入另一个list 这里提供四种方式
* */
/**
 * Demo: reads a JSON array of orders with fastjson, separates orders whose
 * order id was already seen from the first occurrence of each id, and then
 * times four different ways of copying the de-duplicated map values to a list.
 */
public class MapTest {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_JSON_PATH = "E:\\IDEASOURCE\\text\\data5000000.json";

    /**
     * Runs the de-duplication demo and prints the results and timings to stdout.
     *
     * @param args optional; {@code args[0]} overrides the default input file path
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_JSON_PATH;
        // try-with-resources guarantees the file handle is released even when
        // reading or parsing fails.
        try (InputStream inputStream = new FileInputStream(path)) {
            String text = IOUtils.toString(inputStream, "utf8");
            List<Order> parsed = JSON.parseArray(text, Order.class);
            // parseArray may yield null for blank input; treat that as "no orders".
            List<Order> testList = parsed != null ? parsed : Collections.<Order>emptyList();

            List<Order> repeatList = new ArrayList<>(); // orders whose id was already seen
            List<Order> soleList = new ArrayList<>();   // unique orders, filled via keySet()
            List<Order> soleList2 = new ArrayList<>();  // unique orders, filled via entrySet()
            List<Order> soleList3 = new ArrayList<>();  // unique orders, filled via Map.forEach
            List<Order> soleList4 = new ArrayList<>();  // unique orders, filled via values()

            // Key the orders by id; the first occurrence of each id wins and
            // every later occurrence is collected as a duplicate.
            Map<String, Order> map = new HashMap<>();
            for (Order s : testList) {
                if (map.containsKey(s.getOrderId())) {
                    repeatList.add(s);
                } else {
                    map.put(s.getOrderId(), s);
                }
            }

            for (Order s : repeatList) {
                System.out.println("存在相同的元素:" + s.toString());
            }
            System.out.println("剔除相同的元素:");
            for (Map.Entry<String, Order> entry : map.entrySet()) {
                System.out.println(entry.getValue());
            }

            // Copy the unique orders into a list in four different ways and time each.

            // Approach 1: iterate the key set and look every value up again.
            long time1 = System.nanoTime();
            Set<String> set = map.keySet();
            Iterator<String> it = set.iterator();
            while (it.hasNext()) {
                soleList.add(map.get(it.next()));
            }
            long time2 = System.nanoTime();

            // Approach 2: iterate the entry set.
            Set<Entry<String, Order>> it2 = map.entrySet();
            Iterator<Entry<String, Order>> entrys = it2.iterator();
            while (entrys.hasNext()) {
                Entry<String, Order> entry = entrys.next();
                soleList2.add(entry.getValue());
            }
            long time3 = System.nanoTime();

            // Approach 3: JDK 8 Map.forEach.
            map.forEach((key, value) -> {
                soleList3.add(value);
            });
            long time4 = System.nanoTime();

            // Approach 4: iterate the values collection directly.
            Collection<Order> collection = map.values();
            Iterator<Order> it4 = collection.iterator();
            while (it4.hasNext()) {
                soleList4.add(it4.next());
            }
            long time5 = System.nanoTime();

            System.out.println("keySet方法耗费时间:" + (time2 - time1));
            System.out.println("entrySet方法耗费时间:" + (time3 - time2));
            System.out.println("JDK8 map.forEach耗时:" + (time4 - time3));
            System.out.println("map.values()耗时:" + (time5 - time4));
        } catch (Exception e) {
            // Top-level boundary of the demo: report any I/O or parse failure
            // and still print the elapsed time below.
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("执行时长" + (end - start));
    }
}