写在前边的实现需求:
1.总共10万个电话号码;
2.电话号码中有重复和错误;
3.查找出正确的号码(不重复);
一、优化前的实现方式:
1.先用正则过滤一遍10万条数据,找出错误的;
2.用List.contains验证重复数据,用List.add添加不重复数据;
3.最终从List中取出正确的数据。
/**
 * Baseline ("before optimisation") benchmark: generates {@code _capacity}
 * numeric strings and removes duplicates by calling {@code contains} on the
 * result list before every insert.
 */
public class appMain {
    /** Number of strings generated by {@link #main(String[])}. */
    static final int _capacity = 1000000;
    static final Random rand = new Random(System.currentTimeMillis() + _capacity);
    /** Generated input; contains duplicates. */
    static ArrayList<String> list = new ArrayList<>(_capacity);
    /** Distinct values collected by {@link #test1()}, in first-seen order. */
    static ArrayList<String> newlist = new ArrayList<>(_capacity);

    /**
     * Fills {@link #list} with test data, prints the generation time and runs
     * {@link #test1()}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws InterruptedException {
        long ts = System.currentTimeMillis();
        int modVal = _capacity / 3;
        for (int i = 0; i < _capacity; i++) {
            // Re-seeding with the loop index makes the generated data the same on every run.
            rand.setSeed(i);
            // The remainder lies in (-modVal, modVal), so Math.abs cannot overflow here.
            list.add(Integer.toString(Math.abs(rand.nextInt() % modVal)));
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("生成时间 :" + ts);

        test1();
    }

    /**
     * Copies every distinct element of {@link #list} into {@link #newlist} and
     * prints the elapsed time, the number of duplicates and the number of
     * distinct values.
     *
     * <p>Each {@code contains} call scans {@code newlist} from the start, so the
     * total work grows quadratically with the input size. That is intentional:
     * this method is the slow baseline the sort-based variant is compared with.
     */
    static void test1() {
        newlist.clear();
        int repetition = 0;
        long ts = System.currentTimeMillis();
        for (String s : list) {
            if (!newlist.contains(s)) {
                newlist.add(s);
            } else {
                repetition++;
            }
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("------ 插入检查方法 -------");
        System.out.println("查找时间 :" + ts);
        System.out.println("重复 :" + repetition);
        System.out.println("正确 :" + newlist.size());
    }
}
优化前执行结果:
/*条件:capacity = 100000
结果:
生成时间 :33
------ 插入检查方法 -------
查找时间 :6612
重复 :76871
正确 :23129
------ 排序检查方法 -------
查找时间 :91
重复 :76871
正确 :23129*/
使用以上方式做导入的话,数据量一旦超过5万条就会马上出现假死状态,显然不可取,所以有了下边的优化。
二、优化后的实现方式:
1.先对10万数据排序;
2.对比前后两条数据(这个我之后会详细说明为什么这么做);
3.筛选出正确数据。
/**
 * Optimised benchmark: generates {@code _capacity} numeric strings, sorts them
 * and removes duplicates by comparing each element with its predecessor.
 */
public class appMain {
    /** Number of strings generated by {@link #main(String[])}. */
    static final int _capacity = 1000000;
    static final Random rand = new Random(System.currentTimeMillis() + _capacity);
    /** Generated input; contains duplicates and is sorted in place by {@link #test2()}. */
    static ArrayList<String> list = new ArrayList<>(_capacity);
    /** Distinct values collected by {@link #test2()}, in sorted order. */
    static ArrayList<String> newlist = new ArrayList<>(_capacity);

    /**
     * Fills {@link #list} with test data, prints the generation time and runs
     * {@link #test2()}.
     *
     * @param args unused
     */
    public static void main(String[] args) throws InterruptedException {
        long ts = System.currentTimeMillis();
        int modVal = _capacity / 3;
        for (int i = 0; i < _capacity; i++) {
            // Re-seeding with the loop index makes the generated data the same on every run.
            rand.setSeed(i);
            // The remainder lies in (-modVal, modVal), so Math.abs cannot overflow here.
            list.add(Integer.toString(Math.abs(rand.nextInt() % modVal)));
        }
        ts = System.currentTimeMillis() - ts;
        System.out.println("生成时间 :" + ts);

        test2();
    }

    /**
     * Sorts {@link #list} in place, copies one representative of every run of
     * equal neighbours into {@link #newlist}, and prints the elapsed time, the
     * number of duplicates and the number of distinct values.
     *
     * <p>The strings are compared lexicographically; that is sufficient here
     * because sorting only has to place equal values next to each other.
     */
    static void test2() {
        newlist.clear();
        int repetition = 0;
        long ts = System.currentTimeMillis();

        Collections.sort(list);
        // Guard: list.get(0) would throw IndexOutOfBoundsException on an empty list.
        if (!list.isEmpty()) {
            String str = list.get(0);
            int max = list.size();
            for (int i = 1; i < max; i++) {
                if (str.equals(list.get(i))) {
                    repetition++;
                    continue;
                }
                // A new value starts here: store the previous one and track the new one.
                newlist.add(str);
                str = list.get(i);
            }
            // The value of the last run is never stored inside the loop.
            newlist.add(str);
        }

        ts = System.currentTimeMillis() - ts;
        System.out.println("------ 排序检查方法 -------");
        System.out.println("查找时间 :" + ts);
        System.out.println("重复 :" + repetition);
        System.out.println("正确 :" + newlist.size());
    }
}
优化后执行结果:
/*条件:capacity = 1000000
结果:
生成时间 :392
------ 插入检查方法 -------
查找时间 :1033818
重复 :703036
正确 :296964
------ 排序检查方法 -------
查找时间 :1367
重复 :703036
正确 :296964*/
当数据量达到10万条时,两种方法的查找时间已相差约70倍(6612 ms 对 91 ms);当数据量达到100万条时,test1()耗时约17分钟(1033818 ms),程序基本处于卡死状态,而test2()仅用约1.4秒(1367 ms)就能反馈结果。
下边来简单解剖下源码:
1 Collections.sort(list);2 String str = list.get(0);3 int max =list.size();4 for (int i = 1; i < max; i++) {5 if(str.equals(list.get(i))) {6 repetition++;7 continue;8 }9 newlist.add(str);10 str =list.get(i);11 }
Line 1:排序,假如list排序后的结果是[1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]
Line 2:初始str = 1;
从Line 4开始进入循环:
Line 5:判断str是否和当前selector值相等(暂且认为list.get(i)是一个指针),如果相等则跳过以下步骤,进入下一次循环
Line 9:将str = 1,加入newlist尾
Line10:将当前selector值赋给str,此时str=2,进入下一个循环
...
这种语言解释我个人觉得特别麻烦,我还是写段代码让程序告诉你它怎么执行的。
/**
 * Step-by-step demonstration of the "compare with the previous element"
 * de-duplication on an already ordered list: every iteration prints the list
 * with the current position marked, followed by the result list so far.
 */
public class appList {
    /** Demo input, filled by {@link #main(String[])}. */
    static ArrayList<String> list = new ArrayList<>();
    /** Distinct values collected so far. */
    static ArrayList<String> newlist = new ArrayList<>();

    /**
     * Builds the input (the value i repeated i times for i = 1..5), runs the
     * de-duplication loop and traces every step on standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        for (int i = 1; i < 5 + 1; i++) {
            for (int j = 0; j < i; j++) {
                list.add(Integer.toString(i));
            }
        }
        System.out.println("list初始值 " + list.toString());
        // prints [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]

        String str = list.get(0);
        int max = list.size();
        for (int i = 1; i < max; i++) {
            Print(i);
            if (str.equals(list.get(i))) {
                // Same value as the one being tracked: nothing to store.
                PrintNew();
                continue;
            }
            // A new value starts here: store the previous one and track the new one.
            newlist.add(str);
            System.out.println("add\t" + str);
            str = list.get(i);
            PrintNew();
        }

        // The value of the last run is never stored inside the loop.
        newlist.add(str);
        System.out.println("add\t" + str);
        PrintNew();

        System.out.println("newlist值 " + newlist.toString());
        // prints [1, 2, 3, 4, 5]
    }

    /**
     * Prints the current content of {@link #newlist} on one line, each element
     * followed by a comma, and then an empty line.
     */
    static void PrintNew() {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("newlist\t");
        for (String value : newlist) {
            stringBuilder.append(value);
            stringBuilder.append(",");
        }
        System.out.println(stringBuilder.toString());
        System.out.println();
    }

    /**
     * Prints {@link #list} on one line, each element followed by a comma, with
     * the element at index {@code pos} wrapped in square brackets.
     *
     * @param pos index of the element to highlight
     */
    static void Print(int pos) {
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("list\t");
        for (int i = 0; i < list.size(); i++) {
            if (i == pos) {
                stringBuilder.append("[");
                stringBuilder.append(list.get(i));
                stringBuilder.append("],");
            } else {
                stringBuilder.append(list.get(i));
                stringBuilder.append(",");
            }
        }
        System.out.println(stringBuilder.toString());
    }
}
执行结果:
list初始值 [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5]
list	1,[2],2,3,3,3,4,4,4,4,5,5,5,5,5,
add	1
newlist	1,

list	1,2,[2],3,3,3,4,4,4,4,5,5,5,5,5,
newlist	1,

list	1,2,2,[3],3,3,4,4,4,4,5,5,5,5,5,
add	2
newlist	1,2,

list	1,2,2,3,[3],3,4,4,4,4,5,5,5,5,5,
newlist	1,2,

list	1,2,2,3,3,[3],4,4,4,4,5,5,5,5,5,
newlist	1,2,

list	1,2,2,3,3,3,[4],4,4,4,5,5,5,5,5,
add	3
newlist	1,2,3,

list	1,2,2,3,3,3,4,[4],4,4,5,5,5,5,5,
newlist	1,2,3,

list	1,2,2,3,3,3,4,4,[4],4,5,5,5,5,5,
newlist	1,2,3,

list	1,2,2,3,3,3,4,4,4,[4],5,5,5,5,5,
newlist	1,2,3,

list	1,2,2,3,3,3,4,4,4,4,[5],5,5,5,5,
add	4
newlist	1,2,3,4,

list	1,2,2,3,3,3,4,4,4,4,5,[5],5,5,5,
newlist	1,2,3,4,

list	1,2,2,3,3,3,4,4,4,4,5,5,[5],5,5,
newlist	1,2,3,4,

list	1,2,2,3,3,3,4,4,4,4,5,5,5,[5],5,
newlist	1,2,3,4,

list	1,2,2,3,3,3,4,4,4,4,5,5,5,5,[5],
newlist	1,2,3,4,

add	5
newlist	1,2,3,4,5,

newlist值 [1, 2, 3, 4, 5]