题目:有6个文件,每个文件里大约有200万个整数,每行一个。
要求:找出所有文件里最大的那个数字。
实现方法:
package com.yxie.test.data;
import com.magnanimityData.test.constant.MagnanimityDataConstant;
/**
 * Entry point that finds the largest integer stored across several text files.
 *
 * <p>One {@link ExecuteThread} is started per input file; each worker reports the
 * largest value of its own file as a decimal string, and this class merges the
 * per-file results into the overall maximum.
 */
public class MainTest {
    /** Number of input files, and therefore of worker threads. */
    private static final int THREAD_COUNT = 6;

    /**
     * Starts one worker per file, waits for all of them and prints the overall
     * maximum followed by the elapsed wall-clock time in milliseconds.
     *
     * <p>Original author's note: "about 2250" (presumably the measured run time
     * in milliseconds).
     *
     * @param args unused
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        long beginTime = System.currentTimeMillis();

        // Split the long-running job into one small task per file, run the tasks
        // on separate threads, then merge the partial results below.
        ExecuteThread[] exeThreads = new ExecuteThread[THREAD_COUNT];
        for (int i = 0; i < THREAD_COUNT; i++) {
            exeThreads[i] = new ExecuteThread();
            exeThreads[i].setFileName(MagnanimityDataConstant.READ_MAIN_FILE_PATH + i
                    + MagnanimityDataConstant.READ_MAIN_FILE_PATH_PREFIX);
            exeThreads[i].start();
        }

        // null means "no worker has reported a value yet". Seeding with "0" (as the
        // original did) would invent a result when every reported value is negative.
        String maxTotalValue = null;
        for (int i = 0; i < exeThreads.length; i++) {
            // getResult() blocks until the worker has finished.
            String calResult = exeThreads[i].getResult();
            if (calResult == null || calResult.length() == 0) {
                continue;
            }
            if (maxTotalValue == null || isGreater(calResult, maxTotalValue)) {
                maxTotalValue = calResult;
            }
        }

        // Keep the legacy output ("0") for the degenerate case where no worker
        // reported any value at all.
        System.out.println("maxTotalValue:" + (maxTotalValue == null ? "0" : maxTotalValue));
        System.out.println("totalTime:" + (System.currentTimeMillis() - beginTime));
    }

    /**
     * Compares two decimal integer strings numerically without parsing them.
     *
     * <p>Both arguments must be non-empty, without surrounding whitespace or
     * leading zeros; an optional leading '-' marks a negative value. The strings
     * are not validated.
     *
     * @param candidate value being tested
     * @param current   value to compare against
     * @return {@code true} if {@code candidate} is numerically greater than {@code current}
     */
    private static boolean isGreater(String candidate, String current) {
        boolean candidateNegative = candidate.charAt(0) == '-';
        boolean currentNegative = current.charAt(0) == '-';
        if (candidateNegative != currentNegative) {
            // Different signs: the non-negative one is the larger.
            return currentNegative;
        }
        // Same sign: more digits means larger magnitude; with equal length the
        // lexicographic order of the digits equals the order of the magnitudes.
        int byMagnitude = candidate.length() != current.length()
                ? candidate.length() - current.length()
                : candidate.compareTo(current);
        // For negative numbers a larger magnitude means a smaller value.
        return candidateNegative ? byMagnitude < 0 : byMagnitude > 0;
    }
}
/**
 * File-location constants for the "largest number" exercise.
 *
 * <p>The input files share one base path and one extension and differ only in the
 * index (0-5) placed between the two.
 */
public class MagnanimityDataConstant {
    /** Common path up to, but not including, the file index. */
    public static final String READ_MAIN_FILE_PATH = "/home/xieyun/test/比赛/bigdata/bigdata";

    /** File extension appended after the index (a suffix, despite the constant's name). */
    public static final String READ_MAIN_FILE_PATH_PREFIX = ".txt";

    /** Full path of input file 0. */
    public static final String MAIN_FILE_PATH =
            READ_MAIN_FILE_PATH + "0" + READ_MAIN_FILE_PATH_PREFIX;

    /** Full path of input file 1. */
    public static final String MAIN_FILE_PATH1 =
            READ_MAIN_FILE_PATH + "1" + READ_MAIN_FILE_PATH_PREFIX;

    /** Full path of input file 2. */
    public static final String MAIN_FILE_PATH2 =
            READ_MAIN_FILE_PATH + "2" + READ_MAIN_FILE_PATH_PREFIX;

    /** Full path of input file 3. */
    public static final String MAIN_FILE_PATH3 =
            READ_MAIN_FILE_PATH + "3" + READ_MAIN_FILE_PATH_PREFIX;

    /** Full path of input file 4. */
    public static final String MAIN_FILE_PATH4 =
            READ_MAIN_FILE_PATH + "4" + READ_MAIN_FILE_PATH_PREFIX;

    /** Full path of input file 5. */
    public static final String MAIN_FILE_PATH5 =
            READ_MAIN_FILE_PATH + "5" + READ_MAIN_FILE_PATH_PREFIX;
}
package com.yxie.test.data;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
/**
 * Worker thread that scans one text file containing one decimal integer per line
 * and records the largest value found, kept as a string.
 *
 * <p>Usage: call {@link #setFileName(String)}, then {@link #start()}, then
 * {@link #getResult()}, which blocks until the scan has finished.
 */
public class ExecuteThread extends Thread {
    /** Becomes true once {@link #run()} has finished, successfully or not. */
    private volatile boolean completed = false;

    /** Largest value read from the file, or null if no value was read. */
    private String maxValue;

    /** Path of the file to scan. */
    private String fileName;

    /**
     * @return the path of the file this worker scans
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * @param fileName path of the file to scan; must be set before the thread is started
     */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Blocks until the worker has finished and returns its result.
     *
     * <p>An interrupt received while waiting does not abort the wait; the caller's
     * interrupt status is restored before returning instead of being discarded.
     *
     * @return the largest integer in the file as a decimal string, or {@code null}
     *         if the file contained no numbers or could not be opened
     */
    public synchronized String getResult() {
        boolean interrupted = false;
        while (!completed) {
            try {
                this.wait();
            } catch (InterruptedException e) {
                // Remember the interrupt and keep waiting; re-interrupting here
                // would make the next wait() throw immediately and spin.
                interrupted = true;
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
        return maxValue;
    }

    /** Marks the worker as finished and wakes every thread blocked in {@link #getResult()}. */
    private synchronized void compileResult() {
        completed = true;
        this.notifyAll();
    }

    /**
     * Scans the configured file and publishes the result.
     *
     * <p>Completion is signalled in a {@code finally} block so that callers of
     * {@link #getResult()} are released even if the scan fails with an unchecked
     * exception (for example when no file name was set).
     */
    @Override
    public void run() {
        try {
            this.maxValue = readPartFile();
        } finally {
            compileResult();
        }
    }

    /**
     * Reads the file line by line and tracks the numerically largest line.
     *
     * <p>Lines are trimmed and blank lines are skipped. Values are compared as
     * strings (see {@link #isGreater(String, String)}) rather than parsed. If an
     * I/O error occurs, the stack trace is printed and the maximum of the lines
     * read so far is returned.
     *
     * @return the largest value found, or {@code null} if no number was read
     */
    private String readPartFile() {
        BufferedReader in = null;
        // null means "nothing read yet"; seeding with "0" would invent a result
        // for files that are empty or contain only negative numbers.
        String maxVal = null;
        try {
            in = new BufferedReader(new FileReader(fileName));
            String line;
            while ((line = in.readLine()) != null) {
                String candidate = line.trim();
                if (candidate.length() == 0) {
                    continue;
                }
                if (maxVal == null || isGreater(candidate, maxVal)) {
                    maxVal = candidate;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
        return maxVal;
    }

    /**
     * Compares two decimal integer strings numerically without parsing them.
     *
     * <p>Both arguments must be non-empty, without surrounding whitespace or
     * leading zeros; an optional leading '-' marks a negative value. The strings
     * are not validated.
     *
     * @param candidate value being tested
     * @param current   value to compare against
     * @return {@code true} if {@code candidate} is numerically greater than {@code current}
     */
    private static boolean isGreater(String candidate, String current) {
        boolean candidateNegative = candidate.charAt(0) == '-';
        boolean currentNegative = current.charAt(0) == '-';
        if (candidateNegative != currentNegative) {
            // Different signs: the non-negative one is the larger.
            return currentNegative;
        }
        // Same sign: more digits means larger magnitude; with equal length the
        // lexicographic order of the digits equals the order of the magnitudes.
        int byMagnitude = candidate.length() != current.length()
                ? candidate.length() - current.length()
                : candidate.compareTo(current);
        // For negative numbers a larger magnitude means a smaller value.
        return candidateNegative ? byMagnitude < 0 : byMagnitude > 0;
    }
}
但是,其中一个同事的程序执行效率比我们的都要高一些。后来分析发现:原来是我们的比较方式有问题。我们以为文件里存储的是整数,所以就先将每行字符串转换为Integer,然后再进行比较。其实,直接比较整数字符串(先比较长度,长度相同时再用compareTo比较),性能比 Integer a = Integer.valueOf(line); if (a > max) 这种先转换再比较的方式好很多:200万条数据大约能节约30ms。
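从String.compareTo方法的源码看,其实现原理为:将字符串拆为单个字符char,然后再逐个对字符进行比较,也就是按照字符编码(对数字字符来说即ASCII码)比较。需要注意的是,只有当两个字符串长度相同、且都是不带前导零的非负整数时,compareTo的结果才与数值大小一致(例如"9".compareTo("10")的结果大于0),所以代码中要先比较长度,并单独处理负号。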
Integer.valueOf则要先将字符串拆分为单个字符,逐个校验每个字符是否是合法的数字,再累加计算出数值,最后还要装箱为一个Integer对象(数值超出缓存范围时会新建对象)。
所以以后要注意:在只需要比较大小的场景下,直接比较整数字符串(先比较长度,长度相同时再用compareTo)比先转换为Integer再比较的性能要好。