import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.stream.Stream;
/**
 * Benchmarks seven ways of counting the lines of one text file.
 *
 * <p>For every scheme the program prints the number of lines it counted and the
 * elapsed wall-clock time in milliseconds. Each scheme runs independently: a
 * failure in one is reported on standard error and the remaining schemes still run.
 *
 * <p>Schemes four and five count {@code '\n'} characters, so a final line that is
 * not terminated by a newline is not included in their totals.
 */
public class Example3 {

    /** File measured when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\resultBigDataFile\\HumanBigData1.csv";

    /** Size of the byte/char buffer used by the hand-written scanning schemes. */
    private static final int BUFFER_SIZE = 4096;

    /** One line-counting strategy under test. */
    private interface LineCounter {
        long count(File file) throws IOException;
    }

    /**
     * Runs all seven schemes against the same file.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File file = new File(path);

        report("方案一", file, Example3::countWithLineNumberReader);
        report("方案二", file, Example3::countBySkipping);
        report("方案三", file, Example3::countWithFilesLines);
        report("方案四", file, Example3::countNewlineBytes);
        report("方案五", file, Example3::countNewlineChars);
        report("方案六", file, Example3::countWithStreamReader);
        report("方案七", file, Example3::countWithFileReader);
    }

    /** Times one scheme and prints its result, or reports its failure on standard error. */
    private static void report(String label, File file, LineCounter counter) {
        long startTime = System.currentTimeMillis();
        try {
            long lines = counter.count(file);
            long endTime = System.currentTimeMillis();
            System.out.println(label + "读取总行数 : " + lines + " 共耗时 : " + (endTime - startTime));
        } catch (IOException | RuntimeException e) {
            // Files.lines reports read errors as unchecked exceptions, hence RuntimeException.
            System.err.println(label + "读取失败 : " + e);
        }
    }

    /** Scheme 1: calls {@code LineNumberReader.readLine()} until it returns null. */
    private static long countWithLineNumberReader(File file) throws IOException {
        try (LineNumberReader reader = new LineNumberReader(new FileReader(file))) {
            long lines = 0L;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        }
    }

    /**
     * Scheme 2: skips to the end of the file and asks the reader for its line number.
     * {@code getLineNumber()} returns an {@code int}, which caps the count this scheme can report.
     */
    private static long countBySkipping(File file) throws IOException {
        try (LineNumberReader reader = new LineNumberReader(new FileReader(file))) {
            // The byte length is an upper bound on the character count, so this reaches EOF.
            reader.skip(file.length());
            return reader.getLineNumber();
        }
    }

    /** Scheme 3: counts the elements of {@code Files.lines}; the stream is closed to release the file. */
    private static long countWithFilesLines(File file) throws IOException {
        try (Stream<String> lines = Files.lines(Paths.get(file.getPath()))) {
            return lines.count();
        }
    }

    /** Scheme 4: reads raw bytes in blocks and counts {@code '\n'} bytes. */
    private static long countNewlineBytes(File file) throws IOException {
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            long lines = 0L;
            int read;
            while ((read = in.read(buffer, 0, buffer.length)) != -1) {
                for (int i = 0; i < read; i++) {
                    if (buffer[i] == '\n') {
                        lines++;
                    }
                }
            }
            return lines;
        }
    }

    /** Scheme 5: reads decoded chars in blocks and counts {@code '\n'} chars. */
    private static long countNewlineChars(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            char[] buffer = new char[BUFFER_SIZE];
            long lines = 0L;
            int read;
            while ((read = reader.read(buffer, 0, buffer.length)) != -1) {
                for (int i = 0; i < read; i++) {
                    if (buffer[i] == '\n') {
                        lines++;
                    }
                }
            }
            return lines;
        }
    }

    /** Scheme 6: {@code BufferedReader.readLine()} over an {@code InputStreamReader}. */
    private static long countWithStreamReader(File file) throws IOException {
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
            long lines = 0L;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        }
    }

    /** Scheme 7: {@code BufferedReader.readLine()} over a {@code FileReader}. */
    private static long countWithFileReader(File file) throws IOException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            long lines = 0L;
            while (reader.readLine() != null) {
                lines++;
            }
            return lines;
        }
    }
}
文件大小大约53GB 总行数是5亿行
方案一读取总行数 : 500000000 共耗时 : 277428
方案二读取总行数 : 500000000 共耗时 : 274790
方案三读取总行数 : 500000000 共耗时 : 268075
方案四读取总行数 : 500000000 共耗时 : 140050 此值大约等于2分20秒
方案五读取总行数 : 500000000 共耗时 : 283071
方案六读取总行数 : 500000000 共耗时 : 265030
方案七读取总行数 : 500000000 共耗时 : 265581
BUILD SUCCESSFUL (total time: 29 minutes 34 seconds)
// Scheme 4 re-run with an 8192-byte buffer: reads the file as raw bytes and counts '\n' bytes.
// A final line without a trailing newline is therefore not counted.
try {
    long startTime = System.currentTimeMillis();
    long lines = 0L;
    byte[] buffer = new byte[8192];
    // try-with-resources closes the stream on every path and cannot hit a null reference
    // when opening the file fails.
    try (InputStream is = new BufferedInputStream(new FileInputStream(new File("D:\\resultBigDataFile\\HumanBigData1.csv")))) {
        int readChars;
        while ((readChars = is.read(buffer, 0, buffer.length)) != -1) {
            for (int i = 0; i < readChars; ++i) {
                if (buffer[i] == '\n') {
                    ++lines;
                }
            }
        }
    }
    long endTime = System.currentTimeMillis();
    System.out.println("方案四读取总行数 : " + lines + " 共耗时 : " + (endTime - startTime));
} catch (java.io.IOException e) {
    // Report the failure instead of silently printing nothing.
    System.err.println("方案四读取失败 : " + e);
}
方案四读取总行数 : 500000000 共耗时 : 157106
BUILD SUCCESSFUL (total time: 2 minutes 37 seconds)
从第4亿行之后的任意一行开始,读取1万条数据(按字节方式读取数据流、按换行符计算行数的完美实现),互联网上最牛逼的实现方式之一
文件大小一共5亿行,53GB
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
/**
 * Prints an inclusive, 1-based range of lines from a large text file without
 * loading the file into memory.
 *
 * <p>The file is read as raw bytes and lines are delimited by {@code '\n'}. The bytes
 * of the selected lines are copied to standard output unchanged, so multi-byte
 * characters that straddle a buffer boundary are not corrupted. Reading stops as soon
 * as the last requested line has been written.
 */
public class Example4 {

    /** File read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\resultBigDataFile\\HumanBigData1.csv";

    // NOTE(review): the original literal was 4000020001, which is greater than DEFAULT_TO,
    // so the requested range could never be entered. 400002001 is presumably what was
    // meant - confirm the intended first line.
    /** First line to print (1-based, inclusive) when no range is given. */
    private static final long DEFAULT_FROM = 400002001L;

    /** Last line to print (1-based, inclusive) when no range is given. */
    private static final long DEFAULT_TO = 400003000L;

    /** Number of bytes requested per read. */
    private static final int BUFFER_SIZE = 256;

    /**
     * Prints the selected line range followed by a summary line.
     *
     * @param args optional; {@code args[0]} is the file path, {@code args[1]} the first
     *     line and {@code args[2]} the last line. Missing values fall back to the defaults.
     * @throws NumberFormatException if a supplied line number is not a valid {@code long}
     */
    public static void main(String[] args) {
        int argCount = (args == null) ? 0 : args.length;
        String path = argCount > 0 ? args[0] : DEFAULT_PATH;
        long from = argCount > 1 ? Long.parseLong(args[1]) : DEFAULT_FROM;
        long to = argCount > 2 ? Long.parseLong(args[2]) : DEFAULT_TO;

        if (from < 1 || from > to) {
            System.err.println("Invalid line range: from=" + from + ", to=" + to);
            return;
        }

        long startTime = System.currentTimeMillis();
        try (InputStream is = new BufferedInputStream(new FileInputStream(new File(path)))) {
            long lines = printLineRange(is, from, to);
            long endTime = System.currentTimeMillis();
            System.out.println("\n方案八读取总行数 : " + lines + " 共耗时 : " + (endTime - startTime));
        } catch (java.io.IOException e) {
            System.err.println("方案八读取失败 : " + e);
        }
    }

    /**
     * Copies lines {@code from..to} (1-based, inclusive) of {@code in} to standard output.
     *
     * @param in   stream positioned at the start of the file
     * @param from first line to copy; must be at least 1
     * @param to   last line to copy; must not be less than {@code from}
     * @return the number of newline-terminated lines consumed from the stream
     * @throws java.io.IOException if reading from {@code in} fails
     */
    private static long printLineRange(InputStream in, long from, long to)
            throws java.io.IOException {
        byte[] buffer = new byte[BUFFER_SIZE];
        long currentLine = 1L; // number of the line that the next unread byte belongs to
        int count;
        while (currentLine <= to && (count = in.read(buffer, 0, buffer.length)) != -1) {
            // Slice of this buffer that lies inside the range; copyFrom < 0 means "none yet".
            int copyFrom = (currentLine >= from) ? 0 : -1;
            int copyTo = count;
            for (int i = 0; i < count; i++) {
                if (buffer[i] != '\n') {
                    continue;
                }
                currentLine++;
                if (currentLine > to) {
                    // This newline terminates the last requested line: include it and stop.
                    copyTo = i + 1;
                    break;
                }
                if (currentLine == from) {
                    // The first requested line starts right after this newline.
                    copyFrom = i + 1;
                }
            }
            if (copyFrom >= 0) {
                System.out.write(buffer, copyFrom, copyTo - copyFrom);
            }
        }
        System.out.flush();
        return currentLine - 1;
    }
}
方案八读取总行数 : 400003003 共耗时 : 115669
BUILD SUCCESSFUL (total time: 1 minute 56 seconds)