// 多线程读取大文件
package com.zway.lcs.an.util;
import com.jcraft.jsch.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.net.ftp.FTPClient;
import java.io.*;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel.MapMode;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;
@Slf4j
public class BigFileReader {

    /**
     * Demo entry point: fetches a file through {@code FtpUtils} and prints every line of it.
     *
     * @param args unused
     * @throws IOException declared by the original signature; propagated from the helpers used here
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): host names and credentials are hard-coded below; move them to
        // configuration before using this outside of a local experiment.
        // Alternative SFTP source, kept from the original for reference:
        // SftpUtils fiel = new SftpUtils("root","zway@123456","192.168.182.222",22);
        // ChannelSftp channelSftp = fiel.loginChannelSftp();
        // File file = fiel.readSFTPforFile(channelSftp, "/home/test/asas_error.2022-03-14.log");
        FtpUtils ftpTools = new FtpUtils("192.168.182.222", 21, "ftpuser", "zway@123456");
        ftpTools.connectClient();
        File file1 = ftpTools.readFtpforFile("/home/test", "menu.sql");
        BigFileReader.Builder builder = new BigFileReader.Builder(file1, "/home/test/menu.sql", new IHandle() {
            @Override
            public void handle(String line) {
                System.out.println(line);
            }
        });
        // Optional tuning:
        // builder.withTreahdSize(5)
        //        .withCharset("utf-8")
        //        .withBufferSize(1024*1024*5);
        BigFileReader bigFileReader = builder.build();
        bigFileReader.start();
        // Waits for the queued slices, then releases the worker threads and the file handle;
        // without this the non-daemon pool threads keep the JVM alive after the work is done.
        bigFileReader.shutdown();
    }

    /** Number of worker threads; also the divisor used to derive the nominal slice size. */
    private final int threadSize;
    /** Charset name used to decode each line; {@code null} means the platform default. */
    private final String charset;
    /** Upper bound, in bytes, of the chunk copied out of the mapped region at a time. */
    private final int bufferSize;
    /** Callback invoked once per non-empty line. */
    private final IHandle handle;
    private final ExecutorService executorService;
    /** File length in bytes, captured when the reader is built. */
    private final long fileLength;
    private final RandomAccessFile rAccessFile;
    /** Line-aligned byte ranges computed by {@link #start()}. */
    private final Set<StartEndPair> startEndPairs;
    /** Number of non-empty lines handed to the callback so far. */
    private final AtomicLong counter = new AtomicLong(0);

    /** Fluent builder for {@link BigFileReader}. */
    public static class Builder {
        // cores / (1 - 0.8): about five threads per available processor.
        private int threadSize = (int) (Runtime.getRuntime().availableProcessors() / (1 - 0.8));
        private String charset = null;
        private int bufferSize = 1024 * 1024 * 5;
        private IHandle handle;
        private File file;

        /**
         * @param newFile the file to read; must exist
         * @param file    unused; retained so existing callers keep compiling
         * @param handle  callback invoked for every non-empty line; must not be null
         * @throws IllegalArgumentException if {@code newFile} is null or does not exist,
         *                                  or if {@code handle} is null
         * @throws IOException declared by the original signature; not thrown by this constructor
         */
        public Builder(File newFile, String file, IHandle handle) throws IOException {
            if (newFile == null || !newFile.exists()) {
                throw new IllegalArgumentException("文件不存在!");
            }
            if (handle == null) {
                throw new IllegalArgumentException("handle must not be null");
            }
            this.file = newFile;
            this.handle = handle;
        }

        /**
         * Sets the number of worker threads. The misspelled name is kept for existing callers.
         *
         * @param size thread count; must be positive
         * @return this builder
         * @throws IllegalArgumentException if {@code size} is not positive
         */
        public Builder withTreahdSize(int size) {
            if (size <= 0) {
                throw new IllegalArgumentException("thread size must be positive: " + size);
            }
            // The original assigned the field to itself, so the argument was ignored.
            this.threadSize = size;
            return this;
        }

        /**
         * Correctly spelled alias of {@link #withTreahdSize(int)}.
         *
         * @param size thread count; must be positive
         * @return this builder
         */
        public Builder withThreadSize(int size) {
            return withTreahdSize(size);
        }

        /**
         * @param charset charset name used to decode lines; {@code null} selects the platform default
         * @return this builder
         */
        public Builder withCharset(String charset) {
            this.charset = charset;
            return this;
        }

        /**
         * @param bufferSize chunk size in bytes; must be positive
         * @return this builder
         * @throws IllegalArgumentException if {@code bufferSize} is not positive
         */
        public Builder withBufferSize(int bufferSize) {
            if (bufferSize <= 0) {
                throw new IllegalArgumentException("buffer size must be positive: " + bufferSize);
            }
            this.bufferSize = bufferSize;
            return this;
        }

        /**
         * @return a reader configured with the current builder settings
         * @throws IllegalArgumentException if the file can no longer be opened for reading
         */
        public BigFileReader build() {
            return new BigFileReader(this.file, this.handle, this.charset, this.bufferSize, this.threadSize);
        }
    }

    private BigFileReader(File file, IHandle handle, String charset, int bufferSize, int threadSize) {
        this.fileLength = file.length();
        this.handle = handle;
        this.charset = charset;
        this.bufferSize = bufferSize;
        this.threadSize = threadSize;
        try {
            this.rAccessFile = new RandomAccessFile(file, "r");
        } catch (FileNotFoundException e) {
            // Fail fast instead of leaving a null handle that surfaces later as an NPE.
            throw new IllegalArgumentException("Cannot open file for reading: " + file, e);
        }
        // A fixed pool with an unbounded queue: every slice is eventually processed. The previous
        // bounded queue with DiscardPolicy silently dropped slices, and a core size of 10 made
        // construction fail whenever threadSize was below 10.
        this.executorService = Executors.newFixedThreadPool(threadSize);
        this.startEndPairs = new HashSet<StartEndPair>();
    }

    /**
     * Splits the file into line-aligned slices and submits one task per slice to the pool.
     * Returns without waiting for the tasks. When the last slice finishes, the elapsed time and
     * the number of handled lines are printed to standard out. Intended to be called once.
     */
    public void start() {
        // At least one byte per slice, so files shorter than the thread count still work.
        long everySize = Math.max(1L, this.fileLength / this.threadSize);
        try {
            calculateStartEnd(0, everySize);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        final long startTime = System.currentTimeMillis();
        final Runnable onAllDone = new Runnable() {
            @Override
            public void run() {
                System.out.println("use time: " + (System.currentTimeMillis() - startTime));
                System.out.println("all line: " + counter.get());
            }
        };
        if (startEndPairs.isEmpty()) {
            // Empty file: nothing to submit, report immediately.
            onAllDone.run();
            return;
        }
        // A countdown is used rather than a CyclicBarrier: a barrier needs every party to be
        // waiting at the same time, which cannot happen when there are more slices than threads.
        final AtomicLong pending = new AtomicLong(startEndPairs.size());
        for (StartEndPair pair : startEndPairs) {
            System.out.println("分配分片:" + pair);
            this.executorService.execute(new SliceReaderTask(pair, pending, onAllDone));
        }
    }

    /**
     * Fills {@link #startEndPairs} with consecutive byte ranges starting at {@code start}. Each
     * range nominally spans {@code size} bytes and is extended forward until its last byte is a
     * line terminator ('\n' or '\r') or the end of the file.
     *
     * @param start offset of the first byte to cover
     * @param size  nominal slice size in bytes; expected to be at least 1
     * @throws IOException if seeking or reading the file fails
     */
    private void calculateStartEnd(long start, long size) throws IOException {
        final long lastIndex = fileLength - 1;
        long sliceStart = start;
        // Iterative, so the number of slices is not limited by the call-stack depth.
        while (sliceStart <= lastIndex) {
            StartEndPair pair = new StartEndPair();
            pair.start = sliceStart;
            long endPosition = sliceStart + size - 1;
            if (endPosition >= lastIndex) {
                pair.end = lastIndex;
                startEndPairs.add(pair);
                return;
            }
            rAccessFile.seek(endPosition);
            byte tmp = (byte) rAccessFile.read();
            while (tmp != '\n' && tmp != '\r') {
                endPosition++;
                if (endPosition >= lastIndex) {
                    endPosition = lastIndex;
                    break;
                }
                rAccessFile.seek(endPosition);
                tmp = (byte) rAccessFile.read();
            }
            pair.end = endPosition;
            startEndPairs.add(pair);
            sliceStart = endPosition + 1;
        }
    }

    /**
     * Stops accepting work, waits for the already submitted slices to finish, then closes the
     * file. Must not be called from inside the line callback, because it would then wait for the
     * very thread it is running on.
     */
    public void shutdown() {
        this.executorService.shutdown();
        try {
            // Closing the file while workers are still mapping regions would make them fail.
            this.executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            this.executorService.shutdownNow();
            Thread.currentThread().interrupt();
        }
        try {
            this.rAccessFile.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Decodes one line and passes it to the callback; empty lines are skipped and not counted.
     *
     * @param bytes raw bytes of the line, without its terminator
     * @throws UnsupportedEncodingException if the configured charset name is not supported
     */
    private void handle(byte[] bytes) throws UnsupportedEncodingException {
        String line = (this.charset == null) ? new String(bytes) : new String(bytes, charset);
        if (!line.isEmpty()) {
            this.handle.handle(line);
            counter.incrementAndGet();
        }
    }

    /** Inclusive byte range {@code [start, end]} of one slice. */
    private static class StartEndPair {
        public long start;
        public long end;

        @Override
        public String toString() {
            return "star=" + start + ";end=" + end;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + (int) (end ^ (end >>> 32));
            result = prime * result + (int) (start ^ (start >>> 32));
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            StartEndPair other = (StartEndPair) obj;
            return end == other.end && start == other.start;
        }
    }

    /** Reads one slice through memory-mapped I/O and feeds its lines to the callback. */
    private class SliceReaderTask implements Runnable {
        private final long start;
        private final long sliceSize;
        /** Slices not yet finished; shared by all tasks of one {@code start()} call. */
        private final AtomicLong pending;
        /** Run by whichever task finishes last. */
        private final Runnable onAllDone;

        public SliceReaderTask(StartEndPair pair, AtomicLong pending, Runnable onAllDone) {
            this.start = pair.start;
            this.sliceSize = pair.end - pair.start + 1;
            this.pending = pending;
            this.onAllDone = onAllDone;
        }

        @Override
        public void run() {
            try {
                // Allocated here rather than in the constructor so that only running tasks hold
                // a buffer, and never one larger than the slice itself.
                byte[] readBuff = new byte[(int) Math.min((long) bufferSize, sliceSize)];
                ByteArrayOutputStream bos = new ByteArrayOutputStream();
                long position = start;
                long remaining = sliceSize;
                while (remaining > 0) {
                    // A single mapping cannot exceed Integer.MAX_VALUE bytes, so larger slices
                    // are walked window by window; bos carries a partial line across windows.
                    long windowSize = Math.min(remaining, (long) Integer.MAX_VALUE);
                    MappedByteBuffer window =
                            rAccessFile.getChannel().map(MapMode.READ_ONLY, position, windowSize);
                    while (window.hasRemaining()) {
                        int readLength = Math.min(window.remaining(), readBuff.length);
                        window.get(readBuff, 0, readLength);
                        for (int i = 0; i < readLength; i++) {
                            byte current = readBuff[i];
                            if (current == '\n' || current == '\r') {
                                // "\r\n" yields one real line plus an empty one that handle() skips.
                                handle(bos.toByteArray());
                                bos.reset();
                            } else {
                                bos.write(current);
                            }
                        }
                    }
                    position += windowSize;
                    remaining -= windowSize;
                }
                if (bos.size() > 0) {
                    handle(bos.toByteArray());
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Always report completion, even after a failure, so the summary is still printed.
                if (pending.decrementAndGet() == 0) {
                    onAllDone.run();
                }
            }
        }
    }
}
// Multi-threaded reading of large files
// First published on 2022-04-29 10:56:06