package bigfile;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.CharBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Counts the entries of a large GBK-encoded log file in parallel.
 *
 * <p>An entry starts at a timestamp of the form {@code [yyyy-MM-dd HH:mm:ss,SSS]} and runs
 * until the next such timestamp, so one entry may span several physical lines. The file is
 * cut into slices that end on a newline directly followed by {@code '['}; each slice is
 * memory-mapped and scanned by its own task on a fixed thread pool, and the per-slice totals
 * are summed.
 *
 * <p>An instance holds the state of one scan at a time, so it must not be used for two
 * scans concurrently.
 */
public class MyReadFile {
    /** Log file scanned by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_LOG_FILE = "D:\\receiveMessage\\ebp.debug.2016-01-26-11.log";
    /** Decode chunk size used by {@link #main(String[])} when none is passed on the command line. */
    private static final int DEFAULT_BUFFER_SIZE = 10240;
    /** Default target size of one slice in bytes (20 MiB). */
    private static final long DEFAULT_SLICE_BYTES = 1024L * 1024L * 20L;
    /** Name of the character encoding the log file is decoded with. */
    private static final String LOG_CHARSET_NAME = "GBK";
    /** Timestamp that opens every log entry; compiled once because it never changes. */
    private static final Pattern ENTRY_START = Pattern.compile("\\[[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3}\\]");
    /** Number of characters every {@link #ENTRY_START} match is made of. */
    private static final int ENTRY_START_LENGTH = 25;

    private long fileLength;
    private RandomAccessFile accessFile;
    private ExecutorService execService;
    /** Counted down once per finished slice task, whether it succeeded or failed. */
    private CountDownLatch doneLatch;
    /** Slices of the current scan, in file order. */
    private final List<PairEntity> pairSet = new ArrayList<PairEntity>();
    private final AtomicLong counter = new AtomicLong(0);
    /** First error raised by a slice task of the current scan, or {@code null}. */
    private volatile Throwable failure;

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the log file path and {@code args[1]} the decode
     *             chunk size in characters; the built-in defaults are used for missing values
     */
    public static void main(String[] args) {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_LOG_FILE;
        int bufferSize = (args != null && args.length > 1) ? Integer.parseInt(args[1]) : DEFAULT_BUFFER_SIZE;
        new MyReadFile().StartUp(fileName, bufferSize);
    }

    /**
     * Scans the file and prints the result; any failure is reported on standard error.
     *
     * @param fileName   path of the log file
     * @param bufferSize decode chunk size in characters, must be positive
     */
    private void StartUp(String fileName, int bufferSize) {
        try {
            countEntries(fileName, bufferSize);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Counts the log entries of a file using the default slice size of 20 MiB.
     *
     * @param fileName   path of the log file
     * @param bufferSize decode chunk size in characters, must be positive
     * @return number of timestamps found in the file
     * @throws IOException              if the file cannot be read or a slice task fails
     * @throws IllegalArgumentException if {@code bufferSize} is not positive
     */
    public long countEntries(String fileName, int bufferSize) throws IOException {
        return countEntries(fileName, bufferSize, DEFAULT_SLICE_BYTES);
    }

    /**
     * Counts the log entries of a file, blocking until every slice has been scanned.
     *
     * <p>Prints each slice, the elapsed scan time in milliseconds and the total to standard
     * output. The file and the thread pool are always released before this method returns.
     * A single slice is mapped in one piece, so a slice larger than
     * {@code Integer.MAX_VALUE} bytes makes the scan fail.
     *
     * @param fileName   path of the log file
     * @param bufferSize decode chunk size in characters, must be positive
     * @param sliceBytes target slice size in bytes, must be positive; files smaller than this
     *                   are scanned as one slice
     * @return number of timestamps found in the file; {@code 0} for an empty file
     * @throws IOException              if the file cannot be read, a slice task fails, or the
     *                                  calling thread is interrupted while waiting
     * @throws IllegalArgumentException if {@code bufferSize} or {@code sliceBytes} is not positive
     */
    public long countEntries(String fileName, int bufferSize, long sliceBytes) throws IOException {
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
        }
        if (sliceBytes <= 0) {
            throw new IllegalArgumentException("sliceBytes must be positive: " + sliceBytes);
        }
        pairSet.clear();
        counter.set(0);
        failure = null;
        try {
            accessFile = new RandomAccessFile(fileName, "r");
            fileLength = accessFile.length();
            // At least one slice, so files below the target size no longer divide by zero.
            long wantedSlices = Math.max(1L, fileLength / sliceBytes);
            calculatePair(0, Math.max(1L, fileLength / wantedSlices));

            final long startTime = System.currentTimeMillis();
            if (!pairSet.isEmpty()) {
                doneLatch = new CountDownLatch(pairSet.size());
                // Rounding can yield one slice more than wanted; the latch (unlike a barrier)
                // does not need every task to be running at the same time.
                execService = Executors.newFixedThreadPool((int) Math.min(wantedSlices, (long) pairSet.size()));
                for (PairEntity pair : pairSet) {
                    // The escaped prefix is the original Chinese console label for "assigned slice:";
                    // escapes keep the output independent of the source file encoding.
                    System.out.println("\u5206\u914d\u5206\u7247:" + pair);
                    execService.execute(new PairReadTask(pair, bufferSize));
                }
                try {
                    doneLatch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new IOException("Interrupted while scanning " + fileName, e);
                }
            }
            Throwable cause = failure;
            if (cause != null) {
                throw new IOException("Failed to scan " + fileName, cause);
            }
            long total = counter.get();
            System.out.println("use time: " + (System.currentTimeMillis() - startTime));
            System.out.println("all line: " + total);
            return total;
        } finally {
            shutDown();
        }
    }

    /**
     * Closes the log file and shuts the thread pool down; safe to call when either is absent.
     */
    private void shutDown() {
        if (accessFile != null) {
            try {
                accessFile.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            accessFile = null;
        }
        if (execService != null) {
            execService.shutdown();
            execService = null;
        }
    }

    /**
     * Remembers the first task failure so the scan can report it; later ones are only printed.
     *
     * @param error the error raised by a slice task
     */
    private synchronized void recordFailure(Throwable error) {
        if (failure == null) {
            failure = error;
        } else {
            error.printStackTrace();
        }
    }

    /**
     * Splits the open file, from {@code start} to its end, into slices and appends them to
     * the slice list.
     *
     * <p>Every slice except the last ends on a newline that is directly followed by
     * {@code '['}, so no timestamp is ever cut in two.
     *
     * @param start    offset of the first byte to slice
     * @param pairSize minimum slice size in bytes, must be positive
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException if {@code pairSize} is not positive
     */
    private void calculatePair(long start, long pairSize) throws IOException {
        if (pairSize <= 0) {
            throw new IllegalArgumentException("pairSize must be positive: " + pairSize);
        }
        final long lastByte = fileLength - 1;
        long sliceStart = start;
        while (sliceStart <= lastByte) {
            PairEntity slice = new PairEntity();
            slice.start = sliceStart;
            // Compared as "remaining bytes" so that a huge pairSize cannot overflow the offset.
            if (pairSize - 1 >= lastByte - sliceStart) {
                slice.end = lastByte;
            } else {
                slice.end = findSliceEnd(sliceStart + pairSize - 1);
            }
            pairSet.add(slice);
            sliceStart = slice.end + 1;
        }
    }

    /**
     * Finds where a slice may end without splitting a line that starts with {@code '['}.
     *
     * @param from first offset at which the slice is allowed to end; must be inside the file
     * @return offset of the first newline at or after {@code from} that is directly followed
     *         by {@code '['}, or the offset of the last byte of the file if there is none
     * @throws IOException if the file cannot be read
     */
    private long findSliceEnd(long from) throws IOException {
        accessFile.seek(from);
        int previous = accessFile.read();
        for (long pos = from + 1; pos < fileLength; pos++) {
            int current = accessFile.read();
            if (current < 0) {
                break; // the file is shorter than its recorded length
            }
            if (previous == '\n' && current == '[') {
                return pos - 1;
            }
            previous = current;
        }
        return fileLength - 1;
    }

    /**
     * A slice of the file, described by the inclusive offsets of its first and last byte.
     */
    protected class PairEntity {
        private long start; // offset of the first byte of the slice
        private long end;   // offset of the last byte of the slice (inclusive)

        public long getStart() {
            return start;
        }

        public void setStart(long start) {
            this.start = start;
        }

        public long getEnd() {
            return end;
        }

        public void setEnd(long end) {
            this.end = end;
        }

        @Override
        public String toString() {
            return "Start:" + start + " End:" + end;
        }
    }

    /**
     * Task that memory-maps one slice, decodes it and adds the number of entries found in it
     * to the shared counter.
     */
    protected class PairReadTask implements Runnable {
        private final long start;
        private final long pairSize;
        private final int chunkChars;
        /** Decoded text that has not yet been attributed to a finished entry. */
        private final StringBuilder pending = new StringBuilder();
        /** Whether {@link #pending} currently begins with an entry timestamp. */
        private boolean insideEntry;
        private long entries;

        /**
         * @param pair       slice to scan
         * @param bufferSize number of characters decoded per step, must be positive
         * @throws IllegalArgumentException if {@code bufferSize} is not positive
         */
        public PairReadTask(PairEntity pair, int bufferSize) {
            if (bufferSize <= 0) {
                throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
            }
            this.start = pair.start;
            this.pairSize = pair.end - pair.start + 1;
            this.chunkChars = bufferSize;
        }

        /**
         * Scans the slice; a failure is recorded on the enclosing reader instead of being
         * thrown, and the completion latch is released in every case so the scan cannot hang.
         */
        @Override
        public void run() {
            try {
                counter.addAndGet(scan());
            } catch (Throwable t) {
                // Worker boundary: everything is handed to the waiting thread for reporting.
                recordFailure(t);
            } finally {
                CountDownLatch latch = doneLatch;
                if (latch != null) {
                    latch.countDown();
                }
            }
        }

        /**
         * Decodes the slice chunk by chunk and counts the timestamps in it.
         *
         * @return number of entries that start inside this slice
         * @throws IOException if the slice cannot be mapped
         */
        private long scan() throws IOException {
            pending.setLength(0);
            insideEntry = false;
            entries = 0;
            MappedByteBuffer mapBuffer = accessFile.getChannel().map(MapMode.READ_ONLY, start, pairSize);
            // A streaming decoder carries a multi-byte character that straddles two chunks over
            // to the next step; malformed bytes are replaced, as new String(bytes, charset) does.
            CharsetDecoder decoder = Charset.forName(LOG_CHARSET_NAME).newDecoder()
                    .onMalformedInput(CodingErrorAction.REPLACE)
                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
            CharBuffer chunk = CharBuffer.allocate(chunkChars);
            CoderResult step;
            do {
                step = decoder.decode(mapBuffer, chunk, true);
                consume(chunk);
            } while (step.isOverflow());
            do {
                step = decoder.flush(chunk);
                consume(chunk);
            } while (step.isOverflow());
            if (insideEntry) {
                entries++; // the entry still open at the end of the slice
            }
            return entries;
        }

        /**
         * Moves the decoded characters of {@code chunk} into {@link #pending}, counts every
         * entry that is now complete and drops the text that is no longer needed.
         *
         * @param chunk buffer just filled by the decoder; it is left cleared for the next step
         */
        private void consume(CharBuffer chunk) {
            chunk.flip();
            boolean grew = chunk.hasRemaining();
            pending.append(chunk);
            chunk.clear();
            if (!grew) {
                return;
            }
            Matcher matcher = ENTRY_START.matcher(pending);
            int lastStart = -1;
            while (matcher.find()) {
                if (lastStart >= 0) {
                    // pending[lastStart, matcher.start()) holds the text of one complete entry.
                    entries++;
                }
                lastStart = matcher.start();
            }
            if (lastStart >= 0) {
                // Keep only the still-open entry; its timestamp is matched again next time.
                pending.delete(0, lastStart);
                insideEntry = true;
            } else if (pending.length() >= ENTRY_START_LENGTH) {
                // No entry seen yet: only a timestamp cut off by the chunk edge matters, and
                // that is at most one character shorter than a full timestamp.
                pending.delete(0, pending.length() - (ENTRY_START_LENGTH - 1));
            }
        }
    }
}
// Reposted from: https://my.oschina.net/CallMain/blog/644637