package bigfile;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Scans a large log file in parallel.
 *
 * <p>The file is cut into slices, each slice ending on a record boundary (a
 * {@code '\n'} byte immediately followed by {@code '['}), and every slice is
 * scanned by its own thread. A "record" is the run of bytes between two such
 * boundaries, so a log entry that spans several physical lines (for example a
 * stack trace) is one record. The total number of non-empty records is
 * printed when all slices have been processed.
 *
 * <p>Author : Administrator<br>
 * Date : 2016-03-21 09:55:36
 */
public class MyReadFileThread {
    /** One additional worker thread is used per this many bytes of file (200 MiB). */
    private static final long BYTES_PER_THREAD = 1024L * 1024 * 200;
    /** Upper bound on the number of slices requested. */
    private static final int MAX_THREADS = 5;
    /** Number of slices requested for files smaller than {@link #BYTES_PER_THREAD}. */
    private static final int DEFAULT_THREADS = 3;
    /** Initial size of a worker's carry-over buffer; it grows when a record is larger. */
    private static final int INITIAL_PENDING_CAPACITY = 1024 * 1024;

    private int threadPoolSizel = DEFAULT_THREADS;
    private int bufferSize;
    private long fileLength;
    /** Path of the file being scanned; every worker opens its own handle on it. */
    private String fileName;
    /** Handle used only by the starting thread to locate slice boundaries. */
    private RandomAccessFile accessFile;
    Vector<Thread> threads = new Vector<Thread>();
    private Set<PairEntity> pairSet = new HashSet<MyReadFileThread.PairEntity>();
    /** Number of non-empty records seen so far, summed over all workers. */
    private AtomicLong counter = new AtomicLong(0);

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the file to scan (defaults to the
     *             original hard-coded log path), {@code args[1]} is the read
     *             chunk size in bytes (defaults to 100 KiB)
     * @throws NumberFormatException if {@code args[1]} is not an integer
     */
    public static void main(String[] args) {
        String fileName = "D:\\receiveMessage\\ebp.debug.2016-01-26-11.log";
        int bufferSize = 1024*100;
        if (args != null && args.length > 0) {
            fileName = args[0];
        }
        if (args != null && args.length > 1) {
            bufferSize = Integer.parseInt(args[1]);
        }
        new MyReadFileThread().StartUp(fileName, bufferSize);
    }

    /**
     * Slices the file, scans all slices in parallel, waits for the workers and
     * prints the elapsed time and the record count.
     *
     * @param fileName   path of the file to scan
     * @param bufferSize number of bytes each worker reads per I/O call
     * @throws IllegalArgumentException if {@code bufferSize} is not positive
     */
    private void StartUp(String fileName,int bufferSize){
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
        }
        long startTime = System.currentTimeMillis();
        this.bufferSize = bufferSize;
        this.fileName = fileName;
        try {
            accessFile = new RandomAccessFile(fileName, "r");
            this.fileLength = accessFile.length();
            if (fileLength > 0) {
                long wanted = fileLength / BYTES_PER_THREAD;
                if (wanted > MAX_THREADS) {
                    this.threadPoolSizel = MAX_THREADS;
                } else if (wanted == 0) {
                    this.threadPoolSizel = DEFAULT_THREADS;
                } else {
                    this.threadPoolSizel = (int) wanted;
                }
                // Ceiling division: the slice size is never 0, even for files
                // shorter than the thread count, and no tiny remainder slice
                // is created just because of truncation.
                long spliceSize = (fileLength + threadPoolSizel - 1) / threadPoolSizel;
                calculatePair(0, spliceSize);
            }
            for (PairEntity pair : pairSet) {
                Thread thread = new Thread(new PairReadTask(pair, bufferSize));
                threads.add(thread);
                thread.start();
                System.out.println(pair.toString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        for (Thread thread : threads) {
            try {
                thread.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Restore the flag for the caller and stop waiting; with the
                // flag set every further join() would fail immediately anyway.
                Thread.currentThread().interrupt();
                break;
            }
        }
        shutDown();
        System.out.println("use time: "+(System.currentTimeMillis()-startTime));
        System.out.println("all line: "+counter.get());
    }

    /**
     * Closes the boundary-search file handle, if it was ever opened.
     */
    private void shutDown(){
        if (this.accessFile == null) {
            // Opening the file failed; there is nothing to close.
            return;
        }
        try {
            this.accessFile.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Cuts the file into slices, starting at {@code start}, and records each
     * slice in {@code pairSet}. Every slice is at least {@code pairSize} bytes
     * long and is extended forward until it ends on the {@code '\n'} that
     * precedes a {@code '['}, so that the following slice starts at the
     * beginning of a record. The last slice ends at the end of the file.
     *
     * @param start    offset of the first byte to assign to a slice
     * @param pairSize minimum slice length in bytes; values below 1 are treated as 1
     * @throws IOException if the file cannot be read
     */
    private void calculatePair(long start,long pairSize) throws IOException{
        if (start < 0) {
            return;
        }
        long minSize = Math.max(1L, pairSize);
        long lastByte = fileLength - 1;
        long sliceStart = start;
        while (sliceStart <= lastByte) {
            PairEntity pairEnt = new PairEntity();
            pairEnt.start = sliceStart;
            pairEnt.no = pairSet.size();
            long tentativeEnd = sliceStart + minSize - 1;
            if (tentativeEnd >= lastByte) {
                pairEnt.end = lastByte;
            } else {
                pairEnt.end = findRecordBoundary(tentativeEnd);
            }
            pairSet.add(pairEnt);
            sliceStart = pairEnt.end + 1;
        }
    }

    /**
     * Finds the first offset at or after {@code from} that holds a {@code '\n'}
     * immediately followed by {@code '['}.
     *
     * @param from offset at which the search starts
     * @return the offset of that {@code '\n'}, or the offset of the last byte
     *         of the file when no such pair of bytes follows
     * @throws IOException if the file cannot be read
     */
    private long findRecordBoundary(long from) throws IOException {
        accessFile.seek(from);
        long pos = from;
        int current = accessFile.read();
        while (current != -1) {
            int next = accessFile.read();
            if (next == -1) {
                break;
            }
            if (current == '\n' && next == '[') {
                return pos;
            }
            current = next;
            pos++;
        }
        return fileLength - 1;
    }

    /**
     * Returns a buffer that can hold {@code extra} more bytes after the first
     * {@code used} bytes, copying the used part into a larger array if needed.
     */
    private static byte[] ensureCapacity(byte[] buffer, int used, int extra) {
        long needed = (long) used + extra;
        if (needed <= buffer.length) {
            return buffer;
        }
        long limit = Integer.MAX_VALUE - 8;
        if (needed > limit) {
            throw new IllegalStateException("record too large to buffer: " + needed + " bytes");
        }
        long doubled = (long) buffer.length * 2;
        byte[] bigger = new byte[(int) Math.min(Math.max(needed, doubled), limit)];
        System.arraycopy(buffer, 0, bigger, 0, used);
        return bigger;
    }

    /**
     * One slice of the file: the inclusive byte range {@code [start, end]} and
     * a sequence number.
     *
     * @author Administrator
     */
    protected class PairEntity {
        private long start; // offset of the first byte of the slice
        private long end;   // offset of the last byte of the slice (inclusive)
        private int no;     // sequence number of the slice
        public long getStart() {
            return start;
        }
        public void setStart(long start) {
            this.start = start;
        }
        public long getEnd() {
            return end;
        }
        public void setEnd(long end) {
            this.end = end;
        }
        public int getNo() {
            return no;
        }
        public void setNo(int no) {
            this.no = no;
        }
        @Override
        public String toString() {
            return "No:" + no + " Start:" + start + " End:" + end;
        }
    }

    /**
     * Worker that scans one slice, splitting it into records and counting them.
     * Each worker reads through its own file handle, so workers do not disturb
     * each other's file position.
     *
     * @author Administrator
     */
    protected class PairReadTask implements Runnable {
        private long start;
        private long pairSize;
        private int bufferSize;

        /**
         * @param pair       the slice to scan
         * @param bufferSize number of bytes to request per read call
         * @throws IllegalArgumentException if {@code bufferSize} is not positive
         */
        public PairReadTask(PairEntity pair,int bufferSize) {
            if (bufferSize <= 0) {
                throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
            }
            this.start = pair.start;
            this.pairSize = pair.end-pair.start+1;
            this.bufferSize = bufferSize;
        }

        /**
         * Reads the slice chunk by chunk. Bytes of a record that is not yet
         * complete are carried over to the next chunk; whatever remains at the
         * end of the slice is the final record.
         */
        @Override
        public void run() {
            RandomAccessFile file = null;
            try {
                file = new RandomAccessFile(fileName, "r");
                file.seek(start);
                byte[] pending = new byte[Math.max(INITIAL_PENDING_CAPACITY, bufferSize)];
                int pendingLen = 0;
                // First index that has not yet been tested as the '\n' of a boundary.
                int scanFrom = 0;
                long remaining = pairSize;
                while (remaining > 0) {
                    // Never read beyond the slice: the bytes after it belong to another worker.
                    int want = (int) Math.min((long) bufferSize, remaining);
                    pending = ensureCapacity(pending, pendingLen, want);
                    int got = file.read(pending, pendingLen, want);
                    if (got < 0) {
                        // The file ended before the slice did (e.g. it was truncated meanwhile).
                        break;
                    }
                    remaining -= got;
                    pendingLen += got;

                    int recordStart = 0;
                    // The last byte is not tested here: the byte that follows it
                    // has not been read yet, so it is re-examined after the next read.
                    for (int i = scanFrom; i + 1 < pendingLen; i++) {
                        if (pending[i] == '\n' && pending[i + 1] == '[') {
                            emit(pending, recordStart, i - recordStart);
                            recordStart = i + 1;
                        }
                    }
                    if (recordStart > 0) {
                        // Move the incomplete record to the front of the buffer.
                        pendingLen -= recordStart;
                        System.arraycopy(pending, recordStart, pending, 0, pendingLen);
                    }
                    scanFrom = pendingLen > 0 ? pendingLen - 1 : 0;
                }
                // Final record of the slice, without its terminating newline.
                int tailLen = pendingLen;
                if (tailLen > 0 && pending[tailLen - 1] == '\n') {
                    tailLen--;
                }
                emit(pending, 0, tailLen);
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                if (file != null) {
                    try {
                        file.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

        /** Counts a non-empty record and passes it to {@link #handleRecord}. */
        private void emit(byte[] data, int offset, int length) {
            if (length <= 0) {
                return;
            }
            counter.incrementAndGet();
            handleRecord(data, offset, length);
        }

        /**
         * Hook invoked once per non-empty record; does nothing by default. The
         * array is the worker's reusable buffer, so an override must copy the
         * bytes it wants to keep before returning.
         *
         * @param data   buffer holding the record
         * @param offset index of the first byte of the record
         * @param length number of bytes in the record, excluding the newline that ends it
         */
        protected void handleRecord(byte[] data, int offset, int length) {
        }
    }
}
// Reposted from: https://my.oschina.net/CallMain/blog/646148