// The code is as follows.
package com.oceansoft.dupcheck;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PushbackReader;
import java.io.Writer;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.oceansoft.dupcheck.util.IOUtils;
/**
 * Utility for splitting large text files into smaller text files. Splits
 * only happen at line terminators, so a line is never cut in half.
 *
 * <p>
 * Results are reported through a per-thread {@link FileProceedCallback}
 * that must be registered with
 * {@link #setThreadFileCallback(FileProceedCallback)} on the calling thread
 * before {@link #splitFiles(File, String, String, int)} is invoked.
 *
 * <p>
 * Split files are written to the {@code split} sub-directory of
 * {@code java.io.tmpdir} and are named {@code <original name>.<index>}.
 *
 * @author 储玉庭
 */
class FileSpliter {

    /** Per-thread index of the next split file; reset for every input file. */
    private static final ThreadLocal<AtomicInteger> threadCounter = new ThreadLocal<AtomicInteger>();

    /** Per-thread callback that receives the produced files. */
    private static final ThreadLocal<FileProceedCallback> threadCallback = new ThreadLocal<FileProceedCallback>();

    /**
     * Registers the callback used by the current thread.
     *
     * @param callback
     *            receiver of the produced files
     * @throws IllegalStateException
     *             if the current thread already has a callback registered
     */
    public static final void setThreadFileCallback(FileProceedCallback callback) {
        if (null != threadCallback.get()) {
            throw new IllegalStateException("callback already set");
        }
        threadCallback.set(callback);
    }

    /**
     * Splits every regular file in a directory whose path ends with the
     * given extension, then signals the end of processing to the callback.
     * If the directory cannot be listed, no file is processed and only the
     * end signal is sent.
     *
     * @param inputDir
     *            directory containing the input files
     * @param extension
     *            suffix an input file path must end with
     * @param encoding
     *            charset name used to read the input and write the output
     * @param splitSize
     *            size threshold of a split file. Files whose byte length
     *            does not exceed it are copied unchanged; larger files are
     *            cut at the first line terminator reached after at least
     *            this many characters have been read for the current part
     * @throws IOException
     *             if reading an input file or writing a split file fails
     * @throws IllegalStateException
     *             if no callback is registered for the current thread
     */
    public static final void splitFiles(File inputDir, final String extension,
            String encoding, int splitSize) throws IOException {
        // Fail before doing any work instead of with a late NullPointerException.
        final FileProceedCallback callback = requireCallback();
        File[] files = inputDir.listFiles(new FileFilter() {
            @Override
            public boolean accept(File pathname) {
                // Directories can carry the extension too; they cannot be split.
                return pathname.isFile()
                        && pathname.getPath().endsWith(extension);
            }
        });
        if (null != files) {
            for (File file : files) {
                splitFile(file, encoding, splitSize);
            }
        }
        callback.proceedEnd();
    }

    /**
     * Splits a single file and reports every completed part to the callback.
     *
     * <p>
     * Line terminators ({@code \r}, {@code \n}, {@code \r\n}) are not copied
     * verbatim: a run of consecutive terminators is written as one platform
     * line separator, and only when more text follows it. Empty lines and a
     * trailing terminator are therefore dropped.
     *
     * <p>
     * A part is only reported after its writer has been closed successfully;
     * when an exception is thrown the part being written is not reported.
     */
    private static final void splitFile(File largeFile, String encoding,
            int splitSize) throws IOException {
        final FileProceedCallback callback = requireCallback();
        threadCounter.set(new AtomicInteger(0));
        if (largeFile.length() <= splitSize) {
            // Small enough already: hand over an unmodified copy.
            File tmpFile = nextFile(largeFile);
            IOUtils.copyFile(largeFile, tmpFile);
            callback.proceeded(tmpFile);
            return;
        }
        final String lineSeparator = System.getProperty("line.separator");
        File proceedFile = null;
        PushbackReader reader = null;
        Writer output = null;
        try {
            reader = openReader(largeFile, encoding);
            proceedFile = nextFile(largeFile);
            output = openWriter(proceedFile, encoding);
            // Characters read since the current part was started.
            int charsRead = 0;
            // True while a line terminator is pending, i.e. seen but not yet written.
            boolean hasLine = false;
            int c;
            while (-1 != (c = reader.read())) {
                charsRead++;
                if ('\r' == c || '\n' == c) {
                    if (!hasLine) {
                        hasLine = true;
                        if (charsRead >= splitSize) {
                            // Finish the current part and start the next one.
                            // The pending terminator is dropped so the new
                            // part does not begin with a line separator.
                            hasLine = false;
                            charsRead = 0;
                            output.close();
                            callback.proceeded(proceedFile);
                            proceedFile = nextFile(largeFile);
                            output = openWriter(proceedFile, encoding);
                        }
                    }
                    if ('\r' == c) {
                        // Swallow the second character of a two-character
                        // terminator such as \r\n.
                        int next = reader.read();
                        if ('\r' == next || '\n' == next) {
                            charsRead++;
                        } else if (-1 != next) {
                            // Never push back the end-of-stream marker: it
                            // would come back as the character U+FFFF.
                            reader.unread(next);
                        }
                    }
                } else {
                    if (hasLine) {
                        hasLine = false;
                        output.write(lineSeparator);
                    }
                    output.write(c);
                }
            }
            // Close explicitly so a failing flush is reported to the caller.
            output.close();
            output = null;
        } finally {
            closeQuietly(reader);
            closeQuietly(output);
        }
        callback.proceeded(proceedFile);
    }

    /**
     * Returns the callback of the current thread.
     *
     * @throws IllegalStateException
     *             if none has been registered
     */
    private static FileProceedCallback requireCallback() {
        FileProceedCallback callback = threadCallback.get();
        if (null == callback) {
            throw new IllegalStateException("callback not set");
        }
        return callback;
    }

    /**
     * Opens a buffered reader with one character of push-back; the file
     * stream is closed again if the reader cannot be created.
     */
    private static PushbackReader openReader(File file, String encoding)
            throws IOException {
        FileInputStream in = new FileInputStream(file);
        boolean opened = false;
        try {
            PushbackReader reader = new PushbackReader(new BufferedReader(
                    new InputStreamReader(in, encoding)));
            opened = true;
            return reader;
        } finally {
            if (!opened) {
                closeQuietly(in);
            }
        }
    }

    /**
     * Opens a buffered writer; the file stream is closed again if the
     * writer cannot be created.
     */
    private static Writer openWriter(File file, String encoding)
            throws IOException {
        FileOutputStream out = new FileOutputStream(file);
        boolean opened = false;
        try {
            Writer writer = new BufferedWriter(new OutputStreamWriter(out,
                    encoding));
            opened = true;
            return writer;
        } finally {
            if (!opened) {
                closeQuietly(out);
            }
        }
    }

    /** Closes a resource during cleanup, tolerating {@code null}. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (null == resource) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Cleanup only: either the resource was read-only or another
            // exception is already on its way to the caller.
        }
    }

    /**
     * Returns the next split file for the given original and advances the
     * per-thread index. The target directory is created when missing.
     */
    private static final File nextFile(File original) {
        File tempDir = new File(System.getProperty("java.io.tmpdir")
                + File.separator + "split");
        int index = threadCounter.get().getAndIncrement();
        File file = new File(tempDir, original.getName() + "." + index);
        File parent = file.getParentFile();
        if (null != parent && !parent.exists()) {
            parent.mkdirs();
        }
        return file;
    }

    /** Receiver of the files produced by {@link FileSpliter}. */
    public static interface FileProceedCallback {
        /**
         * Called when a file has been processed successfully.
         *
         * @param file
         *            the file that was produced
         */
        void proceeded(File file);

        /**
         * Called when all files have been processed.
         */
        void proceedEnd();
    }
}