逐行读取文本文件,分两步:
首先获取文件的编码格式;
然后按该编码读取文件内容。
log4j-1.2.17.jar
slf4j-api-1.4.3.jar
slf4j-log4j12-1.4.0.jar
1. 获取文件的编码格式
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
 * Helper methods for working with plain-text files.
 */
public class TxtCommonMethods {
    private static final Logger log = LoggerFactory
            .getLogger(TxtCommonMethods.class);

    /** Charset reported when nothing in the file indicates another one. */
    private static final String DEFAULT_CHARSET = "GBK";

    /** Number of leading bytes inspected for a byte-order mark. */
    private static final int BOM_LENGTH = 3;

    /**
     * Guesses the character encoding of a file.
     * <p>
     * The first bytes are checked for a UTF-16LE, UTF-16BE or UTF-8 byte-order
     * mark. When none is present the file is scanned from the start for a
     * three-byte UTF-8 sequence. If the file does not exist, cannot be read,
     * is empty, or gives no indication of UTF-8, the default {@code "GBK"} is
     * returned. I/O failures are logged, never thrown.
     *
     * @param filePath
     *            absolute path of the file; must not be {@code null}
     * @return one of {@code "UTF-16LE"}, {@code "UTF-16BE"}, {@code "UTF-8"}
     *         or {@code "GBK"}
     * @date 2015-12-26
     */
    public static String getFileCharset(String filePath) {
        File file = new File(filePath);
        if (!file.exists()) {
            // Opening the stream would only fail; report and use the default.
            log.error("Get charset of '" + filePath + "' fail:File not found.");
            return DEFAULT_CHARSET;
        }
        String charset = DEFAULT_CHARSET;
        FileInputStream is = null;
        BufferedInputStream bis = null;
        try {
            is = new FileInputStream(file);
            bis = new BufferedInputStream(is);
            byte[] head = new byte[BOM_LENGTH];
            // The read limit must cover every byte read before reset().
            bis.mark(BOM_LENGTH);
            int read = bis.read(head, 0, BOM_LENGTH);
            // A short read leaves the remaining bytes of head at zero, which
            // matches no byte-order mark.
            String bomCharset = (read == -1) ? null : charsetFromBom(head);
            if (bomCharset != null) {
                charset = bomCharset;
            } else {
                // No byte-order mark: rewind and inspect the content itself.
                bis.reset();
                if (containsUtf8Sequence(bis)) {
                    charset = "UTF-8";
                }
            }
        } catch (Exception e) {
            // Best effort: any failure is logged and the default is returned.
            log.error(
                    "Get charset of '" + filePath + "' fail:" + e.getMessage(),
                    e);
        } finally {
            TxtIOUtils.closeStream(bis, null);
            TxtIOUtils.closeStream(is, null);
        }
        return charset;
    }

    /**
     * Maps a leading byte-order mark to a charset name.
     *
     * @param head
     *            the first {@link #BOM_LENGTH} bytes of the file
     * @return the charset named by the mark, or {@code null} if there is none
     */
    private static String charsetFromBom(byte[] head) {
        if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE) {
            return "UTF-16LE";
        }
        if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF) {
            return "UTF-16BE";
        }
        if (head[0] == (byte) 0xEF && head[1] == (byte) 0xBB
                && head[2] == (byte) 0xBF) {
            return "UTF-8";
        }
        return null;
    }

    /**
     * Scans the stream for the first decisive multi-byte sequence.
     *
     * @param in
     *            stream positioned at the start of the content
     * @return {@code true} only if a lead byte 0xE0-0xEF followed by two
     *         continuation bytes is found before any byte pattern that ends
     *         the scan
     * @throws IOException
     *             if reading fails
     */
    private static boolean containsUtf8Sequence(BufferedInputStream in)
            throws IOException {
        int b;
        while ((b = in.read()) != -1) {
            if (b >= 0xF0) {
                return false;
            }
            if (0x80 <= b && b <= 0xBF) {
                // A continuation byte with no lead byte is treated as GBK.
                return false;
            }
            if (0xC0 <= b && b <= 0xDF) {
                // A two-byte pair is not decisive (it may still be GBK):
                // keep scanning if well formed, give up otherwise.
                if (!isContinuation(in.read())) {
                    return false;
                }
            } else if (0xE0 <= b && b <= 0xEF) {
                // Three-byte lead: the next two bytes decide the result.
                return isContinuation(in.read()) && isContinuation(in.read());
            }
            // Bytes below 0x80 are plain ASCII and carry no information.
        }
        return false;
    }

    /**
     * Tells whether a value returned by {@code read()} lies in 0x80-0xBF.
     * End of stream (-1) is not in that range.
     */
    private static boolean isContinuation(int b) {
        return 0x80 <= b && b <= 0xBF;
    }
}
2. 读取文件内容
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Task that reads a text file line by line and prints every line to standard
 * output, decoding it with the charset reported by
 * {@code TxtCommonMethods.getFileCharset}.
 */
public class ReadLoadTxtFileRunnable implements Runnable {
    private static final Logger log = LoggerFactory
            .getLogger(ReadLoadTxtFileRunnable.class);

    /**
     * Byte-order mark as it appears after decoding. Java's UTF-8, UTF-16LE and
     * UTF-16BE decoders pass it through as an ordinary character.
     */
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    private final String filePath;

    /**
     * @param filePath
     *            absolute path of the file to read
     */
    public ReadLoadTxtFileRunnable(String filePath) {
        this.filePath = filePath;
    }

    @Override
    public void run() {
        this.read(filePath);
    }

    /**
     * Reads the text file and prints its lines to standard output.
     * <p>
     * A {@code null} or blank path is logged and ignored. Failures while
     * opening or reading the file are logged, never thrown.
     *
     * @param filePath
     *            absolute path of the file; surrounding whitespace is trimmed
     * @date 2015-12-26
     */
    private void read(String filePath) {
        log.info("Read Whole Grid Load txt file,filePath=" + filePath);
        filePath = null == filePath ? null : filePath.trim();
        if (null == filePath || "".equals(filePath)) {
            log.error("The filePath is null.");
            return;
        }
        InputStream is = null;
        Reader reader = null;
        BufferedReader bufRead = null;
        try {
            is = new FileInputStream(filePath);
            // Detect the encoding before decoding any content.
            String charset = TxtCommonMethods.getFileCharset(filePath);
            log.info("The charset of '" + filePath + "' is:" + charset);
            reader = new InputStreamReader(is, charset);
            bufRead = new BufferedReader(reader);
            boolean firstLine = true;
            String line;
            while ((line = bufRead.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // Drop a leading byte-order mark so it is not printed as
                    // part of the first line.
                    if (line.length() > 0
                            && line.charAt(0) == BYTE_ORDER_MARK) {
                        line = line.substring(1);
                    }
                }
                System.out.println(line);
            }
        } catch (Exception e) {
            // Covers missing file, unsupported charset and read errors alike.
            log.error("Read file '" + filePath + "' fail:" + e.getMessage(), e);
        } finally {
            TxtIOUtils.closeReader(bufRead);
            TxtIOUtils.closeReader(reader);
            TxtIOUtils.closeStream(is, null);
        }
        log.info("End of Read Whole Grid Load txt file");
    }
}
3. TxtIOUtils.java
/**
 * Null-safe helpers for closing I/O resources. A failure to close is logged
 * and never propagated to the caller.
 */
public class TxtIOUtils {
    private static final Logger log = LoggerFactory.getLogger(TxtIOUtils.class);

    /**
     * Closes the output stream first, then the input stream.
     *
     * @param is
     *            input stream to close; may be {@code null}
     * @param out
     *            output stream to close; may be {@code null}
     */
    public static void closeStream(InputStream is, OutputStream out) {
        closeQuietly(out, "OutputStream");
        closeQuietly(is, "InputStream");
    }

    /**
     * Closes a reader.
     *
     * @param reader
     *            reader to close; may be {@code null}
     */
    public static void closeReader(Reader reader) {
        closeQuietly(reader, "Reader");
    }

    /**
     * Closes a writer.
     *
     * @param writer
     *            writer to close; may be {@code null}
     */
    public static void closeWriter(Writer writer) {
        closeQuietly(writer, "Writer");
    }

    /**
     * Closes a channel.
     *
     * @param c
     *            channel to close; may be {@code null}
     */
    public static void closeChannel(Channel c) {
        closeQuietly(c, "Channel");
    }

    /**
     * Closes the resource if it is not {@code null}, logging any
     * {@link IOException} raised by {@code close()}.
     *
     * @param resource
     *            resource to close; may be {@code null}
     * @param kind
     *            resource name used in the log message
     */
    private static void closeQuietly(java.io.Closeable resource, String kind) {
        if (null == resource) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            log.error("Close " + kind + " fail:" + e.getMessage(), e);
        }
    }
}