import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Reads text files of unknown encoding into a list of lines.
 *
 * <p>The encoding is guessed from the leading bytes of the file (byte order mark) and, when no
 * BOM is present, from whether the whole file is well-formed UTF-8. Five results are
 * distinguished: UTF-8 with BOM, UTF-8 without BOM, "Unicode" (UTF-16 with a little-endian BOM),
 * UTF-16BE and gbk.
 */
public class Test {

    /** BOM of each BOM-carrying encoding name, as upper-case hex. Never modified after class init. */
    private static final Map<String, String> BOM_HEX_BY_ENCODING = new HashMap<String, String>();

    static {
        BOM_HEX_BY_ENCODING.put("UTF-8", "EFBBBF");
        BOM_HEX_BY_ENCODING.put("Unicode", "FFFE");
        BOM_HEX_BY_ENCODING.put("UTF-16BE", "FEFF");
    }

    /**
     * Reads every line of the file at {@code path}, decoding it with the encoding guessed by
     * {@link #codeString(String)} and stripping a leading byte order mark from the first line.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and do not propagate; the lines
     * read so far (possibly none) are returned.
     *
     * @param path path of the file to read
     * @return the lines of the file without line terminators; empty if the file cannot be read
     */
    public static List<String> readFileToList(String path) {
        List<String> list = new ArrayList<String>();
        String codeString = codeString(path);
        if (codeString == null) {
            // Detection failed (missing/unreadable file) and has already been reported;
            // passing a null charset name to InputStreamReader would throw a NullPointerException.
            return list;
        }
        // Only the BOM-carrying encodings may need the mark stripped from the first line.
        boolean haveBom = !("utf-8无bom".equals(codeString) || "gbk".equals(codeString));
        if ("utf-8无bom".equals(codeString)) {
            codeString = "utf-8";
        }
        FileInputStream in = null;
        BufferedReader reader = null;
        try {
            in = new FileInputStream(path);
            reader = new BufferedReader(new InputStreamReader(in, codeString));
            String str;
            while ((str = reader.readLine()) != null) {
                if (haveBom) {
                    list.add(removeBom(str, codeString));
                    haveBom = false;
                } else {
                    list.add(str);
                }
            }
        } catch (IOException e) {
            // Covers UnsupportedEncodingException and FileNotFoundException as well.
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
            // Closed separately in case the reader could not be constructed around the stream.
            closeQuietly(in);
        }
        return list;
    }

    /**
     * Returns the byte order mark of an encoding as an upper-case hex string.
     *
     * @param key encoding name; one of {@code "UTF-8"}, {@code "Unicode"}, {@code "UTF-16BE"}
     *            (matched case-sensitively)
     * @return the BOM in hex (for example {@code "EFBBBF"}), or {@code null} for any other key
     */
    public static String getRefData(String key) {
        return BOM_HEX_BY_ENCODING.get(key);
    }

    /**
     * Removes a leading byte order mark from {@code line}.
     *
     * <p>Notepad on Windows writes the marker EF BB BF at the start of files saved as UTF-8; this
     * method detects and removes such a marker. Known BOMs:
     * <pre>
     * 00 00 FE FF = UTF-32, big-endian
     * FF FE 00 00 = UTF-32, little-endian
     * EF BB BF    = UTF-8
     * FE FF       = UTF-16, big-endian
     * FF FE       = UTF-16, little-endian
     * </pre>
     * Only the encodings known to {@link #getRefData(String)} are handled. The line is encoded
     * with {@code codeString} and the mark is removed only when the encoded bytes start with
     * that encoding's BOM.
     *
     * @param line       the decoded line, typically the first line of a file
     * @param codeString the encoding name the line was decoded with
     * @return the line without its leading BOM; {@code line} itself when there is no BOM, the
     *         encoding has no registered BOM, or the encoding is not supported
     */
    public static String removeBom(String line, String codeString) {
        String bomHex = getRefData(codeString);
        if (line == null || bomHex == null) {
            return line;
        }
        try {
            byte[] encoded = line.getBytes(codeString);
            if (!startsWithBom(encoded, bomHex)) {
                return line;
            }
            int bomLength = bomHex.length() / 2;
            return new String(encoded, bomLength, encoded.length - bomLength, codeString);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            return line;
        }
    }

    /**
     * Guesses the encoding of a file.
     *
     * <p>Currently distinguishes UTF-8 (with or without BOM), Unicode, UTF-16BE and gbk.
     *
     * @param fileName path of the file to inspect
     * @return {@code "UTF-8"}, {@code "Unicode"}, {@code "UTF-16BE"}, {@code "utf-8无bom"} or
     *         {@code "gbk"}; {@code null} if the file cannot be opened or read (the failure is
     *         reported with {@code printStackTrace()})
     */
    public static String codeString(String fileName) {
        BufferedInputStream bin = null;
        try {
            bin = new BufferedInputStream(new FileInputStream(fileName));
            // read() yields -1 at end of file, so short files never match a BOM case below.
            int p = (bin.read() << 8) + bin.read();
            switch (p) {
                case 0xefbb:
                    // The UTF-8 BOM is three bytes; EF BB alone is also a valid GBK character.
                    if (bin.read() == 0xbf) {
                        return "UTF-8";
                    }
                    break;
                case 0xfffe:
                    return "Unicode";
                case 0xfeff:
                    return "UTF-16BE";
                default:
                    break;
            }
            return isUTF8(new File(fileName)) ? "utf-8无bom" : "gbk";
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(bin);
        }
        return null;
    }

    /**
     * Decides whether a BOM-less file is UTF-8 or gbk by checking that its entire content is
     * well-formed UTF-8. Line terminators play no role in the decision.
     *
     * @param file the file to check
     * @return {@code true} if every byte sequence in the file is valid UTF-8 (an empty file
     *         counts as valid); {@code false} otherwise or if the file cannot be read
     */
    public static boolean isUTF8(File file) {
        byte[] content;
        try {
            content = readAllBytes(file);
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
        // REPORT makes the decoder fail on bad input instead of substituting U+FFFD.
        CharsetDecoder strictDecoder = Charset.forName("UTF-8").newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
            strictDecoder.decode(ByteBuffer.wrap(content));
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }

    public static void main(String[] args) {
        // Read a file of any supported encoding.
        readFileToList("文件路径");
    }

    /** Returns whether {@code encoded} begins with the bytes spelled out by {@code bomHex}. */
    private static boolean startsWithBom(byte[] encoded, String bomHex) {
        int bomLength = bomHex.length() / 2;
        if (encoded.length < bomLength) {
            return false;
        }
        for (int i = 0; i < bomLength; i++) {
            int expected = Integer.parseInt(bomHex.substring(2 * i, 2 * i + 2), 16);
            // Mask to compare as an unsigned byte value.
            if ((encoded[i] & 0xFF) != expected) {
                return false;
            }
        }
        return true;
    }

    /** Reads the complete content of {@code file} into memory. */
    private static byte[] readAllBytes(File file) throws IOException {
        InputStream in = new FileInputStream(file);
        try {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                out.write(chunk, 0, read);
            }
            return out.toByteArray();
        } finally {
            closeQuietly(in);
        }
    }

    /** Closes {@code resource} if it is non-null, reporting (not propagating) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}