import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
/**
 * Small utility/demo: guesses a text file's charset from its leading
 * byte-order mark (BOM), reads the first line with that charset, and prints
 * the line followed by one escape per UTF-16 code unit.
 */
public class Test {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** Charset reported when no known BOM is found. */
    private static final String FALLBACK_CHARSET = "GBK";

    /** File inspected by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_FILE_NAME =
            "C:\\users\\administrator\\Desktop\\班智达.txt";

    /** The byte-order mark as it appears after decoding (U+FEFF). */
    private static final char BOM = (char) 0xFEFF;

    /**
     * Renders a byte array as an upper-case hexadecimal string, two digits per
     * byte (bytes are treated as unsigned, so -1 becomes "FF").
     *
     * @param byteArray the bytes to render; must not be null
     * @return the hex dump, e.g. {0x0F, 0x10} gives "0F10"; empty for an empty array
     */
    public static String toHex(byte[] byteArray) {
        StringBuilder hex = new StringBuilder(byteArray.length * 2);
        for (byte b : byteArray) {
            int unsigned = b & 0xFF; // undo sign extension
            hex.append(HEX_DIGITS[unsigned >>> 4]);
            hex.append(HEX_DIGITS[unsigned & 0x0F]);
        }
        return hex.toString();
    }

    /**
     * Guesses the charset of a file by looking for a byte-order mark at its start.
     *
     * <ul>
     *   <li>EF BB BF gives "UTF-8"</li>
     *   <li>FF FE gives "Unicode" (little-endian UTF-16 BOM; NOTE(review): the
     *       name "Unicode" is kept for compatibility and relies on the JDK
     *       accepting it as a charset alias - confirm on the target runtime)</li>
     *   <li>FE FF gives "UTF-16BE"</li>
     *   <li>anything else, including files shorter than two bytes, gives "GBK"</li>
     * </ul>
     *
     * @param fileName path of the file to inspect
     * @return the guessed charset name, never null
     * @throws IOException if the file cannot be opened or read
     */
    public static String getCharset(String fileName) throws IOException {
        // try-with-resources so the file handle is released on every path.
        try (BufferedInputStream in = new BufferedInputStream(
                new FileInputStream(new File(fileName)))) {
            int first = in.read();
            int second = in.read();
            // read() returns -1 at end of file; never fold that into the signature,
            // otherwise a lone 0xFF byte would look like the FE FF mark.
            if (first < 0 || second < 0) {
                return FALLBACK_CHARSET;
            }
            int signature = (first << 8) | second;
            switch (signature) {
                case 0xefbb:
                    // EF BB alone is also a valid GBK double-byte character, so
                    // require the third BOM byte before claiming UTF-8.
                    return in.read() == 0xbf ? "UTF-8" : FALLBACK_CHARSET;
                case 0xfffe:
                    return "Unicode";
                case 0xfeff:
                    return "UTF-16BE";
                default:
                    return FALLBACK_CHARSET;
            }
        }
    }

    /**
     * Prints the first line of a text file and then each of its UTF-16 code
     * units as a four-digit escape, one per output line.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             Without it the built-in default path is used.
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;

        // Detect the charset first, then reopen the file with a matching decoder.
        String charset = getCharset(fileName);

        String line;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File(fileName)), charset))) {
            line = reader.readLine();
        }
        if (line == null) {
            // Empty file: there is nothing to print.
            return;
        }

        // Some decoders (UTF-8, UTF-16BE) hand the BOM back as a leading U+FEFF
        // character; drop it whenever it is present rather than per charset.
        if (!line.isEmpty() && line.charAt(0) == BOM) {
            line = line.substring(1);
        }

        System.out.println(line);
        for (int i = 0; i < line.length(); i++) {
            System.out.println(toUnicodeEscape(line.charAt(i)));
        }
    }

    /** Formats one UTF-16 code unit as backslash, 'u' and exactly four lower-case hex digits. */
    private static String toUnicodeEscape(char c) {
        String digits = Integer.toHexString(c);
        StringBuilder escape = new StringBuilder(6).append("\\u");
        for (int pad = digits.length(); pad < 4; pad++) {
            escape.append('0');
        }
        return escape.append(digits).toString();
    }
}
效果:
刁拜︽搬爸︽伴吵︽椿粹
刁拜︽搬爸︽伴吵︽椿粹
\ufeff
\u5201
\u62dc
\ufe3d
\u642c