/**
 * Guesses which of the candidate charsets {@code value} is consistent with, using a
 * round-trip heuristic.
 *
 * <p>For each candidate, in order, the string is encoded with the candidate, decoded as
 * UTF-8, encoded as UTF-8 again and finally decoded with the candidate. The first candidate
 * for which this round trip reproduces {@code value} exactly is returned.
 *
 * <p>Candidate names that are {@code null}, syntactically illegal, or not supported by this
 * JVM are skipped. {@link Charset#forName(String)} never returns {@code null} for such names;
 * it throws a subclass of {@link IllegalArgumentException}, so a null check cannot filter them.
 *
 * @param value    the string to inspect; must not be {@code null}
 * @param charsets candidate charset names, tried in order; {@code null} is treated as empty
 * @return the first matching candidate, spelled exactly as the caller passed it, or
 *         {@code "UTF-8"} when no candidate matches
 * @throws UnsupportedEncodingException never thrown by this implementation; kept in the
 *         signature so existing callers that catch it still compile
 */
public static String charset(String value, String[] charsets) throws UnsupportedEncodingException {
    String fallback = StandardCharsets.UTF_8.name();
    if (charsets == null) {
        return fallback;
    }
    for (String name : charsets) {
        Charset candidate;
        try {
            candidate = Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Null, illegal or unsupported charset name: this candidate cannot match, try the next.
            continue;
        }
        // candidate bytes -> UTF-8 text -> UTF-8 bytes -> candidate text
        String viaProbe = new String(value.getBytes(candidate), StandardCharsets.UTF_8);
        String roundTripped = new String(viaProbe.getBytes(StandardCharsets.UTF_8), candidate);
        if (value.equals(roundTripped)) {
            return name;
        }
    }
    return fallback;
}
/**
 * Encodes {@code value} to bytes using {@code fromEncoding} and decodes those bytes
 * again using {@code toEncoding}.
 *
 * @param value        the string to re-interpret
 * @param fromEncoding name of the charset used to turn the string into bytes
 * @param toEncoding   name of the charset used to turn the bytes back into a string
 * @return the string obtained by decoding the intermediate bytes with {@code toEncoding}
 * @throws UnsupportedEncodingException if either charset name is not supported
 */
public static String convert(String value, String fromEncoding, String toEncoding) throws UnsupportedEncodingException {
    byte[] encoded = value.getBytes(fromEncoding);
    return new String(encoded, toEncoding);
}
主函数为:
// Example call: ask which of the two candidates the sample text round-trips through,
// then print the name that charset(...) reports.
String charset = charset("你好", new String[]{"iso-8859-1", "utf-8"});
System.out.println(charset);
输出为:
utf-8
下面把字符编码演示与字符串编码类型判断的测试代码整理成一个完整的测试类,供大家参考:
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
/**
 * Console demo showing how Java strings map to bytes under different charsets, together
 * with a round-trip heuristic ({@link #charset(String, String[])}) for guessing which
 * charset a string is consistent with.
 */
public class CharsetTest {

    /** Encodings printed by {@link #printEncoding(String, String[])} when none are supplied. */
    private static final String[] DEFAULT_ENCODINGS = {
        "utf-8", "utf-16", "utf-16le", "utf-16be", "iso-8859-1", "us-ascii",
        "gbk", "gb2312", "gb18030", "unicode"
    };

    /**
     * Runs the demo: prints two escaped characters, the chars of a mixed string, the hex
     * bytes of sample strings under several encodings, and the detected charset of a string
     * before and after a deliberate UTF-8 / ISO-8859-1 mix-up.
     *
     * @param args unused
     * @throws UnsupportedEncodingException declared because {@link #charset(String, String[])}
     *         declares it
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String s3 = "\u0061";
        String s4 = "\u6c49";
        System.out.println(s3);
        System.out.println(s4 + "\n");

        System.out.println("test string.getChars(...):");
        String s = "你好lkf&*";
        printChars(s);
        System.out.println();

        System.out.println("test string.getBytes(charset):\n");
        String s1 = "汉";
        String s2 = "a";
        // This source file itself is encoded as UTF-8.
        System.out.println("\"" + s1 + "\"" + "的编码结果:");
        printEncoding(s1, null);
        System.out.println("-------------------------");
        System.out.println("\"" + s2 + "\"" + "的编码结果:");
        printEncoding(s2, null);
        System.out.println("\nBOM:Byte order marker,0xfeff为big-endian,0xfffe为litter-endian");

        System.out.println("\n你好:");
        printEncoding("你好", new String[]{"iso-8859-1", "utf-8"});
        System.out.println();

        // x1: UTF-8 bytes deliberately decoded as ISO-8859-1.
        String x1 = new String("slfjl你好".getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
        // Detect the charset.
        String charset1 = charset(x1, new String[]{"iso-8859-1", "utf-8"});
        System.out.println("x1:" + x1);
        System.out.println("x1:encoding:" + charset1);

        // x2: undo the mix-up by re-encoding as ISO-8859-1 and decoding as UTF-8.
        String x2 = new String(x1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
        // Detect the charset.
        String charset2 = charset(x2, new String[]{"iso-8859-1", "utf-8"});
        System.out.println("x2:" + x2);
        System.out.println("x2:encoding:" + charset2);
    }

    /**
     * Prints one line per encoding showing the hex bytes of {@code s1} in that encoding.
     *
     * @param s1        the string to encode
     * @param encodings charset names to use, or {@code null} to use a built-in default list
     */
    public static void printEncoding(String s1, String[] encodings) {
        String[] encodes = encodings == null ? DEFAULT_ENCODINGS : encodings;
        for (String encode : encodes) {
            System.out.println(getEncodeStr(s1, encode));
        }
    }

    /**
     * Prints {@code encode} followed by a colon (without a line break) to standard output and
     * returns the hex rendering of {@code s1} encoded with that charset.
     *
     * @param s1     the string to encode
     * @param encode the charset name
     * @return the hex rendering produced by {@link #toHexString(byte[])}, or {@code null} if
     *         the charset name is not supported (the stack trace is printed in that case)
     */
    public static StringBuilder getEncodeStr(String s1, String encode) {
        System.out.print(encode + ":");
        try {
            return toHexString(s1.getBytes(encode));
        } catch (UnsupportedEncodingException e) {
            // Best effort for a demo: report the problem and let the caller print "null".
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Prints every {@code char} of {@code s} on its own line.
     *
     * @param s the string whose chars are printed
     */
    public static void printChars(String s) {
        for (char aChar : s.toCharArray()) {
            System.out.println(aChar);
        }
    }

    /**
     * Renders bytes as lowercase two-digit hex values separated by single spaces and wrapped
     * in {@code 0x(...)}, e.g. {@code 0x(e6 b1 89)}.
     *
     * @param bytes the bytes to render
     * @return a new builder holding the rendered text
     */
    public static StringBuilder toHexString(byte[] bytes) {
        StringBuilder hex = new StringBuilder("0x(");
        for (int i = 0; i < bytes.length; i++) {
            if (i > 0) {
                hex.append(" ");
            }
            // Mask to 0..255 so negative bytes do not leak sign bits into the high nibble.
            int unsigned = bytes[i] & 0xFF;
            hex.append(Character.forDigit(unsigned >> 4, 16));
            hex.append(Character.forDigit(unsigned & 0xF, 16));
        }
        hex.append(")");
        return hex;
    }

    /**
     * Guesses which of the candidate charsets {@code value} is consistent with, using a
     * round-trip heuristic.
     *
     * <p>For each candidate, in order, the string is encoded with the candidate, decoded as
     * UTF-8, encoded as UTF-8 again and finally decoded with the candidate. The first
     * candidate for which this round trip reproduces {@code value} exactly is returned.
     *
     * <p>Candidate names that are {@code null}, syntactically illegal, or not supported by
     * this JVM are skipped. {@link Charset#forName(String)} never returns {@code null} for
     * such names; it throws a subclass of {@link IllegalArgumentException}, so a null check
     * cannot filter them.
     *
     * @param value    the string to inspect; must not be {@code null}
     * @param charsets candidate charset names, tried in order; {@code null} is treated as empty
     * @return the first matching candidate, spelled exactly as the caller passed it, or
     *         {@code "UTF-8"} when no candidate matches
     * @throws UnsupportedEncodingException never thrown by this implementation; kept in the
     *         signature so existing callers that catch it still compile
     */
    public static String charset(String value, String[] charsets) throws UnsupportedEncodingException {
        String fallback = StandardCharsets.UTF_8.name();
        if (charsets == null) {
            return fallback;
        }
        for (String name : charsets) {
            Charset candidate;
            try {
                candidate = Charset.forName(name);
            } catch (IllegalArgumentException e) {
                // Null, illegal or unsupported charset name: this candidate cannot match.
                continue;
            }
            // candidate bytes -> UTF-8 text -> UTF-8 bytes -> candidate text
            String viaProbe = new String(value.getBytes(candidate), StandardCharsets.UTF_8);
            String roundTripped = new String(viaProbe.getBytes(StandardCharsets.UTF_8), candidate);
            if (value.equals(roundTripped)) {
                return name;
            }
        }
        return fallback;
    }

    /**
     * Encodes {@code value} to bytes using {@code fromEncoding} and decodes those bytes
     * again using {@code toEncoding}.
     *
     * @param value        the string to re-interpret
     * @param fromEncoding name of the charset used to turn the string into bytes
     * @param toEncoding   name of the charset used to turn the bytes back into a string
     * @return the string obtained by decoding the intermediate bytes with {@code toEncoding}
     * @throws UnsupportedEncodingException if either charset name is not supported
     */
    public static String convert(String value, String fromEncoding, String toEncoding) throws UnsupportedEncodingException {
        return new String(value.getBytes(fromEncoding), toEncoding);
    }
}