判断字符串的编码类型:依次尝试 charsets 数组中的候选编码,返回第一个能让字符串 value 经过"候选编码 → UTF-8 → 候选编码"往返转换后保持不变的编码名;若没有任何候选编码满足条件,则返回 UTF-8。

/**
 * Picks the first candidate charset under which {@code value} survives a
 * byte-level round trip through UTF-8.
 *
 * <p>For each candidate, the string is encoded with the candidate, those bytes
 * are decoded as UTF-8, the result is encoded as UTF-8 again and finally decoded
 * with the candidate. If the outcome equals {@code value}, the candidate's name
 * (exactly as supplied by the caller) is returned.
 *
 * <p>Candidate names that are null, illegal, or not supported by this JVM are
 * skipped instead of aborting the whole search.
 *
 * @param value    the string to examine; {@code null} yields the default
 * @param charsets candidate charset names, tried in order; {@code null} yields the default
 * @return the first matching candidate name, or {@code "UTF-8"} when none matches
 * @throws UnsupportedEncodingException never thrown by this implementation; kept in the
 *         signature so existing callers that catch it continue to compile
 */
public static String charset(String value, String charsets[]) throws UnsupportedEncodingException {
    Charset probe = StandardCharsets.UTF_8;
    if (value == null || charsets == null) {
        return probe.name();
    }
    for (String c : charsets) {
        Charset candidate;
        try {
            // forName never returns null: it throws IllegalCharsetNameException or
            // UnsupportedCharsetException (both IllegalArgumentException subclasses),
            // and a plain IllegalArgumentException for a null name.
            candidate = Charset.forName(c);
        } catch (IllegalArgumentException unusableName) {
            continue;
        }
        String viewedAsProbe = new String(value.getBytes(candidate), probe);
        String roundTripped = new String(viewedAsProbe.getBytes(probe), candidate);
        if (value.equals(roundTripped)) {
            return c;
        }
    }
    return probe.name();
}
/**
 * Encodes {@code value} to bytes using {@code fromEncoding} and decodes those
 * bytes back into a string using {@code toEncoding}.
 *
 * @param value        the string to re-interpret
 * @param fromEncoding charset name used to turn {@code value} into bytes
 * @param toEncoding   charset name used to read those bytes back as text
 * @return the string obtained by decoding the bytes with {@code toEncoding}
 * @throws UnsupportedEncodingException if either charset name is not supported
 */
public static String convert(String value, String fromEncoding, String toEncoding) throws UnsupportedEncodingException {
    byte[] encoded = value.getBytes(fromEncoding);
    String decoded = new String(encoded, toEncoding);
    return decoded;
}
主函数为:
// Try the candidate encodings in the given order and print the name that was selected.
String[] candidates = new String[]{"iso-8859-1", "utf-8"};
String detected = charset("你好", candidates);
System.out.println(detected);

输出为:

utf-8

 

又将字符编码和字符串的编码类型判断的测试类整理了一个完整版供大家参考

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

/**
 * Console demo showing how Java strings map to bytes under various charsets,
 * plus a heuristic ({@link #charset}) that picks which candidate charset a
 * string round-trips through.
 */
public class CharsetTest {
    /**
     * Runs the demo: prints characters written as Unicode escapes, the individual
     * chars of a mixed string, hex dumps of strings under several encodings, and
     * the result of {@link #charset} for a mis-decoded string and its repaired form.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if "utf-8" or "iso-8859-1" were unsupported
     */
    public static void main(String[] args) throws UnsupportedEncodingException {

        String s3 = "\u0061";
        String s4="\u6c49";
        System.out.println(s3);
        System.out.println(s4+"\n");

        System.out.println("test string.getChars(...):");
        String s = "你好lkf&*";
        printChars(s);
        System.out.println();

        System.out.println("test string.getBytes(charset):\n");
        String s1 = "汉";
        String s2 = "a";
        // The source file itself is saved as UTF-8.
        System.out.println("\""+s1+"\""+"的编码结果:");
        printEncoding(s1,null);
        System.out.println("-------------------------");
        System.out.println("\""+s2+"\""+"的编码结果:");
        printEncoding(s2,null);

        System.out.println("\nBOM:Byte order marker,0xfeff为big-endian,0xfffe为litter-endian");

        System.out.println("\n你好:");
        printEncoding("你好",new String[]{"iso-8859-1","utf-8"});
        System.out.println();

        // x1 holds UTF-8 bytes that were decoded as ISO-8859-1.
        String x1 = new String("slfjl你好".getBytes("utf-8"), "iso-8859-1");
        // Detect the charset.
        String charset1 = charset(x1, new String[]{"iso-8859-1", "utf-8"});
        System.out.println("x1:"+x1);
        System.out.println("x1:encoding:"+charset1);

        // x2 re-encodes x1 as ISO-8859-1 and decodes the bytes as UTF-8.
        String x2 = new String(x1.getBytes("iso-8859-1"), "utf-8");
        // Detect the charset.
        String charset2 = charset(x2, new String[]{"iso-8859-1", "utf-8"});
        System.out.println("x2:"+x2);
        System.out.println("x2:encoding:"+charset2);
    }

    /**
     * Prints one line per encoding: the encoding name followed by the hex dump of
     * {@code s1} encoded with it.
     *
     * @param s1        the string to encode
     * @param encodings encoding names to use; {@code null} selects a built-in default list
     */
    public static void printEncoding(String s1,String [] encodings) {
        String[] encodes = encodings==null?new String[]{"utf-8","utf-16","utf-16le","utf-16be","iso-8859-1","us-ascii", "gbk", "gb2312","gb18030","unicode"}:encodings;
        for (String encode : encodes) {
            StringBuilder x = getEncodeStr(s1, encode);
            System.out.println(x);
        }
    }

    /**
     * Prints {@code encode + ":"} (without a newline) and returns the hex dump of
     * {@code s1} encoded with that charset.
     *
     * @param s1     the string to encode
     * @param encode the charset name
     * @return the hex dump, or {@code null} if the charset name is not supported
     *         (the stack trace is printed in that case)
     */
    public static StringBuilder getEncodeStr(String s1, String encode) {
        byte[] bytes = null;
        StringBuilder x=null;
        try {
            System.out.print(encode+":");
            bytes = s1.getBytes(encode);
            x= toHexString(bytes);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return x;
    }

    /**
     * Prints every char of {@code s} on its own line.
     *
     * @param s the string whose chars are printed
     */
    public static void printChars(String s) {
        char[] chars = new char[s.length()];
        s.getChars(0,s.length(),chars,0);
        for (char aChar : chars) {
            System.out.println(aChar);
        }
    }

    /**
     * Formats bytes as {@code 0x(hh hh ...)} using two lowercase hex digits per byte,
     * separated by single spaces.
     *
     * @param bytes the bytes to format
     * @return the formatted text; {@code 0x()} for an empty array
     */
    public static StringBuilder toHexString(byte[] bytes) {
        StringBuilder b = new StringBuilder("0x(");
        for(int i=0; i < bytes.length; i++){
            // Mask after shifting so negative bytes still yield a single hex digit.
            b.append(Character.forDigit((bytes[i] >> 4) & 0xF, 16));
            b.append(Character.forDigit((bytes[i] & 0xF), 16));
            if (i < (bytes.length - 1)) {
                b.append(" ");
            }
        }
        b.append(")");
        return b;
    }

    /**
     * Picks the first candidate charset under which {@code value} survives a
     * byte-level round trip through UTF-8.
     *
     * <p>For each candidate, the string is encoded with the candidate, those bytes
     * are decoded as UTF-8, the result is encoded as UTF-8 again and finally decoded
     * with the candidate. If the outcome equals {@code value}, the candidate's name
     * (exactly as supplied by the caller) is returned.
     *
     * <p>Candidate names that are null, illegal, or not supported by this JVM are
     * skipped instead of aborting the whole search.
     *
     * @param value    the string to examine; {@code null} yields the default
     * @param charsets candidate charset names, tried in order; {@code null} yields the default
     * @return the first matching candidate name, or {@code "UTF-8"} when none matches
     * @throws UnsupportedEncodingException never thrown by this implementation; kept in the
     *         signature so existing callers that catch it continue to compile
     */
    public static String charset(String value, String charsets[]) throws UnsupportedEncodingException {
        Charset probe = StandardCharsets.UTF_8;
        if (value == null || charsets == null) {
            return probe.name();
        }
        for (String c : charsets) {
            Charset candidate;
            try {
                // forName never returns null: it throws IllegalCharsetNameException or
                // UnsupportedCharsetException (both IllegalArgumentException subclasses),
                // and a plain IllegalArgumentException for a null name.
                candidate = Charset.forName(c);
            } catch (IllegalArgumentException unusableName) {
                continue;
            }
            String viewedAsProbe = new String(value.getBytes(candidate), probe);
            String roundTripped = new String(viewedAsProbe.getBytes(probe), candidate);
            if (value.equals(roundTripped)) {
                return c;
            }
        }
        return probe.name();
    }

    /**
     * Encodes {@code value} to bytes using {@code fromEncoding} and decodes those
     * bytes back into a string using {@code toEncoding}.
     *
     * @param value        the string to re-interpret
     * @param fromEncoding charset name used to turn {@code value} into bytes
     * @param toEncoding   charset name used to read those bytes back as text
     * @return the string obtained by decoding the bytes with {@code toEncoding}
     * @throws UnsupportedEncodingException if either charset name is not supported
     */
    public static String convert(String value, String fromEncoding, String toEncoding) throws UnsupportedEncodingException {
        return new String(value.getBytes(fromEncoding), toEncoding);
    }
}
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值