过滤中文里的乱码

package cn.com.cnfic.brpa.util;

/**
 * 过滤乱码
 */
/**
 * Detects and strips garbled (mojibake) characters from text that is expected to be Chinese.
 *
 * <p>Every character is placed in one of three groups:
 * <ul>
 *   <li><b>Chinese</b> - the Unicode script is {@code HAN};</li>
 *   <li><b>neutral</b> - ASCII, the punctuation / width-form blocks tested by
 *       {@link #isPunctuation(char)}, plus number forms, enclosed alphanumerics,
 *       letterlike symbols, U+FEFF and U+00A0;</li>
 *   <li><b>garbled</b> - everything else.</li>
 * </ul>
 *
 * <p>Strings are processed per Unicode code point rather than per UTF-16 {@code char}, so Han
 * characters outside the Basic Multilingual Plane (stored as surrogate pairs) are recognised
 * as Chinese instead of being treated as two garbled units.
 */
public class ChineseUtill {

    /** A string is reported as garbled when the garbled share of counted characters exceeds this. */
    private static final double MESSY_RATIO_THRESHOLD = 0.3;

    /** Returns whether the code point belongs to the Han script. */
    private static boolean isChinese(int codePoint) {
        return Character.UnicodeScript.of(codePoint) == Character.UnicodeScript.HAN;
    }

    /**
     * Tells whether a character is punctuation, a width/compatibility form, or plain ASCII.
     *
     * @param c the UTF-16 code unit to classify
     * @return {@code true} if {@code c} lies in one of the Unicode blocks General Punctuation,
     *         CJK Symbols and Punctuation, Halfwidth and Fullwidth Forms, CJK Compatibility
     *         Forms, Vertical Forms or Basic Latin; {@code false} otherwise
     */
    public static boolean isPunctuation(char c) {
        return isPunctuationCodePoint(c);
    }

    /** Code-point variant of {@link #isPunctuation(char)}; tests the same Unicode blocks. */
    private static boolean isPunctuationCodePoint(int codePoint) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        // punctuation, spacing, and formatting characters
        return block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                // symbols and punctuation in the unified Chinese, Japanese and Korean script
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                // fullwidth or halfwidth variants
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS
                // vertical glyph variants for East Asian compatibility
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
                // vertical punctuation for compatibility with the Chinese standard GB 18030
                || block == Character.UnicodeBlock.VERTICAL_FORMS
                // ASCII
                || block == Character.UnicodeBlock.BASIC_LATIN;
    }

    /**
     * Returns whether the code point is one of the extra symbols that are tolerated although
     * they are neither Han nor punctuation: number forms, enclosed alphanumerics, letterlike
     * symbols, U+FEFF and U+00A0.
     */
    private static boolean isUserDefined(int codePoint) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.NUMBER_FORMS
                || block == Character.UnicodeBlock.ENCLOSED_ALPHANUMERICS
                || block == Character.UnicodeBlock.LETTERLIKE_SYMBOLS
                || codePoint == '\ufeff'
                || codePoint == '\u00a0';
    }

    /** Returns whether the code point is neutral, i.e. neither counted nor removed. */
    private static boolean isNeutral(int codePoint) {
        return isPunctuationCodePoint(codePoint) || isUserDefined(codePoint);
    }

    /**
     * Decides whether a string looks garbled.
     *
     * <p>Neutral characters are ignored. Among the remaining characters, the share that is not
     * Han is compared with the threshold of 0.3.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the non-Han share of the counted characters is above the
     *         threshold; {@code false} otherwise, including when {@code str} is {@code null},
     *         empty, or contains only neutral characters
     */
    public static Boolean isMessy(String str) {
        if (str == null) {
            return false;
        }
        int counted = 0;
        int garbled = 0;
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);
            if (isNeutral(codePoint)) {
                continue;
            }
            if (!isChinese(codePoint)) {
                garbled++;
            }
            counted++;
        }
        if (counted == 0) {
            // Nothing to judge; the previous 0/0 division produced NaN, which also meant false.
            return false;
        }
        // The division is kept in float, as before, so results at the boundary do not change.
        float ratio = (float) garbled / counted;
        return ratio > MESSY_RATIO_THRESHOLD;
    }

    /**
     * Removes garbled characters from Chinese text.
     *
     * <p>Han characters and neutral characters are kept in their original order; every other
     * character is dropped.
     *
     * @param str the text to clean; may be {@code null}
     * @return the cleaned text, or an empty string when {@code str} is {@code null}
     */
    public static String fitlerMessy(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder kept = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);
            if (isNeutral(codePoint) || isChinese(codePoint)) {
                kept.appendCodePoint(codePoint);
            }
        }
        return kept.toString();
    }

    /**
     * Entry point kept for manual experiments; it intentionally does nothing.
     *
     * <p>Typical use: pass text containing U+FFFD replacement characters to
     * {@link #fitlerMessy(String)} and print the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
    }

}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值