最近解决了一个困扰了很长时间的问题,就是关于中文乱码的处理。。。找到相关方法
1.。如下:
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MessyCodeCheck {
public static boolean isChinese(char c) {
Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|| ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
|| ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
|| ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
return true;
}
return false;
}
public static boolean isMessyCode(String strName) {
Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*");
Matcher m = p.matcher(strName);
String after = m.replaceAll("");
String temp = after.replaceAll("\\p{P}", "");
char[] ch = temp.trim().toCharArray();
float chLength = ch.length;
float count = 0;
for (int i = 0; i < ch.length; i++) {
char c = ch[i];
if (!Character.isLetterOrDigit(c)) {
if (!isChinese(c)) {
count = count + 1;
System.out.print(c);
}
}
}
float result = count / chLength;
if (result > 0.4) {
return true;
} else {
return false;
}
}
public static void main(String[] args) {
System.out.println(isMessyCode("*??JTP.jar??????JTP?????????????"));
System.out.println(isMessyCode("你好"));
}
}
2.如下:
/**
* 用getBytes(encoding):返回字符串的一个byte数组
* 当b[0]为 63时,应该是转码错误
* A、不乱码的汉字字符串:
* 1、encoding用GB2312时,每byte是负数;
* 2、encoding用ISO8859_1时,b[i]全是63。
* B、乱码的汉字字符串:
* 1、encoding用ISO8859_1时,每byte也是负数;
* 2、encoding用GB2312时,b[i]大部分是63。
* C、英文字符串
* 1、encoding用ISO8859_1和GB2312时,每byte都大于0;
* <p/>
* 总结:给定一个字符串,用getBytes("iso8859_1")
* 1、如果b[i]有63,不用转码; A-2
* 2、如果b[i]全大于0,那么为英文字符串,不用转码; B-1
* 3、如果b[i]有小于0的,那么已经乱码,要转码。 C-1
*/
private static String toGb2312(String str) {
if (str == null) return null;
String retStr = str;
byte b[];
try {
b = str.getBytes("ISO8859_1");
for (int i = 0; i < b.length; i++) {
byte b1 = b[i];
if (b1 == 63)
break; //1
else if (b1 > 0)
continue;//2
else if (b1 < 0) { //不可能为0,0为字符串结束符
retStr = new String(b, "GB2312");
break;
}
}
} catch (UnsupportedEncodingException e) {
// e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
return retStr;
}
---------------------------------------------------------------
String str = "";
if (request.getParameter("input") != null) {
str = request.getParameter("input");
if (str.length() == str.getBytes().length)
System.out.println("半角 character");
else
System.out.println("全角 character");
}
过滤乱码(可以哦,今天我去试了,用String newstr=new String(oldstr,"跟换不同编码集例如utf-8");测试newstr是否为乱码,若为乱码继续跟换编码集生成newstr。。。一直到判断为不是乱码为止)希望高手多多指点。。。。