//用于检测乱码问题
public static boolean isMessyCode(String strName) {
Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*");
Matcher m = p.matcher(strName);
String after = m.replaceAll("");
String temp = after.replaceAll("\\p{P}", "");
char[] ch = temp.trim().toCharArray();
float chLength = 0;
float count = 0;
for (int i = 0; i < ch.length; i++) {
char c = ch[i];
if (!Character.isLetterOrDigit(c)) {
if (!isChinese(c)) {
count = count + 1;
}
chLength++;
}
}
float result = count / chLength;
if (result > 0.4) {
return true;
} else {
return false;
}
}
private static boolean isChinese(char c) {
Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
|| ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
|| ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
|| ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
|| ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
return true;
}
return false;
}
public static void main(String[] args) throws Exception {
System.out.println("希望阿姨没事\uD83D\uDE4F");
// String str="希望阿姨没事\uD83D\uDE4F";
// System.out.println(split[0]);
// str = new String(str.getBytes("UTF-8"),"utf-8");
// System.out.println(isMessyCode(str));
}
public static String toChinese(String msg) {
if (isMessyCode(msg)) {
try {
return new String(msg.getBytes("ISO8859-1"), "UTF-8");
} catch (Exception e) {
}
}
return msg;
}
/**
2 * 字符串是否包含中文
3 *
4 * @param str 待校验字符串
5 * @return true 包含中文字符 false 不包含中文字符
6 * @throws EmptyException
7 */
/**
* 字符串是否包含中文
*
* @param str 待校验字符串
* @return true 包含中文字符 false 不包含中文字符
* @throws Exception
*/
public static boolean isContainChinese(String str) throws Exception {
if (StringUtils.isEmpty(str)) {
throw new Exception("sms context is empty!");
}
Pattern p = Pattern.compile("[\u4E00-\u9FA5|\\!|\\,|\\。|\\(|\\)|\\《|\\》|\\“|\\”|\\?|\\:|\\;|\\【|\\】]");
Matcher m = p.matcher(str);
return m.find();
}
检测乱码问题
最新推荐文章于 2022-10-02 23:22:55 发布