package com.boss.search;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Heuristic helpers for detecting Chinese text and garbled ("messy code") strings,
 * e.g. query parameters that were decoded with the wrong character set.
 */
public class ChineseUtil {

    /**
     * Characters ignored by {@link #isMessyCode(String)}: ASCII whitespace, the question mark
     * (the usual replacement for undecodable bytes) and every Unicode punctuation character.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern IGNORED_CHARS = Pattern.compile("[\\s?\\p{P}]+");

    /** Name of the query-string parameter extracted by the {@link #main(String[])} demo. */
    private static final String NAME_KEY = "NameKey";

    /**
     * Tells whether a character lies in one of the Unicode blocks this class treats as Chinese:
     * CJK unified/compatibility ideographs, extension A, CJK symbols and punctuation,
     * general punctuation, and half-width/full-width forms.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} belongs to one of those blocks
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Guesses whether a string is garbled.
     *
     * <p>Whitespace, question marks and punctuation are discarded first. Among the remaining
     * characters that are neither letters nor digits, the share that is not Chinese
     * (see {@link #isChinese(char)}) is computed; the string is reported as garbled when that
     * share exceeds 0.4.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if the text looks garbled; {@code false} otherwise, including for
     *         {@code null}, empty input, and input without any symbol characters
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }
        // Whitespace is stripped as well, so ordinary text containing spaces ("hello world")
        // is not mistaken for garbled input.
        char[] chars = IGNORED_CHARS.matcher(strName).replaceAll("").trim().toCharArray();

        int symbolCount = 0;   // characters that are neither letters nor digits
        int garbledCount = 0;  // symbols that are also not Chinese
        for (char c : chars) {
            if (Character.isLetterOrDigit(c)) {
                continue;
            }
            symbolCount++;
            if (!isChinese(c)) {
                garbledCount++;
            }
        }
        if (symbolCount == 0) {
            // Nothing suspicious found; avoids evaluating 0/0.
            return false;
        }
        // The ratio is deliberately computed as a float and compared with the double 0.4,
        // which keeps the historical threshold behaviour.
        float ratio = (float) garbledCount / symbolCount;
        return ratio > 0.4;
    }

    /**
     * Re-decodes a string as GBK when its platform-default byte encoding contains a non-ASCII
     * byte before the first {@code '?'} byte; otherwise returns the string unchanged.
     *
     * @param str the text to convert; may be {@code null}
     * @return the re-decoded text, the original text when no conversion applies or GBK is
     *         unavailable, or {@code null} for {@code null} input
     */
    private static String toGb2312(String str) {
        if (str == null) {
            return null;
        }
        // NOTE(review): uses the platform default charset, so results depend on the JVM settings.
        byte[] bytes = str.getBytes();
        for (byte b : bytes) {
            if (b == '?') {
                // A question mark ends the scan: the text is left as it is.
                break;
            }
            if (b < 0) {
                // First byte outside the ASCII range: reinterpret the whole array as GBK.
                try {
                    return new String(bytes, "GBK");
                } catch (UnsupportedEncodingException ignored) {
                    // Best effort: without GBK support the input is returned unchanged.
                    return str;
                }
            }
            // Positive bytes (plain ASCII) and zero bytes are skipped.
        }
        return str;
    }

    // Disabled helper kept for reference; it depends on TransformUtils, which is not part of this class.
    // public static String toChinese(Object msg){
    //     String tempMsg = TransformUtils.toString(msg) ;
    //     if(isMessyCode(tempMsg)){
    //         try {
    //             return new String(tempMsg.getBytes("ISO8859-1"), "UTF-8");
    //         } catch (Exception e) {
    //         }
    //     }
    //     return tempMsg ;
    // }

    /**
     * Small manual demo: prints a sample query string and the value of its {@code NameKey}
     * parameter.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown by the current body; kept for
     *         signature compatibility
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String queryString1 = "NameKey=喂";
        System.out.println("--rrr---" + queryString1);
        // Skip the parameter name and the '=' that follows it.
        int valueStart = queryString1.indexOf(NAME_KEY) + NAME_KEY.length() + 1;
        queryString1 = queryString1.substring(valueStart);
        System.out.println("this is dex\t\t" + queryString1);
    }

    /**
     * Checks whether every character of a string is Chinese according to
     * {@link #isChinese(char)}.
     *
     * @param str the text to check; may be {@code null}
     * @return {@code true} if the string is non-empty and consists only of Chinese characters;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static Boolean isRegularChinese(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (char c : str.toCharArray()) {
            if (!isChinese(c)) {
                return false;
            }
        }
        return true;
    }
}
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Heuristic helpers for detecting Chinese text and garbled ("messy code") strings,
 * e.g. query parameters that were decoded with the wrong character set.
 */
public class ChineseUtil {

    /**
     * Characters ignored by {@link #isMessyCode(String)}: ASCII whitespace, the question mark
     * (the usual replacement for undecodable bytes) and every Unicode punctuation character.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern IGNORED_CHARS = Pattern.compile("[\\s?\\p{P}]+");

    /** Name of the query-string parameter extracted by the {@link #main(String[])} demo. */
    private static final String NAME_KEY = "NameKey";

    /**
     * Tells whether a character lies in one of the Unicode blocks this class treats as Chinese:
     * CJK unified/compatibility ideographs, extension A, CJK symbols and punctuation,
     * general punctuation, and half-width/full-width forms.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} belongs to one of those blocks
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Guesses whether a string is garbled.
     *
     * <p>Whitespace, question marks and punctuation are discarded first. Among the remaining
     * characters that are neither letters nor digits, the share that is not Chinese
     * (see {@link #isChinese(char)}) is computed; the string is reported as garbled when that
     * share exceeds 0.4.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if the text looks garbled; {@code false} otherwise, including for
     *         {@code null}, empty input, and input without any symbol characters
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }
        // Whitespace is stripped as well, so ordinary text containing spaces ("hello world")
        // is not mistaken for garbled input.
        char[] chars = IGNORED_CHARS.matcher(strName).replaceAll("").trim().toCharArray();

        int symbolCount = 0;   // characters that are neither letters nor digits
        int garbledCount = 0;  // symbols that are also not Chinese
        for (char c : chars) {
            if (Character.isLetterOrDigit(c)) {
                continue;
            }
            symbolCount++;
            if (!isChinese(c)) {
                garbledCount++;
            }
        }
        if (symbolCount == 0) {
            // Nothing suspicious found; avoids evaluating 0/0.
            return false;
        }
        // The ratio is deliberately computed as a float and compared with the double 0.4,
        // which keeps the historical threshold behaviour.
        float ratio = (float) garbledCount / symbolCount;
        return ratio > 0.4;
    }

    /**
     * Re-decodes a string as GBK when its platform-default byte encoding contains a non-ASCII
     * byte before the first {@code '?'} byte; otherwise returns the string unchanged.
     *
     * @param str the text to convert; may be {@code null}
     * @return the re-decoded text, the original text when no conversion applies or GBK is
     *         unavailable, or {@code null} for {@code null} input
     */
    private static String toGb2312(String str) {
        if (str == null) {
            return null;
        }
        // NOTE(review): uses the platform default charset, so results depend on the JVM settings.
        byte[] bytes = str.getBytes();
        for (byte b : bytes) {
            if (b == '?') {
                // A question mark ends the scan: the text is left as it is.
                break;
            }
            if (b < 0) {
                // First byte outside the ASCII range: reinterpret the whole array as GBK.
                try {
                    return new String(bytes, "GBK");
                } catch (UnsupportedEncodingException ignored) {
                    // Best effort: without GBK support the input is returned unchanged.
                    return str;
                }
            }
            // Positive bytes (plain ASCII) and zero bytes are skipped.
        }
        return str;
    }

    // Disabled helper kept for reference; it depends on TransformUtils, which is not part of this class.
    // public static String toChinese(Object msg){
    //     String tempMsg = TransformUtils.toString(msg) ;
    //     if(isMessyCode(tempMsg)){
    //         try {
    //             return new String(tempMsg.getBytes("ISO8859-1"), "UTF-8");
    //         } catch (Exception e) {
    //         }
    //     }
    //     return tempMsg ;
    // }

    /**
     * Small manual demo: prints a sample query string and the value of its {@code NameKey}
     * parameter.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown by the current body; kept for
     *         signature compatibility
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String queryString1 = "NameKey=喂";
        System.out.println("--rrr---" + queryString1);
        // Skip the parameter name and the '=' that follows it.
        int valueStart = queryString1.indexOf(NAME_KEY) + NAME_KEY.length() + 1;
        queryString1 = queryString1.substring(valueStart);
        System.out.println("this is dex\t\t" + queryString1);
    }

    /**
     * Checks whether every character of a string is Chinese according to
     * {@link #isChinese(char)}.
     *
     * @param str the text to check; may be {@code null}
     * @return {@code true} if the string is non-empty and consists only of Chinese characters;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static Boolean isRegularChinese(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (char c : str.toCharArray()) {
            if (!isChinese(c)) {
                return false;
            }
        }
        return true;
    }
}