// UnicodeHtml.java
/**
 * Converts text to and from HTML hexadecimal numeric character references
 * of the form {@code &#xHHHH;}.
 *
 * <p>All methods are static and keep no state between calls.
 */
public class UnicodeHtml {

    /** Opening marker of a hexadecimal numeric character reference. */
    private static final String ENTITY_PREFIX = "&#x";

    /** Closing marker of a numeric character reference. */
    private static final char ENTITY_SUFFIX = ';';

    /** Minimum number of hex digits written by the encoders (zero-padded). */
    private static final int MIN_HEX_DIGITS = 4;

    /**
     * Maximum number of hex digits accepted by the decoder. Six digits are
     * enough for U+10FFFF and guarantee the accumulated value fits in an int.
     */
    private static final int MAX_HEX_DIGITS = 6;

    /** First code point that {@link #chinaToUnicode(String)} encodes (U+4E00). */
    private static final int FIRST_ENCODED_CODE_POINT = 0x4E00;

    /**
     * Demo entry point: encodes a sample string, prints the encoded form,
     * then prints the result of decoding it again.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inStr = "dfadf\\uCN吉林长春|None|CERNET|1|None。";
        String unicodeStr = UnicodeHtml.string2unicode(inStr);
        System.out.println(unicodeStr);
        System.out.println(unicode2string(unicodeStr));
    }

    /**
     * Encodes every character of the input as a hexadecimal numeric
     * character reference.
     *
     * <p>Each code point is written as {@code &#x}, its lowercase hex value
     * zero-padded to at least four digits, and {@code ;}. A supplementary
     * character (surrogate pair) is written as one reference such as
     * {@code &#x20bb7;} rather than as two surrogate references, because
     * surrogate values are not valid HTML character references.
     *
     * @param str text to encode; {@code null} is treated as the empty string
     * @return the encoded text, never {@code null}
     */
    public static String string2unicode(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        StringBuilder out = new StringBuilder(str.length() * 8);
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            appendEntity(out, codePoint);
            index += Character.charCount(codePoint);
        }
        return out.toString();
    }

    /**
     * Decodes hexadecimal numeric character references back into characters.
     *
     * <p>References may be mixed with ordinary text; anything that is not a
     * well-formed {@code &#x<1-6 hex digits>;} reference with a value up to
     * U+10FFFF is copied through unchanged. Hex digits are accepted in either
     * case. Two consecutive surrogate references (the form older versions of
     * {@link #string2unicode(String)} produced) decode to the same text as
     * the single supplementary reference.
     *
     * @param str text to decode; {@code null} is treated as the empty string
     * @return the decoded text, never {@code null}
     */
    public static String unicode2string(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        final int length = str.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            int start = str.indexOf(ENTITY_PREFIX, pos);
            if (start < 0) {
                // No further references: copy the remaining literal text.
                out.append(str, pos, length);
                break;
            }
            out.append(str, pos, start);

            // Accumulate hex digits directly so malformed input stays linear.
            int cursor = start + ENTITY_PREFIX.length();
            int value = 0;
            int digits = 0;
            while (cursor < length && digits < MAX_HEX_DIGITS) {
                int digit = hexValue(str.charAt(cursor));
                if (digit < 0) {
                    break;
                }
                value = (value << 4) | digit;
                digits++;
                cursor++;
            }

            boolean wellFormed = digits > 0
                    && cursor < length
                    && str.charAt(cursor) == ENTITY_SUFFIX
                    && value <= Character.MAX_CODE_POINT;
            if (wellFormed) {
                out.appendCodePoint(value);
                pos = cursor + 1;
            } else {
                // Not a reference after all: keep the marker as literal text
                // and resume scanning right after it.
                out.append(ENTITY_PREFIX);
                pos = start + ENTITY_PREFIX.length();
            }
        }
        return out.toString();
    }

    /**
     * Encodes only the characters at or above U+4E00 as hexadecimal numeric
     * character references and leaves all other characters untouched.
     *
     * <p>The lower bound is the start of the CJK Unified Ideographs block.
     * There is intentionally no upper bound: the bound used historically
     * (171941) was larger than any {@code char} value and therefore never
     * excluded anything, so every character from U+4E00 upwards has always
     * been encoded. That behaviour is kept, which means non-Chinese
     * characters above U+4E00 are encoded as well. Supplementary characters
     * are written as a single reference.
     *
     * @param str text to encode; {@code null} is treated as the empty string
     * @return the text with high code points replaced by references,
     *         never {@code null}
     */
    public static String chinaToUnicode(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        StringBuilder out = new StringBuilder(str.length() + 16);
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (codePoint >= FIRST_ENCODED_CODE_POINT) {
                appendEntity(out, codePoint);
            } else {
                out.appendCodePoint(codePoint);
            }
            index += Character.charCount(codePoint);
        }
        return out.toString();
    }

    /** Appends {@code &#x<hex>;} for the code point, zero-padded to four lowercase hex digits. */
    private static void appendEntity(StringBuilder out, int codePoint) {
        String hex = Integer.toHexString(codePoint);
        out.append(ENTITY_PREFIX);
        for (int pad = hex.length(); pad < MIN_HEX_DIGITS; pad++) {
            out.append('0');
        }
        out.append(hex).append(ENTITY_SUFFIX);
    }

    /** Returns the value of an ASCII hex digit (either case), or -1 if the character is not one. */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }
}