java 读入unicode string 简单版本,示例: \u674e\u661f
<<<<<<<<<<
/**
 * Decodes a string that consists solely of 4-hex-digit Unicode escapes,
 * e.g. the Java literal {@code "\\u674e\\u661f"}.
 *
 * <p>The input is cut at every backslash-u marker and each non-empty piece is
 * parsed as a base-16 integer and narrowed to a {@code char}.
 *
 * @param unicodeStr escaped text; must not be {@code null}
 * @return the decoded characters, in input order
 * @throws NumberFormatException if any piece between markers is not a hex number
 */
public static String readUnicodeStr(String unicodeStr) {
    // Backslash is escaped twice: once for the Java literal, once for the regex,
    // so the pattern matches one literal backslash followed by 'u'.
    String[] pieces = unicodeStr.split("\\\\u");
    StringBuilder decoded = new StringBuilder();
    for (int idx = 0; idx < pieces.length; idx++) {
        String hex = pieces[idx];
        // The text before the first marker is an empty piece when the input
        // starts with an escape; there is nothing to decode in it.
        if (!hex.isEmpty()) {
            decoded.append((char) Integer.parseInt(hex, 16));
        }
    }
    return decoded.toString();
}
>>>>>>>>>>
java 读入unicode,增加对unicode串中包含的英文的处理,示例:tb\u674ea\u661fb
<<<<<<<<<<<<<
// tb\u674ea\u661fb
public static String readUnicodeStr2(String unicodeStr) {
StringBuilder buf = new StringBuilder();
for (int i = 0; i < unicodeStr.length(); i++) {
char char1 = unicodeStr.charAt(i);
if (char1 == '\\' && isUnicode(unicodeStr, i)) {
String cStr = unicodeStr.substring(i + 2, i + 6);
int cInt = Integer.parseInt(cStr,16);
buf.append((char) cInt);
// 跨过当前unicode码,因为还有i++,所以这里i加5,而不是6
i = i + 5;
} else {
buf.append(char1);
}
}
return buf.toString();
}
// 判断以index从i开始的串,是不是unicode码
private static boolean isUnicode(String unicodeStr, int i) {
int len = unicodeStr.length();
int remain = len - i;
// unicode码,反斜杠后还有5个字符 uxxxx
if (remain < 5)
return false;
char flag2 = unicodeStr.charAt(i + 1);
if (flag2 != 'u')
return false;
String nextFour = unicodeStr.substring(i + 2, i + 6);
return isHexStr(nextFour);
}
/** hex str 1-9 a-f A-F */
private static boolean isHexStr(String str) {
for (int i = 0; i < str.length(); i++) {
char ch = str.charAt(i);
boolean isHex = ch >= '1' && ch <= '9' || ch >= 'a' && ch <= 'f'
|| ch => 'A' && ch <= 'F';
if (!isHex)
return false;
}
return true;
}
/**
 * Command-line entry point: decodes one argument and prints the result to
 * standard output.
 *
 * <ul>
 *   <li>Exactly one argument: it is decoded with {@code readUnicodeStr2}.</li>
 *   <li>Two or more arguments: the second one is decoded with
 *       {@code readUnicodeStr}; the first is not read (presumably a mode
 *       selector whose value is ignored).</li>
 *   <li>No arguments: a usage line is printed to standard error and the
 *       process exits with status 1.</li>
 * </ul>
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    // Without this guard the two-argument branch would index past an empty array.
    if (args.length == 0) {
        System.err.println("usage: <escaped-text> | <mode> <escaped-text>");
        System.exit(1);
    }
    String output;
    if (args.length == 1) {
        output = readUnicodeStr2(args[0]);
    } else {
        output = readUnicodeStr(args[1]);
    }
    System.out.println(output);
}
>>>>>>>>>>
运行测试:java A 'tb\u674ea\u661fb'
记着给参数带上单引号,否则 shell 会把反斜杠当作转义符处理掉,程序收到的参数里就不再有 \u 了