转载:Java:转换汉字为unicode形…

转载一篇文章:Java:将汉字转换为 Unicode 转义形式的字符串,以及将 Unicode 转义形式的字符串还原为汉字

出处:http://www.cppblog.com/biao/archive/2010/12/21/137087.aspx





import java.io.UnsupportedEncodingException;



/**
 * Converts strings to and from an ASCII-only escaped form.
 *
 * <p>In the escaped form every character outside the printable ASCII range is written as a
 * backslash, the letter {@code u} and four upper-case hexadecimal digits. Tab, newline,
 * carriage return and form feed are written as {@code \t}, {@code \n}, {@code \r} and
 * {@code \f}; the characters {@code = : # !} and the backslash itself are prefixed with a
 * backslash.
 */
public class UnicodeConverter {

    /** Upper-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /** Message used for every malformed four-digit unicode escape. */
    private static final String MALFORMED_UNICODE = "Malformed \\uxxxx encoding.";

    /**
     * Demo: prints a sample string, its escaped form, and the result of decoding it again.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown by this implementation; declared to
     *     keep the original signature
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String s = "黄 \t彪\u5F6A";
        System.out.println("Original:\t\t" + s);

        s = toEncodedUnicode(s, true);
        System.out.println("to unicode:\t\t" + s);

        s = fromEncodedUnicode(s.toCharArray(), 0, s.length());
        System.out.println("from unicode:\t" + s);
    }

    /** Returns the upper-case hex digit for the low four bits of {@code nibble}. */
    private static char toHex(int nibble) {
        return HEX_DIGITS[nibble & 0xF];
    }

    /**
     * Escapes {@code theString} into its ASCII-only form.
     *
     * @param theString the text to escape; must not be {@code null}
     * @param escapeSpace if {@code true} every space is preceded by a backslash; if
     *     {@code false} only a space at index 0 is
     * @return the escaped text
     */
    public static String toEncodedUnicode(String theString, boolean escapeSpace) {
        int len = theString.length();
        // Capacity hint only. Computed in long arithmetic so that doubling a very long
        // input cannot overflow into a negative or absurd request.
        int capacity = (int) Math.min((long) len * 2, Integer.MAX_VALUE - 8);
        StringBuilder out = new StringBuilder(capacity);

        for (int i = 0; i < len; i++) {
            char c = theString.charAt(i);

            // Fast path: the range '>' .. '~' holds exactly one character that needs
            // escaping (the backslash), so handle it before the general switch.
            if (c > 61 && c < 127) {
                if (c == '\\') {
                    out.append('\\');
                }
                out.append(c);
                continue;
            }

            switch (c) {
                case ' ':
                    if (i == 0 || escapeSpace) {
                        out.append('\\');
                    }
                    out.append(' ');
                    break;
                case '\t':
                    out.append('\\').append('t');
                    break;
                case '\n':
                    out.append('\\').append('n');
                    break;
                case '\r':
                    out.append('\\').append('r');
                    break;
                case '\f':
                    out.append('\\').append('f');
                    break;
                case '=': // Fall through
                case ':': // Fall through
                case '#': // Fall through
                case '!':
                    out.append('\\').append(c);
                    break;
                default:
                    if (c < 0x0020 || c > 0x007e) {
                        appendUnicodeEscape(out, c);
                    } else {
                        out.append(c);
                    }
                    break;
            }
        }
        return out.toString();
    }

    /**
     * Appends {@code c} as a backslash, {@code u} and four hex digits.
     * A char is 16 bits wide; its four nibbles are written from most to least significant.
     */
    private static void appendUnicodeEscape(StringBuilder out, char c) {
        out.append('\\')
            .append('u')
            .append(toHex(c >> 12))
            .append(toHex(c >> 8))
            .append(toHex(c >> 4))
            .append(toHex(c));
    }

    /**
     * Decodes the escaped text held in {@code in[off .. off + len)}.
     *
     * <p>A backslash followed by {@code u} and four hex digits (either case) becomes the
     * corresponding char; {@code \t}, {@code \r}, {@code \n} and {@code \f} become the
     * matching control character; a backslash followed by any other character yields that
     * character unchanged. No character outside the given range is ever read.
     *
     * @param in the buffer containing the escaped text; must not be {@code null}
     * @param off index of the first character to decode
     * @param len number of characters to decode
     * @return the decoded text
     * @throws IndexOutOfBoundsException if {@code off} and {@code len} do not describe a
     *     range inside {@code in}
     * @throws IllegalArgumentException if the range ends in the middle of an escape sequence
     *     or a unicode escape contains a non-hex digit
     */
    public static String fromEncodedUnicode(char[] in, int off, int len) {
        if (off < 0 || len < 0 || off > in.length - len) {
            throw new IndexOutOfBoundsException(
                "off=" + off + ", len=" + len + ", in.length=" + in.length);
        }

        char[] out = new char[len]; // decoding can only shrink the text, never grow it
        int outLen = 0;
        int pos = off;
        int end = off + len;

        while (pos < end) {
            char c = in[pos++];
            if (c != '\\') {
                out[outLen++] = c;
                continue;
            }

            // A backslash must be followed by at least one character inside the range.
            if (pos >= end) {
                throw new IllegalArgumentException("Malformed escape: trailing backslash.");
            }
            c = in[pos++];

            if (c == 'u') {
                // Exactly four hex digits must follow, all inside the range.
                if (end - pos < 4) {
                    throw new IllegalArgumentException(MALFORMED_UNICODE);
                }
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    value = (value << 4) + hexValue(in[pos++]);
                }
                out[outLen++] = (char) value;
            } else {
                switch (c) {
                    case 't':
                        c = '\t';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    default:
                        // Any other escaped character stands for itself.
                        break;
                }
                out[outLen++] = c;
            }
        }
        return new String(out, 0, outLen);
    }

    /**
     * Returns the numeric value of the ASCII hex digit {@code c}.
     *
     * @throws IllegalArgumentException if {@code c} is not one of 0-9, a-f or A-F
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        throw new IllegalArgumentException(MALFORMED_UNICODE);
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值