转载：Java：转换汉字为unicode形…

最新推荐文章于 2024-07-24 18:18:36 发布

rxcai

最新推荐文章于 2024-07-24 18:18:36 发布

阅读量668

点赞数

分类专栏： Java

本文链接：https://blog.csdn.net/crxmai/article/details/16845805

版权

Java 专栏收录该内容

27 篇文章 0 订阅

订阅专栏

转篇文章：Java：转换汉字为unicode形式的字符串和转换unicode形式字符串转换成汉字

出处：http://www.cppblog.com/biao/archive/2010/12/21/137087.aspx

import java.io.UnsupportedEncodingException;

public class UnicodeConverter {

public static void main(String[] args) throws UnsupportedEncodingException {

String s = "黄 \t彪\u5F6A";

System.out.println("Original:\t\t" + s);

s = toEncodedUnicode(s, true);

System.out.println("to unicode:\t\t" + s);

s = fromEncodedUnicode(s.toCharArray(), 0, s.length());

System.out.println("from unicode:\t" + s);

}

private static final char[] hexDigit = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',

'B', 'C', 'D', 'E', 'F' };

private static char toHex(int nibble) {

return hexDigit[(nibble & 0xF)];

}

public static String toEncodedUnicode(String theString, boolean escapeSpace) {

int len = theString.length();

int bufLen = len * 2;

if (bufLen < 0) {

bufLen = Integer.MAX_VALUE;

}

StringBuffer outBuffer = new StringBuffer(bufLen);

for (int x = 0; x < len; x++) {

char aChar = theString.charAt(x);

// Handle common case first, selecting largest block that

// avoids the specials below

if ((aChar > 61) && (aChar < 127)) {

if (aChar == '\\') {

outBuffer.append('\\');

outBuffer.append('\\');

continue;

}

outBuffer.append(aChar);

continue;

}

switch (aChar) {

case ' ':

if (x == 0 || escapeSpace) outBuffer.append('\\');

outBuffer.append(' ');

break;

case '\t':

outBuffer.append('\\');

outBuffer.append('t');

break;

case '\n':

outBuffer.append('\\');

outBuffer.append('n');

break;

case '\r':

outBuffer.append('\\');

outBuffer.append('r');

break;

case '\f':

outBuffer.append('\\');

outBuffer.append('f');

break;

case '=': // Fall through

case ':': // Fall through

case '#': // Fall through

case '!':

outBuffer.append('\\');

outBuffer.append(aChar);

break;

default:

if ((aChar < 0x0020) || (aChar > 0x007e)) {

// 每个unicode有16位，每四位对应的16进制从高位保存到低位

outBuffer.append('\\');

outBuffer.append('u');

outBuffer.append(toHex((aChar >> 12) & 0xF));

outBuffer.append(toHex((aChar >> 8) & 0xF));

outBuffer.append(toHex((aChar >> 4) & 0xF));

outBuffer.append(toHex(aChar & 0xF));

} else {

outBuffer.append(aChar);

}

}

}

return outBuffer.toString();

}

public static String fromEncodedUnicode(char[] in, int off, int len) {

char aChar;

char[] out = new char[len]; // 只短不长

int outLen = 0;

int end = off + len;

while (off < end) {

aChar = in[off++];

if (aChar == '\\') {

aChar = in[off++];

if (aChar == 'u') {

// Read the xxxx

int value = 0;

for (int i = 0; i < 4; i++) {

aChar = in[off++];

switch (aChar) {

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

value = (value << 4) + aChar - '0';

break;

case 'a':

case 'b':

case 'c':

case 'd':

case 'e':

case 'f':

value = (value << 4) + 10 + aChar - 'a';

break;

case 'A':

case 'B':

case 'C':

case 'D':

case 'E':

case 'F':

value = (value << 4) + 10 + aChar - 'A';

break;

default:

throw new IllegalArgumentException("Malformed \\uxxxx encoding.");

}

}

out[outLen++] = (char) value;

} else {

if (aChar == 't') {

aChar = '\t';

} else if (aChar == 'r') {

aChar = '\r';

} else if (aChar == 'n') {

aChar = '\n';

} else if (aChar == 'f') {

aChar = '\f';

}

out[outLen++] = aChar;

}

} else {

out[outLen++] = (char) aChar;

}

}

return new String(out, 0, outLen);

}

}