代码中有两种方式实现
import java.io.IOException;
import java.io.UnsupportedEncodingException;
public class EncodingUtil {
public static String getUTF8StringFromGBKString(String gbkStr) {
try {
return new String(getUTF8BytesFromGBKString(gbkStr), "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new InternalError();
}
}
public static byte[] getUTF8BytesFromGBKString(String gbkStr) {
int n = gbkStr.length();
byte[] utfBytes = new byte[3 * n];
int k = 0;
for (int i = 0; i < n; i++) {
int m = gbkStr.charAt(i);
if (m < 128 && m >= 0) {
utfBytes[k++] = (byte) m;
continue;
}
utfBytes[k++] = (byte) (0xe0 | (m >> 12));
utfBytes[k++] = (byte) (0x80 | ((m >> 6) & 0x3f));
utfBytes[k++] = (byte) (0x80 | (m & 0x3f));
}
if (k < utfBytes.length) {
byte[] tmp = new byte[k];
System.arraycopy(utfBytes, 0, tmp, 0, k);
return tmp;
}
return utfBytes;
}
public static String unicodeToUtf8(String theString) {
char aChar;
int len = theString.length();
StringBuffer outBuffer = new StringBuffer(len);
for (int x = 0; x < len;) {
aChar = theString.charAt(x++);
if (aChar == '\\') {
aChar = theString.charAt(x++);
if (aChar == 'u') {
// Read the xxxx
int value = 0;
for (int i = 0; i < 4; i++) {
aChar = theString.charAt(x++);
switch (aChar) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
value = (value << 4) + aChar - '0';
break;
case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
value = (value << 4) + 10 + aChar - 'a';
break;
case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
value = (value << 4) + 10 + aChar - 'A';
break;
default:
throw new IllegalArgumentException(
"Malformed \\uxxxx encoding.");
}
}
outBuffer.append((char) value);
} else {
if (aChar == 't')
aChar = '\t';
else if (aChar == 'r')
aChar = '\r';
else if (aChar == 'n')
aChar = '\n';
else if (aChar == 'f')
aChar = '\f';
outBuffer.append(aChar);
}
} else
outBuffer.append(aChar);
}
return outBuffer.toString();
}
/**
* @title converToUnicode
* @description
* @param: str
* @updateTime 2021/11/23 10:23
* @return: java.lang.String
* @throws
*/
public static String converToUnicode(String str)
{
str = (str == null ? "" : str);
String tmp;
StringBuffer sb = new StringBuffer(1000);
char c;
int i, j;
sb.setLength(0);
for (i = 0; i < str.length(); i++)
{
c = str.charAt(i);
sb.append("\\u");
j = (c >>>8); //取出高8位
tmp = Integer.toHexString(j);
if (tmp.length() == 1)
sb.append("0");
sb.append(tmp);
j = (c & 0xFF); //取出低8位
tmp = Integer.toHexString(j);
if (tmp.length() == 1)
sb.append("0");
sb.append(tmp);
}
return (new String(sb));
}
public static String convertToUnicodeToUtf8(String str){
return unicodeToUtf8(converToUnicode(str));
}
public static void main(String[] args) throws IOException {
String s = converToUnicode("中国");
System.out.println(unicodeToUtf8(s));
System.out.println(unicodeToUtf8("\\u60a8\\u597d\\uff0c\\u60a8\\u6709\\u5f85\\u529e\\u8bf7\\u5904\\u7406\\uff08\\u5982\\u5df2\\u5904\\u7406\\uff0c\\u8bf7\\u5ffd\\u7565\\uff09\\uff0c\\u8c22\\u8c22\\u3002"));
}
}