由于项目需要，要把 GBK 编码的字符串转成 UTF-8 编码。在网上随手搜了一下，找到一些样例代码，可以用，不过很恶心——竟然把字符数值转成二进制的字符串形式再进行操作。于是自己动手，根据编码规则用位操作写了一个……也没几行……有时还真不能太懒……
import java.io.UnsupportedEncodingException;
/**
 * Encodes Java strings as UTF-8 using explicit bit manipulation.
 *
 * <p>A Java {@code String} is always UTF-16 in memory. Text that originated as GBK
 * bytes must first be decoded (for example {@code new String(bytes, "GBK")}); the
 * methods here then turn the decoded characters into UTF-8.
 *
 * @author GChan
 */
public class CharacterEncodeConverter {

    /**
     * Demo entry point: decodes a small GBK byte sequence (two Chinese characters
     * with an ASCII digit between them), re-encodes it as UTF-8 and prints the
     * round-tripped text.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        CharacterEncodeConverter convert = new CharacterEncodeConverter();
        byte[] src = {(byte) 0xD6, (byte) 0xD0, (byte) 0x32, (byte) 0xCE, (byte) 0xC4};
        try {
            byte[] fullByte = convert.gbk2utf8byte(new String(src, "GBK"));
            String fullStr = new String(fullByte, "UTF-8");
            System.out.println("string from GBK to UTF-8 byte: " + fullStr);
        } catch (UnsupportedEncodingException e) {
            // Only reachable when the runtime does not ship the named charset.
            System.err.println("Required charset is not available: " + e.getMessage());
        }
    }

    /**
     * Encodes the given string as UTF-8.
     *
     * <p>The string is walked by Unicode code point so that every character gets the
     * sequence length UTF-8 requires: 1 byte below U+0080, 2 bytes below U+0800,
     * 3 bytes for the rest of the Basic Multilingual Plane (this covers Chinese
     * characters) and 4 bytes for supplementary characters stored as surrogate pairs.
     * An unpaired surrogate cannot be represented in well-formed UTF-8 and is
     * replaced by {@code '?'}.
     *
     * @param chinese the text to encode; must not be {@code null}
     * @return a newly allocated array holding exactly the UTF-8 bytes of the text
     * @throws NullPointerException if {@code chinese} is {@code null}
     */
    public byte[] gbk2utf8byte(String chinese) {
        final int length = chinese.length();
        // Upper bound: a BMP character (one UTF-16 unit) needs at most 3 bytes and a
        // supplementary character (two units) needs 4, so 3 bytes per unit suffices.
        byte[] buffer = new byte[3 * length];
        int counter = 0;

        int index = 0;
        while (index < length) {
            int codePoint = chinese.codePointAt(index);
            index += Character.charCount(codePoint);

            // codePointAt returns the surrogate itself when it is not part of a pair.
            if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
                codePoint = '?';
            }

            if (codePoint < 0x80) {
                // 0xxxxxxx
                buffer[counter++] = (byte) codePoint;
            } else if (codePoint < 0x800) {
                // 110xxxxx 10xxxxxx
                buffer[counter++] = (byte) (0xC0 | (codePoint >>> 6));
                buffer[counter++] = (byte) (0x80 | (codePoint & 0x3F));
            } else if (codePoint < 0x10000) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                buffer[counter++] = (byte) (0xE0 | (codePoint >>> 12));
                buffer[counter++] = (byte) (0x80 | ((codePoint >>> 6) & 0x3F));
                buffer[counter++] = (byte) (0x80 | (codePoint & 0x3F));
            } else {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                buffer[counter++] = (byte) (0xF0 | (codePoint >>> 18));
                buffer[counter++] = (byte) (0x80 | ((codePoint >>> 12) & 0x3F));
                buffer[counter++] = (byte) (0x80 | ((codePoint >>> 6) & 0x3F));
                buffer[counter++] = (byte) (0x80 | (codePoint & 0x3F));
            }
        }

        // Trim the worst-case buffer down to the bytes actually written.
        byte[] result = new byte[counter];
        System.arraycopy(buffer, 0, result, 0, counter);
        return result;
    }

    /**
     * Encodes the given string as UTF-8 via {@link #gbk2utf8byte(String)} and decodes
     * those bytes back into a {@code String}.
     *
     * <p>For well-formed input the result is equal to the input; unpaired surrogates
     * come back as {@code '?'}.
     *
     * @param chinese the text to convert; must not be {@code null}
     * @return the text obtained by decoding the UTF-8 bytes; never {@code null}
     * @throws NullPointerException if {@code chinese} is {@code null}
     * @throws IllegalStateException if the runtime lacks the UTF-8 charset, which the
     *         Java platform is required to provide
     */
    public String gbk2utf8(String chinese) {
        try {
            return new String(gbk2utf8byte(chinese), "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }
}