实现思路大致如下:
- 取得一个汉字的Unicode码
- 把Unicode码的两个字节分别转换为16进制数据字符串(getBytes("Unicode")返回结果的前两个字节是字节序标记BOM,需要丢弃)
- 把这两个16进制数据字符串转换成二进制数据字符串
- 把这个16位的二进制数据字符串分解为三个串:第一个串取第0~4位(共4个位),在高位加上标记位“1110”;第二个串取第4~10位、第三个串取第10~16位(各6个位),分别在高位加上标记位“10”
- 把这三个二进制串分别转换为10进制数据并赋值给字节型数组
- 根据这个字节型数组构造UTF-8字符
Java 代码:
- import java.io.File;
- import java.io.FileOutputStream;
- import java.io.UnsupportedEncodingException;
/**
 * Demonstrates how Unicode code points are laid out as UTF-8 bytes by
 * building the byte sequences by hand.
 *
 * <p>Originally written 2007-8-10 by jyin at gomez dot com.
 */
public class CharsetConvertor {

    /** Substituted for an unpaired surrogate, which has no UTF-8 representation. */
    private static final char REPLACEMENT_CHAR = '\uFFFD';

    /**
     * Converts a sample string with {@link #gbToUtf8(String)} and writes the
     * result, encoded as UTF-8, to {@code D:/test.txt}.
     *
     * <p>Any failure is reported by printing the stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "This is a test for *中网!@#$。,?";
        try {
            File f = new File("D:/test.txt");
            FileOutputStream fio = new FileOutputStream(f);
            try {
                String s = gbToUtf8(str);
                fio.write(s.getBytes("UTF-8"));
            } finally {
                // Close the stream even when conversion or the write fails.
                fio.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds {@code str} by hand-encoding every non-ASCII code point into
     * its UTF-8 byte sequence and decoding those bytes again.
     *
     * <p>The code point value is read directly from the string, so the result
     * does not depend on the byte order produced by any charset encoder.
     * ASCII characters are copied unchanged. An unpaired surrogate cannot be
     * expressed in UTF-8 and is replaced with U+FFFD.
     *
     * @param str the text to convert; must not be {@code null}
     * @return the text after the round trip through UTF-8 bytes
     * @throws UnsupportedEncodingException if the runtime has no "UTF-8" charset
     */
    public static String gbToUtf8(String str) throws UnsupportedEncodingException {
        StringBuilder sb = new StringBuilder(str.length());
        int i = 0;
        while (i < str.length()) {
            // Read whole code points so that surrogate pairs stay together.
            int codePoint = str.codePointAt(i);
            if (codePoint < 0x80) {
                sb.append((char) codePoint);
            } else if (codePoint >= Character.MIN_SURROGATE
                    && codePoint <= Character.MAX_SURROGATE) {
                // codePointAt returns the bare surrogate when it is unpaired.
                sb.append(REPLACEMENT_CHAR);
            } else {
                sb.append(new String(encodeCodePoint(codePoint), "UTF-8"));
            }
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Lays out one code point as UTF-8 bytes.
     *
     * <p>The lead byte carries the marker bits {@code 0}, {@code 110},
     * {@code 1110} or {@code 11110} depending on the sequence length; every
     * following byte carries the marker {@code 10} plus six payload bits.
     *
     * @param codePoint a Unicode scalar value (not a surrogate)
     * @return the one to four bytes that encode {@code codePoint}
     */
    private static byte[] encodeCodePoint(int codePoint) {
        if (codePoint < 0x80) {
            return new byte[] {(byte) codePoint};
        }
        if (codePoint < 0x800) {
            return new byte[] {
                (byte) (0xC0 | (codePoint >> 6)),
                (byte) (0x80 | (codePoint & 0x3F))
            };
        }
        if (codePoint < 0x10000) {
            return new byte[] {
                (byte) (0xE0 | (codePoint >> 12)),
                (byte) (0x80 | ((codePoint >> 6) & 0x3F)),
                (byte) (0x80 | (codePoint & 0x3F))
            };
        }
        return new byte[] {
            (byte) (0xF0 | (codePoint >> 18)),
            (byte) (0x80 | ((codePoint >> 12) & 0x3F)),
            (byte) (0x80 | ((codePoint >> 6) & 0x3F)),
            (byte) (0x80 | (codePoint & 0x3F))
        };
    }
}