package com.convert.gbkutf;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
/**
 * Demonstrates the UTF-8 bit layout by encoding and decoding strings by hand.
 *
 * <p>Despite the method names, no GBK bytes are produced or consumed anywhere in
 * this class: both conversion methods take a Java {@code String} (UTF-16 code
 * units), pass it through a UTF-8 byte representation and return a
 * {@code String} again. For well-formed input each method therefore returns a
 * string equal to its argument.
 */
public class UTF2GBK {

    /** Charset name used for every byte/String conversion; never the platform default. */
    private static final String UTF_8 = "UTF-8";

    /**
     * Runs both conversions on a sample string and prints each intermediate result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            String s = "幅als;阿萨德fjf;a;ad阿sd☆▲←◎◆f斯蒂芬la ;;jkhj阿a┮┬┿┽ノヌナヌスㄇㄎㄑㄇsdf士大夫udfoasdfahh阿萨德发sasdfkj撒旦法df";
            System.out.println("string : " + s);
            String utfStr = UTF2GBK.gbk2utf8(s);
            System.out.println("string from GBK to UTF-8 byte: " + utfStr);
            String gbkStr = UTF2GBK.utf82gbk(utfStr);
            System.out.println("string from UTF-8 to GBK byte: " + gbkStr);
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Encodes the string to UTF-8 bytes by hand and decodes those bytes back
     * with the JDK's UTF-8 decoder.
     *
     * <p>Every code point is encoded with the correct length (1 to 4 bytes).
     * An unpaired surrogate is replaced by {@code '?'}.
     *
     * @param chenese the text to convert; must not be {@code null}
     * @return the text obtained by decoding the hand-built UTF-8 bytes
     * @throws UnsupportedEncodingException if the runtime does not know "UTF-8"
     * @throws NullPointerException if {@code chenese} is {@code null}
     */
    public static String gbk2utf8(String chenese) throws UnsupportedEncodingException {
        byte[] encoded = encodeUtf8(chenese);
        return new String(encoded, UTF_8);
    }

    /**
     * Obtains the UTF-8 bytes of the string from the JDK and decodes them by
     * hand, stripping the UTF-8 marker bits from each sequence.
     *
     * <p>The sequence length is taken from the lead byte, so 1-, 2-, 3- and
     * 4-byte sequences are all decoded correctly.
     *
     * @param chenese the text to convert; must not be {@code null}
     * @return the text rebuilt from the decoded code points
     * @throws UnsupportedEncodingException if the runtime does not know "UTF-8"
     * @throws NullPointerException if {@code chenese} is {@code null}
     */
    public static String utf82gbk(String chenese) throws UnsupportedEncodingException {
        byte[] utf8 = chenese.getBytes(UTF_8);
        return decodeUtf8(utf8, chenese.length());
    }

    /**
     * Converts a byte into its 8-character binary representation, most
     * significant bit first (for example {@code 5 -> "00000101"},
     * {@code -1 -> "11111111"}).
     *
     * @param b the byte to render
     * @return a string of exactly eight '0'/'1' characters
     */
    public static String getBinaryStrFromByte(byte b) {
        StringBuilder bits = new StringBuilder(8);
        for (int shift = 7; shift >= 0; shift--) {
            bits.append(((b >> shift) & 1) == 0 ? '0' : '1');
        }
        return bits.toString();
    }

    /** Encodes {@code text} as UTF-8, one code point at a time, using bit operations. */
    private static byte[] encodeUtf8(String text) {
        // Upper bound: a BMP char needs at most 3 bytes; a surrogate pair
        // (2 chars) needs 4 bytes, which is still below 2 * 3.
        byte[] buf = new byte[text.length() * 3];
        int len = 0;
        int i = 0;
        while (i < text.length()) {
            int cp = text.codePointAt(i);
            i += Character.charCount(cp);

            // codePointAt returns the surrogate itself when it is unpaired;
            // such a value has no UTF-8 form, so substitute '?'.
            if (cp >= Character.MIN_SURROGATE && cp <= Character.MAX_SURROGATE) {
                cp = '?';
            }

            if (cp < 0x80) {
                // 0xxxxxxx
                buf[len++] = (byte) cp;
            } else if (cp < 0x800) {
                // 110xxxxx 10xxxxxx
                buf[len++] = (byte) (0xC0 | (cp >> 6));
                buf[len++] = (byte) (0x80 | (cp & 0x3F));
            } else if (cp < 0x10000) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                buf[len++] = (byte) (0xE0 | (cp >> 12));
                buf[len++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
                buf[len++] = (byte) (0x80 | (cp & 0x3F));
            } else {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                buf[len++] = (byte) (0xF0 | (cp >> 18));
                buf[len++] = (byte) (0x80 | ((cp >> 12) & 0x3F));
                buf[len++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
                buf[len++] = (byte) (0x80 | (cp & 0x3F));
            }
        }
        byte[] result = new byte[len];
        System.arraycopy(buf, 0, result, 0, len);
        return result;
    }

    /**
     * Decodes well-formed UTF-8 bytes into a string. The only caller passes the
     * output of {@code String.getBytes("UTF-8")}, so malformed input is not handled.
     */
    private static String decodeUtf8(byte[] utf8, int expectedChars) {
        StringBuilder out = new StringBuilder(expectedChars);
        int pos = 0;
        while (pos < utf8.length) {
            int lead = utf8[pos++] & 0xFF;
            int cp;
            int continuationBytes;
            if (lead < 0x80) {
                cp = lead;
                continuationBytes = 0;
            } else if ((lead & 0xE0) == 0xC0) {
                cp = lead & 0x1F;
                continuationBytes = 1;
            } else if ((lead & 0xF0) == 0xE0) {
                cp = lead & 0x0F;
                continuationBytes = 2;
            } else {
                cp = lead & 0x07;
                continuationBytes = 3;
            }
            // Each continuation byte is 10xxxxxx and contributes its low six bits.
            for (int k = 0; k < continuationBytes; k++) {
                cp = (cp << 6) | (utf8[pos++] & 0x3F);
            }
            out.appendCodePoint(cp);
        }
        return out.toString();
    }
}
@@@cjx@@@{"yingyu":{"word":{"M2U1":{"Thank you.":"谢谢你。","I can dance.":"我会跳舞。","can":"会;能够","Touch your face.":"摸摸你的脸。","I can draw.":"我会画画。","you":"你","house":"房子","I":"我","How nice!":"多么漂亮!","draw":"阿花","What can you do, Danny?":"你会做什么,丹尼?","I can read.":"我会读书。","What can you do, Kitty?":"你会做什么,凯蒂?","I can sing.":"我会唱歌。","what":"什么","read a book.":"看书。","sing":"唱歌","This is my face.":"这是我的脸。","read":"读","do":"做","dance":"跳舞","flower":"花","What can you do?":"你会做什么?"},"M2U2":{"Fine. Thank you":"好的。谢谢你。","Thank you":"谢谢你","grandfather":"爷爷","She\u0027s my mother.":"她是我的妈妈。","who":"谁","father":"爸爸","sister":"姐妹","Who is he?":"他是谁?","This is me.":"这是我。","she":"她","me":"我(宾格)","Who is she?":"她是谁?","fine":"好的","He\u0027s Eddie.":"他是艾迪。","how":"怎么样","She\u0027s Alice.":"她是爱丽丝。","he":"他","my":"我的","grandmother":"奶奶","This is my grandmother.":"这是我的奶奶。","I can draw a flower.":"我能画一朵花。","How are you?":"你身体怎么样?","mother":"妈妈","He\u0027s my father.":"他是我的 爸爸。","brother":"兄弟","What can you do?":"你能做什么?"},"M2U3":{"He can read. She can sing.":"他会读书,她会唱歌。","He can draw.":"他会画画。","ten":"10","She can sing.":"她会唱歌。","Kitty":"凯蒂","He\u0027s fat.":"他是胖的。","Eddie":"艾迪","one":"1","nine":"9","She\u0027s my friend.":"她是我的朋友。","Tall girl, short girl. I see you.":"高个子女孩,矮个子女孩。我看见你了。","eight":"6","Ann":"安","six":"6","short":"矮的","girl":"女孩","This is Ben.":"这是Ben。","Who is he?":"他是谁?","Fat boy, thin boy. One and two.":"胖的男孩,瘦的男孩。一和二。","three":"3","Who is she?":"她是谁?","four":"4","He\u0027s my friend.":"他是我的朋友。","Danny":"丹尼","tall":"高的","I can dance.":"我会跳舞。","look":"看","five":"5","He\u0027s my classmate.":"他是我的同班同学。","fat":"胖的","class":"班级","What can he do?":"他会做什么?","classmate":"同班同学","two":"2","seven":"7","What can she do?":"她会做什么?","She\u0027s my classmate.":"她是我的同班同学。","thin":"瘦的","boy":"男孩","friend":"朋友","What can you do?":"你会做什么?"},"M1U3":{"face":"脸","It\u0027s you.":"是你。","Good morning.":"早上好。","Hello, Alice":"你好,爱丽丝。","Hello! I\u0027m Jack.":"你好!我是杰克。","Goodbye! 
Alice.":"再见!爱丽丝。","How nice!":"多么漂亮!","Hi! Jack.":"你好!杰克。","eye":"眼睛","look":"看","It\u0027s not me.":"不是我。","Thank you.":"谢谢你。","is":"是","Jack":"杰克","Hello, Eddie.":"你好,凯蒂。","I\u0027m fine. Thank you.":"我很好。谢谢。","mouth":"嘴巴","your":"你的","Touch your face.":"摸摸你的脸。","Good afternoon":"下午好。","my":"我的","Alice, touch your ear.":"爱丽丝,摸摸你的耳朵。","How are you?":"你好吗?","Here you are.":"给你。","touch":"摸摸,触摸。","ear":"耳朵","This is my face.":"着是我的脸。","This is my ear":"这是我的耳朵","Give me a rubber,please.":"请给我一块橡皮。","nose":"鼻子","this":"这","Goodbye! Miss Fang.":"再见!方小姐。"},"M1U2":{"Good morning.":"早上好。","Hello! I\u0027m Tom.":"你好!我是汤姆。","I can see.":"我能看见。","give":"给","Hello! Tom.":"你好!汤姆。","For you and me.":"给你和我。","Goodbye, Miss Fang.":"再见,方小姐。","How nice!":"多么漂亮!","A rubber!":"一块橡皮。","please":"请","me":"我","A book!":"一本书。","Goodbye, Alice":"再见,爱丽丝。","Thank you.":"谢谢你。","Give me a book, please.":"请给我一本书。","Good afternoon.":"下午好。","rubber":"橡皮","bag":"包","I\u0027m fine. Thank you.":"我很好,谢谢。","Good afternoon, Kitty":"下午好,凯蒂。","you":"你","Good afternoon, Alice":"下午好,爱丽丝。","ruler":"尺","pencil":"铅笔","pen":"钢笔","Hello, Danny.":"你好丹尼。","Hello, Kitty.":"你好,凯蒂。","Here you are.":"给你。","How are you?":"你好吗?","see":"看见","book":"书"},"M1U1":{"Kitty":"凯蒂","Good morning.":"早上好。","Eddie":"艾迪","Nice to see you.":"见到你很高兴。","Hello! Miss Fang.":"你好,方小姐。","nice":"漂亮的","good":"好的","hi":"嗨","Bye-Bye":"拜拜","Hello, Peter! Hello, John!":"你好,皮特!你好,约翰!","Hello! I\u0027m Miss Fang.":"你好,我是方小姐。","Danny":"丹尼","Hi, I\u0027m Tom.":"嗨,我是汤姆。","Alice":"爱丽丝","Good afternoon.":"下午好。","I\u0027m":"我是","Goodbye":"再见","afternoon":"下午","I\u0027m fine. Thank you.":"我很好。谢谢。","goodbye":"再见","Hello. Hi":"你好,嗨!","Nice to see you too.":"见到你很高兴。","morning":"早上","Hello. I\u0027m Danny.":"你好,我是丹尼。","hello":"你好","How are you?":"你好吗?","Hello, Linda! Hello, Tom!":"你好,琳达! 你好,汤姆!"}}}}@@@cjx@@@