代码包含两个部分:将Unicode码值分别转换为UTF-8和UTF-16编码,结果以二进制方式呈现。
由于UTF-16采用代理对(双16位)的编码方式,Unicode码值D800~DFFF被保留,不对应任何字符,否则将无法区分一个16位单元是单独表示一个字符,还是双16位中的一部分。
对于双16位的理解:将一个Unicode码值由一维坐标表示方式变成二维坐标的表现方式。
UTF-8完全由第一个字节的高位(前导位)来判定一个Unicode码值由几个字节存储。
package test;
import java.util.Scanner;
import javax.xml.ws.AsyncHandler;
/**
 * Console demo that shows how a Unicode code point is encoded in UTF-8 and UTF-16.
 *
 * <p>The code point is read from standard input as a decimal integer and each encoding is
 * printed as a string of binary digits, split into 8-digit groups. Leading zero bits of the
 * whole encoded value are omitted, so the first group may be shorter than 8 digits.
 */
public class UnicodeTest {
    /** Message returned by the converters when the input is not an encodable code point. */
    private static final String OUT_OF_RANGE = "Out of range!";

    /** Largest valid Unicode code point (U+10FFFF). */
    private static final int MAX_CODE_POINT = 0x10FFFF;

    /** First value of the surrogate range (U+D800), reserved for UTF-16 pairs. */
    private static final int SURROGATE_MIN = 0xD800;

    /** Last value of the surrogate range (U+DFFF). */
    private static final int SURROGATE_MAX = 0xDFFF;

    /** Base of the low (trailing) surrogate range (U+DC00). */
    private static final int LOW_SURROGATE_BASE = 0xDC00;

    /** First code point that needs a surrogate pair in UTF-16 / four bytes in UTF-8. */
    private static final int SUPPLEMENTARY_MIN = 0x10000;

    /**
     * Reads a decimal code point from standard input and prints its UTF-8 and UTF-16 encodings.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("Please input the Unicode:");
        Scanner scanner = new Scanner(System.in);
        try {
            // Guard against non-numeric input instead of crashing with an uncaught exception.
            if (!scanner.hasNextInt()) {
                System.out.println("Invalid input: expected a decimal integer code point.");
                return;
            }
            int unicode = scanner.nextInt();
            System.out.println("UTF-8: " + formatString(changeToUTF8(unicode)));
            System.out.println("UTF-16: " + formatString(changeToUTF16(unicode)));
        } finally {
            scanner.close();
        }
    }

    /**
     * Encodes a code point as UTF-8 and returns the bytes as concatenated binary digits.
     *
     * <p>A one-byte (ASCII) result is returned without leading zeros; multi-byte results are
     * always a multiple of 8 digits because every lead and continuation byte has its top bit set.
     *
     * @param unicode the code point to encode
     * @return the binary digits of the UTF-8 bytes, or {@code "Out of range!"} when
     *         {@code unicode} is negative, above U+10FFFF, or a surrogate (U+D800..U+DFFF)
     */
    public static String changeToUTF8(int unicode) {
        if (!isScalarValue(unicode)) {
            return OUT_OF_RANGE;
        }
        if (unicode < 0x80) {
            // Single byte: the code point is the byte itself.
            return Integer.toBinaryString(unicode);
        }

        int continuationBytes;
        int leadMarker;
        if (unicode < 0x800) {
            continuationBytes = 1;
            leadMarker = 0xC0; // 110xxxxx
        } else if (unicode < SUPPLEMENTARY_MIN) {
            continuationBytes = 2;
            leadMarker = 0xE0; // 1110xxxx
        } else {
            continuationBytes = 3;
            leadMarker = 0xF0; // 11110xxx
        }

        StringBuilder bits = new StringBuilder(8 * (continuationBytes + 1));
        // The lead byte carries the bits left over above the 6-bit continuation payloads.
        bits.append(Integer.toBinaryString(leadMarker | (unicode >> (6 * continuationBytes))));
        for (int i = continuationBytes - 1; i >= 0; i--) {
            // Each continuation byte is 10xxxxxx with the next 6 payload bits.
            bits.append(Integer.toBinaryString(0x80 | ((unicode >> (6 * i)) & 0x3F)));
        }
        return bits.toString();
    }

    /**
     * Splits a binary digit string into space-separated groups of 8, aligned from the right.
     *
     * <p>When the length is not a multiple of 8 the first group is the shorter one. A string
     * containing anything other than {@code '0'} and {@code '1'} (such as the
     * {@code "Out of range!"} message) is returned unchanged so that it is not broken apart.
     *
     * @param resultString the digits to group; must not be {@code null}
     * @return the grouped digits without leading or trailing spaces
     */
    public static String formatString(String resultString) {
        if (!isBinaryDigits(resultString)) {
            return resultString;
        }
        int length = resultString.length();
        int headLength = length % 8;
        StringBuilder grouped = new StringBuilder(length + length / 8);
        grouped.append(resultString, 0, headLength);
        for (int start = headLength; start < length; start += 8) {
            // Only separate groups; never emit a leading space.
            if (grouped.length() > 0) {
                grouped.append(' ');
            }
            grouped.append(resultString, start, start + 8);
        }
        return grouped.toString();
    }

    /**
     * Encodes a code point as UTF-16 and returns the code units as concatenated binary digits.
     *
     * <p>A code point in the Basic Multilingual Plane is returned as its own binary value
     * without leading zeros. A supplementary code point is returned as a 32-digit surrogate
     * pair: 0x10000 is subtracted, the upper 10 bits go into the high surrogate (base U+D800)
     * and the lower 10 bits into the low surrogate (base U+DC00).
     *
     * @param unicode the code point to encode
     * @return the binary digits of the UTF-16 code units, or {@code "Out of range!"} when
     *         {@code unicode} is negative, above U+10FFFF, or a surrogate (U+D800..U+DFFF)
     */
    public static String changeToUTF16(int unicode) {
        if (!isScalarValue(unicode)) {
            return OUT_OF_RANGE;
        }
        if (unicode < SUPPLEMENTARY_MIN) {
            return Integer.toBinaryString(unicode);
        }
        // The pair encodes the 20-bit offset from U+10000, not the raw code point.
        int offset = unicode - SUPPLEMENTARY_MIN;
        int highSurrogate = SURROGATE_MIN | (offset >> 10);
        int lowSurrogate = LOW_SURROGATE_BASE | (offset & 0x3FF);
        return Integer.toBinaryString(highSurrogate) + Integer.toBinaryString(lowSurrogate);
    }

    /** Returns whether the value is a code point that can be encoded (in range, not a surrogate). */
    private static boolean isScalarValue(int unicode) {
        if (unicode < 0 || unicode > MAX_CODE_POINT) {
            return false;
        }
        return unicode < SURROGATE_MIN || unicode > SURROGATE_MAX;
    }

    /** Returns whether every character of the text is '0' or '1' (true for the empty string). */
    private static boolean isBinaryDigits(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c != '0' && c != '1') {
                return false;
            }
        }
        return true;
    }
}