// A UTF-8 text file may begin with the byte-order mark EF BB BF.
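// A UTF-16 text file begins with a byte-order mark that identifies the byte order of the stream: the bytes FE FF indicate big-endian, the bytes FF FE indicate little-endian.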
// Decode UTF-8 bytes into UTF-16 code units stored in host byte order.
//
// Only 1-, 2- and 3-byte UTF-8 sequences (code points up to U+FFFF) are
// handled; a 4-byte sequence is reported as an error. No byte-order mark is
// consumed or produced. Each code unit is assumed to occupy two bytes.
//
// utf8   - input bytes.
// size8  - on return, the number of input bytes consumed.
//          NOTE(review): this value is not read as an input bound; the caller
//          must make sure utf8 holds enough bytes to fill the requested
//          output. Confirm against callers before tightening this.
// utf16  - output buffer.
// size16 - on entry, the number of bytes to produce in utf16 (two per code
//          unit); on return, the number of bytes actually written.
//
// Conversion stops at the first invalid lead or continuation byte after
// printing "error!"; size8 and size16 then describe the valid prefix.
//
// Returns the number of bytes written to utf16 (the final value of size16).
int convertUTF8UTF16(unsigned char* utf8, int& size8, char* utf16, int& size16)
{
    int count = 0;  // read offset into utf8
    int i = 0;      // write offset into utf16

    // NOTE(review): the loop header was truncated in the text this function
    // was recovered from ("for(i=0;i {"). It is reconstructed as "one 2-byte
    // code unit per iteration while room remains in the output".
    for (i = 0; i + 1 < size16; i += 2)
    {
        const unsigned char lead = utf8[count];
        unsigned short int integer = 0;

        if (lead < 0x80)
        {
            // 0xxxxxxx: single byte.
            integer = lead;
            count++;
        }
        else if (lead >= 0xC0 && lead <= 0xDF)
        {
            // 110xxxxx 10xxxxxx
            const unsigned char c1 = utf8[count + 1];
            if ((c1 & 0xC0) != 0x80)
            {
                printf("error!\n");
                break;
            }
            integer = (unsigned short int)(((lead & 0x1F) << 6) | (c1 & 0x3F));
            count += 2;
        }
        else if (lead >= 0xE0 && lead <= 0xEF)
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            const unsigned char c1 = utf8[count + 1];
            if ((c1 & 0xC0) != 0x80)
            {
                printf("error!\n");
                break;
            }
            const unsigned char c2 = utf8[count + 2];
            if ((c2 & 0xC0) != 0x80)
            {
                printf("error!\n");
                break;
            }
            integer = (unsigned short int)(((lead & 0x0F) << 12) |
                                           ((c1 & 0x3F) << 6) |
                                           (c2 & 0x3F));
            count += 3;
        }
        else
        {
            // Stray continuation byte (0x80..0xBF) or a lead byte of a
            // sequence longer than three bytes: stop instead of looping on
            // the same byte.
            printf("error!\n");
            break;
        }

        // Copy the code unit bytewise so the char buffer needs no particular
        // alignment; the stored byte order is the host's.
        const unsigned char* raw = (const unsigned char*)&integer;
        utf16[i] = (char)raw[0];
        utf16[i + 1] = (char)raw[1];
    }

    size8 = count;
    size16 = i;
    return size16;
}

// Encode UTF-16 code units (host byte order) as UTF-8 bytes.
//
// Every 2-byte code unit is encoded on its own as a 1-, 2- or 3-byte
// sequence. Surrogate pairs are not combined: each surrogate code unit is
// written as its own 3-byte sequence. No byte-order mark is consumed or
// produced.
//
// utf16  - input buffer of 2-byte code units.
// size16 - on entry, the number of input bytes; on return, the number of
//          input bytes consumed (a trailing odd byte is left unread).
// utf8   - output buffer.
//          NOTE(review): its capacity is not checked; up to three bytes are
//          written per input code unit, so the caller must size it
//          accordingly.
// size8  - on return, the number of bytes written to utf8.
//
// Returns the number of bytes written to utf8 (the final value of size8).
int convertUTF16UTF8(char* utf16, int& size16, char* utf8, int& size8)
{
    int i = 0;      // read offset into utf16
    int count = 0;  // write offset into utf8

    // NOTE(review): the loop header was truncated in the text this function
    // was recovered from ("for(i=0;i {"). It is reconstructed as "one 2-byte
    // code unit per iteration while a whole unit remains in the input".
    for (i = 0; i + 1 < size16; i += 2)
    {
        // Assemble the code unit bytewise so the char buffer needs no
        // particular alignment.
        unsigned short int integer = 0;
        unsigned char* raw = (unsigned char*)&integer;
        raw[0] = (unsigned char)utf16[i];
        raw[1] = (unsigned char)utf16[i + 1];

        if (integer < 0x80)
        {
            // Take the value from the assembled unit, not from utf16[i],
            // so the result does not depend on host byte order.
            utf8[count] = (char)integer;
            count++;
        }
        else if (integer <= 0x07FF)
        {
            // U+07FF is the last code point that fits in two bytes.
            utf8[count] = (char)(0xC0 | (0x1F & (integer >> 6)));
            utf8[count + 1] = (char)(0x80 | (0x3F & integer));
            count += 2;
        }
        else
        {
            utf8[count] = (char)(0xE0 | (0x0F & (integer >> 12)));
            utf8[count + 1] = (char)(0x80 | (0x3F & (integer >> 6)));
            utf8[count + 2] = (char)(0x80 | (0x3F & integer));
            count += 3;
        }
    }

    size16 = i;
    size8 = count;
    return count;
}