/*
 * 网上的很多代码都只是单个字符转码,自己整理了一下,分享出来,望指正。
 * UCS2:2 字节 Unicode 编码
 * UTF8:多字节编码
 */
/*
 * Convert a buffer of UCS-2 code units to UTF-8.
 *
 * in    input bytes, read as consecutive 2-byte code units in host byte
 *       order
 * ilen  size of in, in bytes; a trailing odd byte is ignored
 * out   destination buffer for the UTF-8 bytes (no terminator is appended)
 * olen  capacity of out, in bytes
 *
 * Returns the number of bytes written to out, 0 when in or out is NULL,
 * or -1 when out has no room for the next character (bytes written before
 * that point remain in out).
 *
 * Each code unit is encoded on its own as 1, 2 or 3 bytes; surrogate pairs
 * are not combined.
 */
int ucs2_to_utf8(const unsigned char *in, int ilen, unsigned char *out, int olen)
{
    int length = 0;
    int pos = 0;

    if (!out || !in) return length;

    /* Subtract instead of add so the bounds checks cannot overflow. */
    while (ilen - pos >= 2)
    {
        unsigned short ucs2_code = 0;
        unsigned char *raw = (unsigned char *)&ucs2_code;
        int need;

        /*
         * Assemble the code unit byte by byte: casting in to
         * unsigned short * would be a misaligned, aliasing-violating read.
         */
        raw[0] = in[pos];
        raw[1] = in[pos + 1];

        if (ucs2_code < 0x0080)
            need = 1;
        else if (ucs2_code < 0x0800)
            need = 2;
        else
            need = 3;

        if (olen - length < need) return -1;

        if (need == 1)
        {
            /* 1 byte UTF-8 character: 0xxxxxxx */
            out[length] = (unsigned char)ucs2_code;
        }
        else if (need == 2)
        {
            /* 2 bytes UTF-8 character: 110xxxxx 10xxxxxx */
            out[length] = (unsigned char)(0xC0 | (ucs2_code >> 6));
            out[length + 1] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
        }
        else
        {
            /* 3 bytes UTF-8 character: 1110xxxx 10xxxxxx 10xxxxxx */
            out[length] = (unsigned char)(0xE0 | (ucs2_code >> 12));
            out[length + 1] = (unsigned char)(0x80 | ((ucs2_code >> 6) & 0x003F));
            out[length + 2] = (unsigned char)(0x80 | (ucs2_code & 0x003F));
        }

        length += need;
        pos += 2;
    }
    return length;
}
/*
 * Convert a UTF-8 byte buffer to UCS-2 code units.
 *
 * in    UTF-8 input bytes
 * ilen  size of in, in bytes
 * out   destination buffer; each decoded character is stored as a 2-byte
 *       code unit in host byte order
 * olen  capacity of out, in bytes
 *
 * Returns the number of bytes written to out (always a multiple of 2), or
 * 0 when in or out is NULL. Decoding stops quietly at the first of: end of
 * input, fewer than 2 free bytes left in out, a byte that does not start a
 * 1-, 2- or 3-byte sequence, a bad continuation byte, or a sequence cut
 * short by the end of input. Overlong encodings are not rejected.
 */
int utf8_to_ucs2(const unsigned char *in, int ilen, unsigned char *out,
        int olen) {
    int consumed = 0;
    int ret = 0;

    if (!in || !out) {
        return 0;
    }

    while (consumed < ilen && olen - ret >= 2) {
        const unsigned char *seq = in + consumed;
        int remaining = ilen - consumed;
        unsigned short ucs2_code;
        int seq_len;

        if (0x00 == (seq[0] & 0x80)) {
            /* 1 byte UTF-8 character. */
            ucs2_code = (unsigned short) seq[0];
            seq_len = 1;
        } else if (0xc0 == (seq[0] & 0xe0) && remaining >= 2
                && 0x80 == (seq[1] & 0xc0)) {
            /* 2 bytes UTF-8 character. */
            ucs2_code = (unsigned short) (((seq[0] & 0x1f) << 6)
                    | (seq[1] & 0x3f));
            seq_len = 2;
        } else if (0xe0 == (seq[0] & 0xf0) && remaining >= 3
                && 0x80 == (seq[1] & 0xc0) && 0x80 == (seq[2] & 0xc0)) {
            /* 3 bytes UTF-8 character. */
            ucs2_code = (unsigned short) (((seq[0] & 0x0f) << 12)
                    | ((seq[1] & 0x3f) << 6) | (seq[2] & 0x3f));
            seq_len = 3;
        } else {
            /*
             * Unsupported lead byte, bad continuation byte, or a sequence
             * that would run past in + ilen: stop without reading further.
             */
            break;
        }

        /* memcpy keeps the store valid for any alignment of out. */
        memcpy(out + ret, &ucs2_code, 2);
        consumed += seq_len;
        ret += 2;
    }
    return ret;
}