项目需要在 Mac 下发送一段 Unicode 数据。试用过 Mac 自带的 libiconv,但转换结果中每个字符却占用四个字节,上传到服务器后返回结果不对。于是从网上找了一段代码,稍加修改,在虚拟机上运行正确。unsigned short int 占两个字节,符合服务器对 Unicode(每个编码单元两个字节)的内存要求。
/* Returns non-zero when b is a UTF-8 continuation (trail) byte, i.e. 10xxxxxx. */
static int myUTF8_is_trail(unsigned char b)
{
    return (b & 0xc0) == 0x80;
}

/*
 * Convert a UTF-8 byte buffer into 16-bit (UTF-16) code units.
 *
 * unicode  Destination buffer. It must have room for at least len + 1
 *          code units: every emitted unit consumes at least one input
 *          byte, and a terminating 0 unit is always appended.
 * utf8     Source bytes; does not need to be NUL-terminated.
 * len      Number of bytes available in utf8. Values <= 0 produce an
 *          empty (terminator-only) result.
 *
 * Returns the number of 16-bit units written, not counting the
 * terminating 0. Returns 0 without writing anything if unicode is a
 * null pointer.
 *
 * Code points above U+FFFF (4-byte UTF-8 sequences) are written as a
 * surrogate pair and therefore count as two units. Malformed input
 * (stray trail byte, invalid lead byte, bad trail byte, or a sequence
 * cut off by the end of the buffer) yields U+FFFD for the offending
 * byte and decoding resumes at the next byte, so the function never
 * reads past utf8 + len.
 */
int myUTF8_to_UNICODE(unsigned short int * unicode, unsigned char* utf8, int len)
{
    int length = 0;
    int pos = 0;

    if (!unicode) {
        return 0;
    }
    if (!utf8) {
        len = 0;
    }

    while (pos < len) {
        unsigned char lead = utf8[pos];
        int remaining = len - pos;
        unsigned long cp;
        int used;

        if (lead <= 0x7f) {
            /* 1 byte: ASCII, zero-extended to 16 bits. */
            cp = lead;
            used = 1;
        } else if (lead >= 0xc0 && lead <= 0xdf && remaining >= 2 &&
                   myUTF8_is_trail(utf8[pos + 1])) {
            /* 2 bytes: 110xxxxx 10xxxxxx */
            cp = ((unsigned long)(lead & 0x1f) << 6) |
                 (unsigned long)(utf8[pos + 1] & 0x3f);
            used = 2;
        } else if (lead >= 0xe0 && lead <= 0xef && remaining >= 3 &&
                   myUTF8_is_trail(utf8[pos + 1]) &&
                   myUTF8_is_trail(utf8[pos + 2])) {
            /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx (most CJK text). */
            cp = ((unsigned long)(lead & 0x0f) << 12) |
                 ((unsigned long)(utf8[pos + 1] & 0x3f) << 6) |
                 (unsigned long)(utf8[pos + 2] & 0x3f);
            used = 3;
        } else if (lead >= 0xf0 && lead <= 0xf7 && remaining >= 4 &&
                   myUTF8_is_trail(utf8[pos + 1]) &&
                   myUTF8_is_trail(utf8[pos + 2]) &&
                   myUTF8_is_trail(utf8[pos + 3])) {
            /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            cp = ((unsigned long)(lead & 0x07) << 18) |
                 ((unsigned long)(utf8[pos + 1] & 0x3f) << 12) |
                 ((unsigned long)(utf8[pos + 2] & 0x3f) << 6) |
                 (unsigned long)(utf8[pos + 3] & 0x3f);
            used = 4;
            /* Only U+10000..U+10FFFF can be expressed as a surrogate pair. */
            if (cp < 0x10000UL || cp > 0x10ffffUL) {
                cp = 0xfffdUL;
                used = 1;
            }
        } else {
            /* Stray trail byte, invalid lead byte, or truncated sequence. */
            cp = 0xfffdUL;
            used = 1;
        }

        if (cp >= 0x10000UL) {
            /* Split into high/low surrogates; 4 input bytes -> 2 units. */
            unsigned long v = cp - 0x10000UL;
            *unicode++ = (unsigned short int)(0xd800UL + (v >> 10));
            *unicode++ = (unsigned short int)(0xdc00UL + (v & 0x3ffUL));
            length += 2;
        } else {
            *unicode++ = (unsigned short int)cp;
            length++;
        }
        pos += used;
    }

    *unicode = 0;
    return length;
}
调用:
char *pUnicode = new char[nUnlength];
nUnlength= myUTF8_to_UNICODE((unsigned short int *)pUnicode,(unsigned char *)strAuthor,nlength);