// String descriptor consumed by MMS_UNICODE2UTF and produced by MMS_UTF2UNICODE.
typedef struct
{
// Size of the data at str_ptr in bytes. MMS_UNICODE2UTF treats length / 2 as
// the number of 16-bit characters when is_ucs2 is set.
uint16 length;
// When set, str_ptr holds 16-bit characters stored high byte first; otherwise
// the converters handle the data as one byte per character.
BOOLEAN is_ucs2;
// Pointer to the character data described by length and is_ucs2.
uint8 *str_ptr;
}MMI_STRING_T;
// UTF-8 byte buffer produced by MMS_UNICODE2UTF and consumed by MMS_UTF2UNICODE.
typedef struct
{
// Number of bytes at buf_ptr that belong to the string.
uint32 len;
// Pointer to the UTF-8 encoded bytes; MMS_UNICODE2UTF sets it to 0 for an
// empty or missing source string.
uint8 *buf_ptr;
}UTF8_T;
// Bounds-checked copy: asserts that _SIZE fits in both the destination
// (_DEST_LEN) and the source (_SRC_LEN), then copies _SIZE bytes from
// _SRC_PTR to _DEST_PTR.
//
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays correct after an unbraced if/else.
//
// Inside its own expansion a macro name is not expanded again, so the inner
// three-argument _MEMCPY call resolves to whatever function or object of that
// name is declared elsewhere.
// NOTE(review): presumably that is the platform's raw copy primitive - confirm.
//
// _SIZE, _DEST_LEN and _SRC_LEN are evaluated more than once; do not pass
// expressions with side effects.
#define _MEMCPY(_DEST_PTR, _DEST_LEN, _SRC_PTR, _SRC_LEN, _SIZE) \
    do \
    { \
        _PASSERT((_SIZE) <= (_DEST_LEN), ("MMI_MEMCPY: the _DEST_LEN is too small!")); \
        _PASSERT((_SIZE) <= (_SRC_LEN), ("MMI_MEMCPY: the _SRC_LEN is too small!")); \
        _MEMCPY((_DEST_PTR), (_SRC_PTR), (_SIZE)); \
    } while (0)
/*****************************************************************************/
// Description : ucs2 -> utf8
//               Builds a newly allocated UTF-8 byte buffer from an MMI string.
// Param       : str_src_ptr - source string; length is its size in bytes.
//               With is_ucs2 set, the bytes are read as 16-bit characters,
//               high byte first, and a trailing odd byte is ignored.
// Return      : UTF8_T with the allocated buffer and the number of bytes
//               written. buf_ptr and len are both 0 when str_src_ptr, its
//               str_ptr, or its length is null/zero.
// Note        : Only UCS2 is considered; UCS4 is not. When is_ucs2 is not set
//               the source bytes are copied unchanged. The buffer comes from
//               ALLOC, is zero-filled first, and is not released here.
/*****************************************************************************/
UTF8_T MMS_UNICODE2UTF(MMI_STRING_T *str_src_ptr)
{
    UTF8_T result = {0};
    const uint8 *read_ptr = PNULL;
    uint8 *dst_ptr = PNULL;
    uint8 *write_ptr = PNULL;
    uint32 dst_size = 0;
    uint16 code_unit = 0;   // one 16-bit UCS2 character
    int units_left = 0;
    int written = 0;

    // Nothing to convert: hand back an empty descriptor without allocating.
    if (PNULL == str_src_ptr || PNULL == str_src_ptr->str_ptr || 0 == str_src_ptr->length)
    {
        result.buf_ptr = 0;
        result.len = 0;
        return result;
    }

    // Worst case is 3 output bytes per 16-bit character, plus 3 spare bytes.
    dst_size = str_src_ptr->length / 2 * 3 + 3;
    dst_ptr = ALLOC(dst_size);
    _ASSERT(dst_ptr != PNULL);
    _MEMSET(dst_ptr, 0, dst_size);

    if (!str_src_ptr->is_ucs2)
    {
        // Single-byte (ASCII) text: copy the bytes through as they are.
        _MEMCPY(dst_ptr, dst_size, str_src_ptr->str_ptr, str_src_ptr->length, str_src_ptr->length);
        result.buf_ptr = dst_ptr;
        result.len = str_src_ptr->length;
        return result;
    }

    read_ptr = str_src_ptr->str_ptr;
    write_ptr = dst_ptr;
    for (units_left = str_src_ptr->length / 2; units_left > 0; units_left--)
    {
        // Assemble the character from its high byte followed by its low byte.
        code_unit = (uint16)((read_ptr[0] << 8) | read_ptr[1]);
        read_ptr += 2;

        if (code_unit < 0x80)
        {
            // 0xxx xxxx : 7 bits fit in a single byte
            *write_ptr++ = (uint8)code_unit;
        }
        else if (code_unit < 0x800)
        {
            // 110x xxxx 10xx xxxx : 11 bits split into 5 + 6
            *write_ptr++ = (uint8)(0xC0 | (code_unit >> 6));            // high 5 bits
            *write_ptr++ = (uint8)(0x80 | (code_unit & 0x3F));          // low 6 bits
        }
        else
        {
            // 1110 xxxx 10xx xxxx 10xx xxxx : 16 bits split into 4 + 6 + 6
            *write_ptr++ = (uint8)(0xE0 | (code_unit >> 12));           // high 4 bits
            *write_ptr++ = (uint8)(0x80 | ((code_unit >> 6) & 0x3F));   // middle 6 bits
            *write_ptr++ = (uint8)(0x80 | (code_unit & 0x3F));          // last 6 bits
        }
    }

    written = (int)(write_ptr - dst_ptr);
    result.buf_ptr = dst_ptr;
    result.len = written;
    _ASSERT(written <= dst_size);
    return result;
}
/*****************************************************************************/
// Description : utf8 -> ucs2
//               Decodes a UTF-8 byte buffer into a newly allocated MMI string.
// Param       : utf8_str_ptr - UTF-8 input; must not be PNULL (asserted).
//               A PNULL buf_ptr is handled like an empty input.
// Return      : MMI_STRING_T whose str_ptr is a zero-filled buffer of
//               len * 2 + 2 bytes obtained from SCI_ALLOC (not released here).
//               - Input made only of bytes <= 0x7F: is_ucs2 is FALSE, the
//                 bytes are copied unchanged and length is the input length.
//               - Otherwise: is_ucs2 is TRUE, each decoded character is
//                 stored high byte first, and length is the bytes written.
// Note        : Only 1-, 2- and 3-byte sequences are decoded. A sequence cut
//               off by the end of the input is dropped. A lead byte above
//               0xEF cannot be stored in 16 bits, so conversion stops there
//               and the characters decoded so far are returned as UCS2.
//               length is a uint16, so results longer than 65535 bytes are
//               truncated in that field.
//               NOTE(review): bytes 0x80-0xBF in lead position take the
//               2-byte branch and continuation bytes are not validated -
//               confirm whether malformed input must be rejected.
/*****************************************************************************/
MMI_STRING_T MMS_UTF2UNICODE(UTF8_T *utf8_str_ptr)
{
    uint32 i = 0;           // read offset into the UTF-8 input
    uint32 j = 0;           // write offset into the output buffer
    uint32 src_len = 0;
    uint32 out_len = 0;
    uint8 *src_ptr = PNULL;
    uint8 *ucs2_ptr = PNULL;
    uint8 head_char = 0;
    uint16 ucs2_char = 0;
    BOOLEAN is_asc = TRUE;
    MMI_STRING_T str_out = {0};

    _ASSERT(utf8_str_ptr != PNULL);

    // A missing buffer is treated as zero bytes rather than being dereferenced.
    src_ptr = utf8_str_ptr->buf_ptr;
    if (src_ptr != PNULL)
    {
        src_len = utf8_str_ptr->len;
    }

    // Two output bytes per input byte is the worst case; +2 keeps a zero terminator.
    out_len = src_len * 2 + 2;
    ucs2_ptr = SCI_ALLOC(out_len);
    _ASSERT(ucs2_ptr != PNULL);
    _MEMSET(ucs2_ptr, 0, out_len);

    while (i < src_len)
    {
        head_char = src_ptr[i];
        if (head_char <= 0x7F)
        {
            //0xxx xxxx
            //0111 1111 = 0X7F
            ucs2_ptr[j] = 0;
            ucs2_ptr[j + 1] = head_char;
            j += 2;
            i += 1;
        }
        else if (head_char <= 0xDF)
        {
            // Decode only when the continuation byte is inside the input.
            if (i + 1 < src_len)
            {
                //110x xxxx 10xx xxxx
                //0001 1111 == 0x1F 0011 1111 == 0x3F
                ucs2_char = (uint16)(((head_char & 0x1F) << 6) | (src_ptr[i + 1] & 0x3F));
                ucs2_ptr[j] = (uint8)((ucs2_char & 0xFF00) >> 8);
                ucs2_ptr[j + 1] = (uint8)(ucs2_char & 0xFF);
                j += 2;
            }
            i += 2;
            is_asc = FALSE;
        }
        else if (head_char <= 0xEF)
        {
            // Decode only when both continuation bytes are inside the input.
            if (i + 2 < src_len)
            {
                //1110 xxxx 10xx xxxx 10xx xxxx
                //0000 1111 == 0xF 0011 1111 == 0x3F
                ucs2_char = (uint16)(((head_char & 0xF) << 12)
                                     | ((src_ptr[i + 1] & 0x3F) << 6)
                                     | (src_ptr[i + 2] & 0x3F));
                ucs2_ptr[j] = (uint8)((ucs2_char & 0xFF00) >> 8);
                ucs2_ptr[j + 1] = (uint8)(ucs2_char & 0xFF);
                j += 2;
            }
            i += 3;
            is_asc = FALSE;
        }
        else
        {
            // Lead byte above 0xEF: the input is not plain ASCII, so the
            // result must not be reported as such. Conversion stops here.
            is_asc = FALSE;
            break;
        }
    }

    if (is_asc)
    {
        str_out.is_ucs2 = FALSE;
        str_out.length = (uint16)src_len;
        // The loop above already wrote the 2-byte expansion; wipe it so no
        // stale bytes remain after the copied text and the terminator holds.
        _MEMSET(ucs2_ptr, 0, out_len);
        if (src_len > 0)
        {
            _MEMCPY(ucs2_ptr, out_len, src_ptr, src_len, src_len);
        }
    }
    else
    {
        str_out.is_ucs2 = TRUE;
        str_out.length = (uint16)j;
    }
    str_out.str_ptr = ucs2_ptr;

    return str_out;
}