// Convert a UTF-8 byte buffer to 16-bit wide characters and report where
// conversion stopped.
//
// wstr_ptr    : destination buffer, capacity wstr_len elements (not bytes).
// wstr_len    : number of wchar elements that may be written to wstr_ptr.
// utf8_ptr    : source bytes; does not need to be NUL-terminated.
// utf8_len    : number of bytes available at utf8_ptr.
// src_end_pos : optional (may be PNULL). Receives the ADDRESS of the first
//               source byte that was not converted, cast to uint32 (it is
//               not an offset). Not written when wstr_ptr or utf8_ptr is PNULL.
//
// Returns the number of wchar elements written. No terminator is appended.
//
// Conversion stops, without consuming the offending character, when:
//   - the destination is full,
//   - a multi-byte sequence is cut short by the end of the input, or
//   - the lead byte is 0xFE / 0xFF (logged via SCI_TRACE_LOW).
//
// Continuation bytes are not validated, and code points above 0xFFFF are
// reduced to their low 16 bits (no surrogate pairs are produced).
PUBLIC uint32 GUI_UTF8ToWstrWithEndPos(//ucs2b len
wchar *wstr_ptr,//out
uint32 wstr_len,//in
const uint8 *utf8_ptr,//in
uint32 utf8_len,//in
uint32 *src_end_pos
)
{
    uint32 src_idx = 0;     // next source byte to read
    uint32 dst_idx = 0;     // next destination slot / number of wchar written
    uint32 consumed = 0;    // offset just past the last fully converted character
    uint32 code_point = 0;  // value being assembled
    uint32 tail_len = 0;    // continuation bytes that follow the lead byte
    uint32 t = 0;
    uint8 head_char = 0;

    if (PNULL == wstr_ptr || PNULL == utf8_ptr)
    {
        return dst_idx;
    }

    // Invariant at the top of every iteration: consumed == src_idx.
    while (src_idx < utf8_len)
    {
        // Destination capacity is counted in elements; stop cleanly when it
        // is exhausted so the caller can resume from *src_end_pos.
        if (dst_idx >= wstr_len)
        {
            break;
        }

        head_char = utf8_ptr[src_idx++];

        if (head_char <= 0x7F)
        {
            //0xxx xxxx
            tail_len = 0;
            code_point = head_char;
        }
        else if (head_char <= 0xDF)
        {
            //110x xxxx 10xx xxxx
            tail_len = 1;
            code_point = (uint32)(head_char & 0x1F);
        }
        else if (head_char <= 0xEF)
        {
            //1110 xxxx 10xx xxxx 10xx xxxx
            tail_len = 2;
            code_point = (uint32)(head_char & 0x0F);
        }
        else if (head_char <= 0xF7)
        {
            //1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
            tail_len = 3;
            code_point = (uint32)(head_char & 0x07);
        }
        else if (head_char <= 0xFB)
        {
            //1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx
            tail_len = 4;
            code_point = (uint32)(head_char & 0x03);
        }
        else if (head_char <= 0xFD)
        {
            //1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx
            tail_len = 5;
            code_point = (uint32)(head_char & 0x01);
        }
        else
        {
            SCI_TRACE_LOW("GUI_UTF82UCS2B: i %d char %x",i,head_char);
            break;
        }

        // src_idx <= utf8_len here, so the subtraction cannot wrap. Every
        // continuation byte must be inside the input before any is read.
        if (tail_len > utf8_len - src_idx)
        {
            break;
        }

        for (t = 0; t < tail_len; t++)
        {
            //0011 1111 == 0x3F : payload bits of a continuation byte
            code_point = (code_point << 6) | (uint32)(utf8_ptr[src_idx++] & 0x3F);
        }

        // Only the low 16 bits are stored.
        wstr_ptr[dst_idx++] = (uint16)(code_point & 0xFFFF);
        consumed = src_idx;
    }

    if (src_end_pos != PNULL)
    {
        // The address (not the offset) of the first unconverted byte is stored.
        *src_end_pos = (uint32)(utf8_ptr + consumed);
    }

    return dst_idx;
}
// NOTE(review): this span repeats the parameter list and body of
// GUI_UTF8ToWstrWithEndPos, but it has no return-type / function-name line
// of its own. It looks like an accidental duplicate paste - confirm, then
// either remove it or restore the missing header.
wchar *wstr_ptr,//out
uint32 wstr_len,//in
const uint8 *utf8_ptr,//in
uint32 utf8_len,//in
uint32 *src_end_pos
)
{
uint32 i = 0; // read index into utf8_ptr
uint32 j = 0; // write index into wstr_ptr; also the return value
uint32 k = 0; // index of the first byte not yet fully converted
uint16 ucs2_char = 0;
uint8 head_char = 0;
// Null source or destination: return 0 without touching *src_end_pos.
if (PNULL == wstr_ptr || PNULL == utf8_ptr)
{
return j;
}
while(i < utf8_len)
{
// NOTE(review): j is an element index but the bound is wstr_len * sizeof(wchar),
// a byte count - TODO confirm the unit of wstr_len; if it is in wchar elements
// this allows writes past the end of wstr_ptr.
SCI_ASSERT( j + 1 < wstr_len * sizeof(wchar) ); /*assert to do*/
k = i; // remember the start of the current character in case we break out
head_char = utf8_ptr[i++];
if (head_char <= 0x7F)
{
//0xxx xxxx
//0111 1111 = 0X7F
wstr_ptr[j ++] = head_char;
}
else if (head_char <= 0xDF)
{
// Bytes 0x80..0xBF (continuation bytes) also land in this branch.
if (i < utf8_len)
{
//110x xxxx 10xx xxxx
//1101 1111 == 0xDF
//0001 1111 == 0x1F 0011 1111 == 0x3F
ucs2_char = (uint16)(((head_char & 0x1F) << 6) | ((utf8_ptr[i++] & 0x3F)));
wstr_ptr[j++] = ucs2_char;
}
else
{
// Sequence cut short by the end of input; k still marks its start.
break;
}
}
else if (head_char <= 0xEF)
{
if (i + 1 < utf8_len)
{
//1110 xxxx 10xx xxxx 10xx xxxx
//1110 1111 == 0xEF
//0000 1111 == 0xF 0011 1111 == 0x3F
ucs2_char = (uint16)(((head_char & 0xF) << 12) | ((utf8_ptr[i] & 0x3F) << 6) | (utf8_ptr[i + 1] & 0x3F));
i += 2;
wstr_ptr[j++] = ucs2_char;
}
else
{
break;
}
}
else if(head_char <= 0xF7)
{
//1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx
//1111 0111 == 0xF7
//0000 0111 == 0x7 0011 1111 == 0x3F
// NOTE(review): the guard only proves utf8_ptr[i + 1] is in range, yet
// utf8_ptr[i + 2] is read below.
if (i + 1 < utf8_len)
{
// The (uint16) cast keeps only the low 16 bits of the assembled value.
ucs2_char = (uint16)(((head_char & 0x7) << 18) | ((utf8_ptr[i] & 0x3F) << 12) | ((utf8_ptr[i+1] & 0x3F) << 6) | (utf8_ptr[i + 2] & 0x3F));
i += 3;
wstr_ptr[j++] = ucs2_char;
}
else
{
break;
}
}
else if(head_char <= 0xFB)
{
//1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx
//1111 1011 == 0xFB
//0000 0011 == 0x3 0011 1111 == 0x3F
// NOTE(review): the guard only proves utf8_ptr[i + 1] is in range, yet
// utf8_ptr[i + 3] is read below.
if (i + 1 < utf8_len)
{
ucs2_char = (uint16)(((head_char & 0x3) << 24) | ((utf8_ptr[i] & 0x3F) << 18) | ((utf8_ptr[i+1] & 0x3F) << 12)
| ((utf8_ptr[i+2] & 0x3F) << 6) | (utf8_ptr[i + 3] & 0x3F));
i += 4;
wstr_ptr[j++] = ucs2_char;
}
else
{
break;
}
}
else if(head_char <= 0xFD)
{
//1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx
//1111 1101 == 0xFD
//0000 0001 == 0x1 0011 1111 == 0x3F
// NOTE(review): the guard only proves utf8_ptr[i + 1] is in range, yet
// utf8_ptr[i + 4] is read below. The code masks with 0x3 although the
// comment above says 0x1.
if (i + 1 < utf8_len)
{
ucs2_char = (uint16)(((head_char & 0x3) << 30) | ((utf8_ptr[i] & 0x3F) << 24) | ((utf8_ptr[i+1] & 0x3F) << 18)
| ((utf8_ptr[i+2] & 0x3F) << 12) | ((utf8_ptr[i+3] & 0x3F) << 6) | (utf8_ptr[i + 4] & 0x3F));
i += 5;
wstr_ptr[j++] = ucs2_char;
}
else
{
break;
}
}
else
{
// Lead byte 0xFE or 0xFF: log it and stop converting.
SCI_TRACE_LOW("GUI_UTF82UCS2B: i %d char %x",i,head_char);
break;
}
k = i; // character fully converted; advance the resume point
}
if(src_end_pos != PNULL)
{
// Stores the address of the first unconverted byte (not an offset), cast to uint32.
*src_end_pos = (uint32)(utf8_ptr + k);
}
return j;
}