#include <stdio.h>
#include <stdint.h>
/*
 * Minimal boolean support: unless FALSE is already defined, declare BOOL as an
 * 8-bit signed integer together with the TRUE (1) and FALSE (0) constants.
 * NOTE(review): when FALSE is defined before this point, BOOL is not declared
 * here either and presumably comes from the same place - confirm.
 */
#ifndef FALSE
typedef int8_t BOOL;
#define TRUE 1
#define FALSE 0
#endif
/*
 * One row of the UTF-8 lead-byte table: a count of leading 1 bits paired
 * with the byte value that consists of exactly that many leading 1 bits.
 */
typedef struct _UTF8_HEAD {
    uint8_t countOf1; /* number of leading 1 bits in `head` */
    uint8_t head;     /* byte value with `countOf1` leading 1 bits, rest 0 */
} UTF8_HEAD;
/*
 * Byte prefixes indexed by their number of leading 1 bits.
 * isutf8() classifies a lead byte by masking it with entry n + 1 and
 * comparing the result against entry n.
 */
static const UTF8_HEAD utf8Head[] = {
    { 0, 0x00 }, /* 0xxxxxxx */
    { 1, 0x80 }, /* 10xxxxxx */
    { 2, 0xC0 }, /* 110xxxxx */
    { 3, 0xE0 }, /* 1110xxxx */
    { 4, 0xF0 }, /* 11110xxx */
    { 5, 0xF8 }, /* 111110xx */
    { 6, 0xFC }, /* 1111110x */
    { 7, 0xFE }, /* 1111111x */
};
/**
 * Check whether a byte buffer is structurally well-formed UTF-8.
 *
 * Sequences of 1 to 6 bytes are accepted:
 *   1 byte   0xxxxxxx
 *   2 bytes  110xxxxx 10xxxxxx
 *   3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *   4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   5 bytes  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *   6 bytes  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Only the framing (lead-byte prefix and continuation-byte prefix) is
 * checked; the decoded code point value is never examined, so overlong
 * forms are not rejected.
 *
 * @param data  Bytes to inspect; need not be NUL-terminated. NULL is
 *              reported as valid.
 * @param len   Number of bytes in data. Values <= 0 are treated as an
 *              empty buffer, which is valid.
 * @return TRUE if every byte belongs to a complete, correctly framed
 *         sequence, FALSE otherwise (stray continuation byte, 0xFE/0xFF
 *         lead byte, bad continuation byte, or sequence cut off by len).
 */
BOOL isutf8(const char* data, int32_t len)
{
    if (NULL == data || len <= 0)
    {
        /* Nothing to scan; also avoids forming data + len for a negative len. */
        return TRUE;
    }

    const char* p = data;
    const char* end = data + len;
    const size_t headCount = sizeof(utf8Head) / sizeof(utf8Head[0]);

    while (p < end)
    {
        const uint8_t lead = (uint8_t)*p;

        if (0 == (lead & 0x80))
        {
            /* Single-byte (ASCII) character. */
            ++p;
            continue;
        }

        /*
         * Find the sequence length: utf8Head[n].head is the prefix of an
         * n-byte lead byte and utf8Head[n + 1].head is the mask isolating it.
         */
        size_t seqLen = 0;
        for (size_t n = 2; n + 1 < headCount; ++n)
        {
            if (utf8Head[n].head == (lead & utf8Head[n + 1].head))
            {
                seqLen = n;
                break;
            }
        }
        if (0 == seqLen)
        {
            /* Stray continuation byte (10xxxxxx) or 0xFE/0xFF. */
            return FALSE;
        }

        /*
         * Verify the whole sequence fits BEFORE touching any continuation
         * byte, so a sequence truncated at the end of the buffer never
         * causes a read at or beyond `end`.
         */
        if ((size_t)(end - p) < seqLen)
        {
            return FALSE;
        }

        for (size_t k = 1; k < seqLen; ++k)
        {
            if (0x80 != ((uint8_t)p[k] & 0xC0))
            {
                return FALSE;
            }
        }

        p += seqLen;
    }

    return TRUE;
}
/**
 * Demo entry point: prints the result of isutf8() for a short sample
 * string (1 when accepted, 0 when rejected).
 */
int main()
{
    /*
     * Take the byte count from the literal itself (minus the terminating
     * NUL) instead of hard-coding it, so the call never reads past the
     * array if the compiler encodes the text with a different byte length.
     */
    static const char sample[] = "中文";

    printf("%d\n", isutf8(sample, (int32_t)(sizeof(sample) - 1)));
    return 0;
}
判断字符串是否为 UTF-8 编码，支持最长 6 个字节的 UTF-8 字符。
最新推荐文章于 2024-08-29 09:15:33 发布