/*
 * IsUTF8 - check whether a NUL-terminated byte string is structurally
 * well-formed UTF-8.
 *
 * buf: NUL-terminated string to inspect; may be NULL.
 *
 * Returns FALSE when buf is NULL, TRUE when buf is the empty string, and
 * otherwise TRUE only if every byte belongs to a complete sequence made of
 * a recognised lead byte followed by the right number of continuation
 * bytes (10XXXXXX).
 *
 * A leading UTF-8 byte order mark (EF BB BF) is skipped.
 *
 * This is a structural check only: the legacy 5- and 6-byte forms are
 * accepted, and overlong encodings, surrogate code points and values above
 * U+10FFFF are not rejected.
 */
BOOL IsUTF8(char* buf)
{
    if (buf == NULL)
        return FALSE;

    size_t bufLen = strlen(buf);
    if (bufLen == 0)
        return TRUE;

    /* Work on unsigned bytes so the shifts below never see a negative char. */
    const unsigned char *bytes = (const unsigned char *)buf;
    size_t i = 0;

    /* Skip a leading byte order mark. */
    if (bufLen >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
        i = 3;

    while (i < bufLen) {
        unsigned char lead = bytes[i];
        size_t seqLen;

        /* Classify the lead byte by its high-bit prefix. */
        if ((lead >> 7) == 0x00)        /* 0XXXXXXX */
            seqLen = 1;
        else if ((lead >> 5) == 0x06)   /* 110XXXXX */
            seqLen = 2;
        else if ((lead >> 4) == 0x0E)   /* 1110XXXX */
            seqLen = 3;
        else if ((lead >> 3) == 0x1E)   /* 11110XXX */
            seqLen = 4;
        else if ((lead >> 2) == 0x3E)   /* 111110XX */
            seqLen = 5;
        else if ((lead >> 1) == 0x7E)   /* 1111110X */
            seqLen = 6;
        else
            return FALSE;               /* stray continuation byte, 0xFE or 0xFF */

        /* The whole sequence must fit in the remaining input. */
        if (seqLen > bufLen - i)
            return FALSE;

        /* Every byte after the lead must be a continuation byte 10XXXXXX. */
        for (size_t k = 1; k < seqLen; k++) {
            if ((bytes[i + k] >> 6) != 0x02)
                return FALSE;
        }

        i += seqLen;
    }

    return TRUE;
}
/*
 * IsGB2312 - check whether a NUL-terminated byte string consists only of
 * single bytes below 0x80 and two-byte pairs in the GB2312 byte ranges.
 *
 * buf: NUL-terminated string to inspect; may be NULL.
 *
 * Returns FALSE when buf is NULL, TRUE when buf is empty, and otherwise
 * TRUE only if every byte is either below 0x80 or part of a pair whose
 * lead byte is in 0xA1-0xF7 and whose trail byte is in 0xA1-0xFE.
 */
BOOL IsGB2312(char* buf)
{
    if (buf == NULL)
        return FALSE;

    const unsigned char *bytes = (const unsigned char *)buf;
    int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total) {
        unsigned char lead = bytes[pos];

        /* Single byte with the high bit clear. */
        if (lead < 0x80) {
            pos += 1;
            continue;
        }

        /* Anything else must be a valid lead byte with a trail byte available. */
        if (lead < 0xA1 || lead > 0xF7 || pos + 1 >= total)
            return FALSE;

        unsigned char trail = bytes[pos + 1];
        if (trail < 0xA1 || trail > 0xFE)
            return FALSE;

        pos += 2;
    }

    return TRUE;
}
/*
 * IsGBK - check whether a NUL-terminated byte string consists only of
 * single bytes below 0x80 and two-byte pairs in the GBK byte ranges.
 *
 * buf: NUL-terminated string to inspect; may be NULL.
 *
 * Returns FALSE when buf is NULL, TRUE when buf is empty, and otherwise
 * TRUE only if every byte is either below 0x80 or part of a pair whose
 * lead byte is in 0x81-0xFE and whose trail byte is in 0x40-0xFE
 * excluding 0x7F.
 */
BOOL IsGBK(char* buf)
{
    if (buf == NULL)
        return FALSE;

    const unsigned char *bytes = (const unsigned char *)buf;
    int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total) {
        unsigned char lead = bytes[pos];

        /* Single byte with the high bit clear. */
        if (lead < 0x80) {
            pos += 1;
            continue;
        }

        /* Anything else must be a valid lead byte with a trail byte available. */
        if (lead < 0x81 || lead > 0xFE || pos + 1 >= total)
            return FALSE;

        unsigned char trail = bytes[pos + 1];
        if (trail < 0x40 || trail > 0xFE || trail == 0x7F)
            return FALSE;

        pos += 2;
    }

    return TRUE;
}
/*
 * IsGB18030 - check whether a NUL-terminated byte string consists only of
 * single bytes below 0x80, two-byte pairs and four-byte groups in the
 * GB18030 byte ranges.
 *
 * buf: NUL-terminated string to inspect; may be NULL.
 *
 * Returns FALSE when buf is NULL, TRUE when buf is empty, and otherwise
 * TRUE only if every byte belongs to one of these forms:
 *   - one byte below 0x80;
 *   - two bytes: 0x81-0xFE, then 0x40-0xFE excluding 0x7F;
 *   - four bytes: 0x81-0xFE, 0x30-0x39, 0x81-0xFE, 0x30-0x39.
 */
BOOL IsGB18030(char* buf)
{
    if (buf == NULL)
        return FALSE;

    const unsigned char *bytes = (const unsigned char *)buf;
    int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total) {
        unsigned char b1 = bytes[pos];

        /* Single byte with the high bit clear. */
        if (b1 < 0x80) {
            pos += 1;
            continue;
        }

        /* Multi-byte forms need a valid lead byte and at least one more byte. */
        if (b1 < 0x81 || b1 > 0xFE || pos + 1 >= total)
            return FALSE;

        unsigned char b2 = bytes[pos + 1];

        /* Two-byte form: the second byte decides it. */
        if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F) {
            pos += 2;
            continue;
        }

        /* Otherwise only the four-byte form remains, and it must fit. */
        int secondIsDigit = (b2 >= 0x30 && b2 <= 0x39);
        if (!secondIsDigit || pos + 3 >= total)
            return FALSE;

        unsigned char b3 = bytes[pos + 2];
        unsigned char b4 = bytes[pos + 3];
        int thirdIsLead = (b3 >= 0x81 && b3 <= 0xFE);
        int fourthIsDigit = (b4 >= 0x30 && b4 <= 0x39);
        if (!thirdIsLead || !fourthIsDigit)
            return FALSE;

        pos += 4;
    }

    return TRUE;
}